OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ****************************************************************************** |
| 3 * |
| 4 * Copyright (C) 1999-2007, International Business Machines |
| 5 * Corporation and others. All Rights Reserved. |
| 6 * |
| 7 ****************************************************************************** |
| 8 * file name: ubidiimp.h |
| 9 * encoding: US-ASCII |
| 10 * tab size: 8 (not used) |
| 11 * indentation:4 |
| 12 * |
| 13 * created on: 1999aug06 |
| 14 * created by: Markus W. Scherer, updated by Matitiahu Allouche |
| 15 */ |
| 16 |
| 17 #ifndef UBIDIIMP_H |
| 18 #define UBIDIIMP_H |
| 19 |
| 20 /* set import/export definitions */ |
| 21 #ifdef U_COMMON_IMPLEMENTATION |
| 22 |
| 23 #include "unicode/utypes.h" |
| 24 #include "unicode/uchar.h" |
| 25 #include "ubidi_props.h" |
| 26 |
| 27 /* miscellaneous definitions ---------------------------------------------- */ |
| 28 |
/* a single directional property value, stored in one byte */
typedef uint8_t  DirProp;
/* a 32-bit set of directional property bits (see DIRPROP_FLAG) */
typedef uint32_t Flags;
| 31 |
| 32 /* Comparing the description of the BiDi algorithm with this implementation |
| 33 is easier with the same names for the BiDi types in the code as there. |
| 34 See UCharDirection in uchar.h . |
| 35 */ |
| 36 enum { |
| 37 L= U_LEFT_TO_RIGHT, |
| 38 R= U_RIGHT_TO_LEFT, |
| 39 EN= U_EUROPEAN_NUMBER, |
| 40 ES= U_EUROPEAN_NUMBER_SEPARATOR, |
| 41 ET= U_EUROPEAN_NUMBER_TERMINATOR, |
| 42 AN= U_ARABIC_NUMBER, |
| 43 CS= U_COMMON_NUMBER_SEPARATOR, |
| 44 B= U_BLOCK_SEPARATOR, |
| 45 S= U_SEGMENT_SEPARATOR, |
| 46 WS= U_WHITE_SPACE_NEUTRAL, |
| 47 ON= U_OTHER_NEUTRAL, |
| 48 LRE=U_LEFT_TO_RIGHT_EMBEDDING, |
| 49 LRO=U_LEFT_TO_RIGHT_OVERRIDE, |
| 50 AL= U_RIGHT_TO_LEFT_ARABIC, |
| 51 RLE=U_RIGHT_TO_LEFT_EMBEDDING, |
| 52 RLO=U_RIGHT_TO_LEFT_OVERRIDE, |
| 53 PDF=U_POP_DIRECTIONAL_FORMAT, |
| 54 NSM=U_DIR_NON_SPACING_MARK, |
| 55 BN= U_BOUNDARY_NEUTRAL, |
| 56 dirPropCount |
| 57 }; |
| 58 |
/*
 * Directionality properties are sometimes easier to handle as bit values.
 * DIRPROP_FLAG(dir) is the single bit that stands for property value dir;
 * the abbreviations used in the macro names are those of the BiDi algorithm.
 */
#define DIRPROP_FLAG(dir) (1UL << (dir))

/* special flag for multiple runs from explicit embedding codes */
#define DIRPROP_FLAG_MULTI_RUNS (1UL << 31)
| 69 |
/*
 * Sets of directional properties, built from DIRPROP_FLAG() bits.
 * Each definition is kept as one logical line (with explicit continuations
 * where needed) so that no token is ever split across physical lines.
 */

/* are there any characters that are LTR or RTL? */
#define MASK_LTR \
    (DIRPROP_FLAG(L)|DIRPROP_FLAG(EN)|DIRPROP_FLAG(AN)|DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO))
#define MASK_RTL \
    (DIRPROP_FLAG(R)|DIRPROP_FLAG(AL)|DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO))
#define MASK_R_AL (DIRPROP_FLAG(R)|DIRPROP_FLAG(AL))

/* explicit embedding codes */
#define MASK_LRX (DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO))
#define MASK_RLX (DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO))
#define MASK_OVERRIDE (DIRPROP_FLAG(LRO)|DIRPROP_FLAG(RLO))

#define MASK_EXPLICIT (MASK_LRX|MASK_RLX|DIRPROP_FLAG(PDF))
#define MASK_BN_EXPLICIT (DIRPROP_FLAG(BN)|MASK_EXPLICIT)

/* paragraph and segment separators */
#define MASK_B_S (DIRPROP_FLAG(B)|DIRPROP_FLAG(S))

/* all types that are counted as White Space or Neutral in some steps */
#define MASK_WS (MASK_B_S|DIRPROP_FLAG(WS)|MASK_BN_EXPLICIT)
#define MASK_N (DIRPROP_FLAG(ON)|MASK_WS)

/* all types that are included in a sequence of European Terminators for (W5) */
#define MASK_ET_NSM_BN (DIRPROP_FLAG(ET)|DIRPROP_FLAG(NSM)|MASK_BN_EXPLICIT)

/* types that are neutrals or could become neutrals in (Wn) */
#define MASK_POSSIBLE_N \
    (DIRPROP_FLAG(CS)|DIRPROP_FLAG(ES)|DIRPROP_FLAG(ET)|MASK_N)

/*
 * These types may be changed to "e",
 * the embedding type (L or R) of the run,
 * in the BiDi algorithm (N2)
 */
#define MASK_EMBEDDING (DIRPROP_FLAG(NSM)|MASK_POSSIBLE_N)
| 102 |
/*
 * The dirProps L and R have the values 0 and 1 in UCharDirection, so the
 * low bit of a level can be used directly as the matching L/R property.
 */
#define GET_LR_FROM_LEVEL(level) ((DirProp)((level) & 1))

/*
 * True for level values of 0xfe and above.
 * NOTE(review): presumably these are the UBIDI_DEFAULT_xxx pseudo-levels -
 * confirm against unicode/ubidi.h.
 */
#define IS_DEFAULT_LEVEL(level) ((level) >= 0xfe)
| 107 |
/*
 * The following bit is ORed to the property of characters in paragraphs
 * with contextual RTL direction when paraLevel is contextual.
 */
#define CONTEXT_RTL 0x80
/* the property value dir with the CONTEXT_RTL bit cleared */
#define NO_CONTEXT_RTL(dir) ((dir)&~CONTEXT_RTL)
/*
 * The following is a variant of DIRPROP_FLAG which ignores the CONTEXT_RTL bit.
 */
#define DIRPROP_FLAG_NC(dir) (1UL<<(NO_CONTEXT_RTL(dir)))

/*
 * Paragraph level that applies at text position index.
 * If defaultParaLevel is non-zero, the level is read from bit 7 (the
 * CONTEXT_RTL bit) of dirProps[index], giving 0 or 1; otherwise it is the
 * fixed paraLevel of the object.
 * Both arguments and the whole expansion are parenthesized, and every
 * continued line ends in a backslash, so the macro is one expression.
 */
#define GET_PARALEVEL(ubidi, index) \
    ((UBiDiLevel)((ubidi)->defaultParaLevel ? (ubidi)->dirProps[(index)]>>7 \
                                            : (ubidi)->paraLevel))
| 122 |
/* Paragraph type for multiple paragraph support ---------------------------- */

/* element type of the paragraph arrays (paras, parasMemory, simpleParas) */
typedef int32_t Para;

/* code points of the carriage return and line feed characters */
#define CR 0x000D
#define LF 0x000A
| 128 |
/* Run structure for reordering --------------------------------------------- */

/*
 * Bit flags that tell which mark (LRM or RLM) is to be inserted on which
 * side (before or after); they can be combined with bitwise OR.
 */
enum {
    LRM_BEFORE = 1 << 0,
    LRM_AFTER  = 1 << 1,
    RLM_BEFORE = 1 << 2,
    RLM_AFTER  = 1 << 3
};
| 136 |
/* One directional run; the three members are consecutive int32_t values. */
typedef struct Run {
    /* first character of the run; b31 indicates even/odd level */
    int32_t logicalStart;
    /* last visual position of the run +1 */
    int32_t visualLimit;
    /*
     * if >0, flags for inserting LRM/RLM before/after run,
     * if <0, count of bidi controls within run
     */
    int32_t insertRemove;
} Run;
| 143 |
/* in a Run, logicalStart will get this bit set if the run level is odd */
#define INDEX_ODD_BIT (1UL<<31)

/*
 * Combine a logical index with the parity of a level in one int32_t:
 * bit 31 is set exactly when the level is odd.
 * Only the low bit of level is used, and the shift is done on an unsigned
 * 32-bit value: shifting a signed int32_t left by 31 is undefined behavior.
 */
#define MAKE_INDEX_ODD_PAIR(index, level) \
    ((int32_t)((uint32_t)(index)|((uint32_t)((level)&1)<<31)))
/* set bit 31 of the int32_t lvalue x if level is odd; x is evaluated once */
#define ADD_ODD_BIT_FROM_LEVEL(x, level) \
    ((x)|=(int32_t)((uint32_t)((level)&1)<<31))
/* clear bit 31 of the int32_t lvalue x */
#define REMOVE_ODD_BIT(x) ((x)&=(int32_t)0x7fffffff)

/*
 * The index without the odd bit, as an int32_t.
 * Masking with a 32-bit constant (instead of ~INDEX_ODD_BIT, which is an
 * unsigned long) keeps the result from being widened to 64 bits with the
 * sign-extended upper bits set where long is wider than 32 bits.
 */
#define GET_INDEX(x) ((int32_t)((x)&(int32_t)0x7fffffff))
/* 1 if bit 31 of x is set, else 0 */
#define GET_ODD_BIT(x) ((uint32_t)(x)>>31)
#define IS_ODD_RUN(x) ((UBool)(((x)&INDEX_ODD_BIT)!=0))
#define IS_EVEN_RUN(x) ((UBool)(((x)&INDEX_ODD_BIT)==0))
| 155 |
| 156 U_CFUNC UBool |
| 157 ubidi_getRuns(UBiDi *pBiDi, UErrorCode *pErrorCode); |
| 158 |
/** BiDi control code points */
enum {
    ZWNJ_CHAR=0x200c,
    ZWJ_CHAR,           /* 0x200d */
    LRM_CHAR,           /* 0x200e */
    RLM_CHAR,           /* 0x200f */
    LRE_CHAR=0x202a,
    RLE_CHAR,           /* 0x202b */
    PDF_CHAR,           /* 0x202c */
    LRO_CHAR,           /* 0x202d */
    RLO_CHAR            /* 0x202e */
};

/*
 * True if c is one of the code points listed above.
 * First test: clearing the two low bits maps exactly 0x200c..0x200f to
 * ZWNJ_CHAR.  Second test: the unsigned difference is below 5 exactly for
 * LRE_CHAR..RLO_CHAR, because values below LRE_CHAR wrap around to a large
 * unsigned number.
 * Note: c is evaluated twice, so it must not have side effects.
 */
#define IS_BIDI_CONTROL_CHAR(c) \
    (((uint32_t)(c)&0xfffffffc)==ZWNJ_CHAR || (uint32_t)((c)-LRE_CHAR)<5)
| 173 |
/* InsertPoints structure for noting where to put BiDi marks ---------------- */

/* one insertion point: a text position and what is to be inserted there */
typedef struct Point {
    int32_t pos;    /* position in text */
    int32_t flag;   /* flag for LRM/RLM, before/after */
} Point;
| 180 |
| 181 typedef struct InsertPoints { |
| 182 int32_t capacity; /* number of points allocated */ |
| 183 int32_t size; /* number of points used */ |
| 184 int32_t confirmed; /* number of points confirmed */ |
| 185 UErrorCode errorCode; /* for eventual memory shortage */ |
| 186 Point *points; /* pointer to array of points */ |
| 187 } InsertPoints; |
| 188 |
| 189 |
| 190 /* UBiDi structure ----------------------------------------------------------- *
/ |
| 191 |
| 192 struct UBiDi { |
| 193 /* pointer to parent paragraph object (pointer to self if this object is |
| 194 * a paragraph object); set to NULL in a newly opened object; set to a |
| 195 * real value after a successful execution of ubidi_setPara or ubidi_setLine |
| 196 */ |
| 197 const UBiDi * pParaBiDi; |
| 198 |
| 199 const UBiDiProps *bdp; |
| 200 |
| 201 /* alias pointer to the current text */ |
| 202 const UChar *text; |
| 203 |
| 204 /* length of the current text */ |
| 205 int32_t originalLength; |
| 206 |
| 207 /* if the UBIDI_OPTION_STREAMING option is set, this is the length |
| 208 * of text actually processed by ubidi_setPara, which may be shorter than |
| 209 * the original length. |
| 210 * Otherwise, it is identical to the original length. |
| 211 */ |
| 212 int32_t length; |
| 213 |
| 214 /* if the UBIDI_OPTION_REMOVE_CONTROLS option is set, and/or |
| 215 * marks are allowed to be inserted in one of the reordering mode, the |
| 216 * length of the result string may be different from the processed length. |
| 217 */ |
| 218 int32_t resultLength; |
| 219 |
| 220 /* memory sizes in bytes */ |
| 221 int32_t dirPropsSize, levelsSize, parasSize, runsSize; |
| 222 |
| 223 /* allocated memory */ |
| 224 DirProp *dirPropsMemory; |
| 225 UBiDiLevel *levelsMemory; |
| 226 Para *parasMemory; |
| 227 Run *runsMemory; |
| 228 |
| 229 /* indicators for whether memory may be allocated after ubidi_open() */ |
| 230 UBool mayAllocateText, mayAllocateRuns; |
| 231 |
| 232 /* arrays with one value per text-character */ |
| 233 const DirProp *dirProps; |
| 234 UBiDiLevel *levels; |
| 235 |
| 236 /* are we performing an approximation of the "inverse BiDi" algorithm? */ |
| 237 UBool isInverse; |
| 238 |
| 239 /* are we using the basic algorithm or its variation? */ |
| 240 UBiDiReorderingMode reorderingMode; |
| 241 |
| 242 /* UBIDI_REORDER_xxx values must be ordered so that all the regular |
| 243 * logical to visual modes come first, and all inverse BiDi modes |
| 244 * come last. |
| 245 */ |
| 246 #define UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL UBIDI_REORDER_NUMBERS_SPECIA
L |
| 247 |
| 248 /* bitmask for reordering options */ |
| 249 uint32_t reorderingOptions; |
| 250 |
| 251 /* must block separators receive level 0? */ |
| 252 UBool orderParagraphsLTR; |
| 253 |
| 254 /* the paragraph level */ |
| 255 UBiDiLevel paraLevel; |
| 256 /* original paraLevel when contextual */ |
| 257 /* must be one of UBIDI_DEFAULT_xxx or 0 if not contextual */ |
| 258 UBiDiLevel defaultParaLevel; |
| 259 |
| 260 /* the following is set in ubidi_setPara, used in processPropertySeq */ |
| 261 const struct ImpTabPair * pImpTabPair; /* pointer to levels state table pai
r */ |
| 262 |
| 263 /* the overall paragraph or line directionality - see UBiDiDirection */ |
| 264 UBiDiDirection direction; |
| 265 |
| 266 /* flags is a bit set for which directional properties are in the text */ |
| 267 Flags flags; |
| 268 |
| 269 /* lastArabicPos is index to the last AL in the text, -1 if none */ |
| 270 int32_t lastArabicPos; |
| 271 |
| 272 /* characters after trailingWSStart are WS and are */ |
| 273 /* implicitly at the paraLevel (rule (L1)) - levels may not reflect that */ |
| 274 int32_t trailingWSStart; |
| 275 |
| 276 /* fields for paragraph handling */ |
| 277 int32_t paraCount; /* set in getDirProps() */ |
| 278 Para *paras; /* limits of paragraphs, filled in |
| 279 ResolveExplicitLevels() or CheckExplicitLevels() */ |
| 280 |
| 281 /* for single paragraph text, we only need a tiny array of paras (no malloc(
)) */ |
| 282 Para simpleParas[1]; |
| 283 |
| 284 /* fields for line reordering */ |
| 285 int32_t runCount; /* ==-1: runs not set up yet */ |
| 286 Run *runs; |
| 287 |
| 288 /* for non-mixed text, we only need a tiny array of runs (no malloc()) */ |
| 289 Run simpleRuns[1]; |
| 290 |
| 291 /* for inverse Bidi with insertion of directional marks */ |
| 292 InsertPoints insertPoints; |
| 293 |
| 294 /* for option UBIDI_OPTION_REMOVE_CONTROLS */ |
| 295 int32_t controlCount; |
| 296 |
| 297 /* for Bidi class callback */ |
| 298 UBiDiClassCallback *fnClassCallback; /* action pointer */ |
| 299 const void *coClassCallback; /* context pointer */ |
| 300 }; |
| 301 |
/*
 * x is a valid paragraph object: x is not NULL and its pParaBiDi member
 * points back to x itself.
 */
#define IS_VALID_PARA(x) ((x) && ((x)->pParaBiDi==(x)))

/*
 * x is a valid paragraph or line object: x is not NULL and either x is its
 * own parent (a paragraph object), or its parent is not NULL and is its own
 * parent (a line object of a valid paragraph object).
 * Note: x is evaluated several times, so it must not have side effects.
 */
#define IS_VALID_PARA_OR_LINE(x) \
    ((x) && ((x)->pParaBiDi==(x) || \
             (((x)->pParaBiDi) && (x)->pParaBiDi->pParaBiDi==(x)->pParaBiDi)))
| 304 |
| 305 typedef union { |
| 306 DirProp *dirPropsMemory; |
| 307 UBiDiLevel *levelsMemory; |
| 308 Para *parasMemory; |
| 309 Run *runsMemory; |
| 310 } BidiMemoryForAllocation; |
| 311 |
/* Macros for initial checks at function entry */

/*
 * Return retvalue if pErrcode is NULL or *pErrcode already indicates failure.
 * Wrapped in do { } while(0) so that the macro is a single statement and a
 * following "else" cannot attach to the macro's own "if".
 * The caller supplies the terminating semicolon.
 */
#define RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrcode, retvalue) \
        do { \
            if((pErrcode)==NULL || U_FAILURE(*(pErrcode))) return retvalue; \
        } while(0)

/*
 * The following three macros set errcode (an lvalue) and return retvalue
 * when their check fails.  They expand to a braced "if" block, so they work
 * with or without a trailing semicolon, but must not be used as the
 * unbraced body of an if/else.
 */
#define RETURN_IF_NOT_VALID_PARA(bidi, errcode, retvalue) \
        if(!IS_VALID_PARA(bidi)) {  \
            (errcode)=U_INVALID_STATE_ERROR;  \
            return retvalue;                \
        }
#define RETURN_IF_NOT_VALID_PARA_OR_LINE(bidi, errcode, retvalue) \
        if(!IS_VALID_PARA_OR_LINE(bidi)) {  \
            (errcode)=U_INVALID_STATE_ERROR;  \
            return retvalue;                \
        }
/* the valid range is start<=arg<limit */
#define RETURN_IF_BAD_RANGE(arg, start, limit, errcode, retvalue) \
        if((arg)<(start) || (arg)>=(limit)) {       \
            (errcode)=U_ILLEGAL_ARGUMENT_ERROR;     \
            return retvalue;                        \
        }

/* same checks as above, for functions that return void */
#define RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrcode) \
        do { \
            if((pErrcode)==NULL || U_FAILURE(*(pErrcode))) return; \
        } while(0)
#define RETURN_VOID_IF_NOT_VALID_PARA(bidi, errcode) \
        if(!IS_VALID_PARA(bidi)) {  \
            (errcode)=U_INVALID_STATE_ERROR;  \
            return;                         \
        }
#define RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(bidi, errcode) \
        if(!IS_VALID_PARA_OR_LINE(bidi)) {  \
            (errcode)=U_INVALID_STATE_ERROR;  \
            return;                         \
        }
#define RETURN_VOID_IF_BAD_RANGE(arg, start, limit, errcode) \
        if((arg)<(start) || (arg)>=(limit)) {       \
            (errcode)=U_ILLEGAL_ARGUMENT_ERROR;     \
            return;                                 \
        }
| 348 |
| 349 /* helper function to (re)allocate memory if allowed */ |
| 350 U_CFUNC UBool |
| 351 ubidi_getMemory(BidiMemoryForAllocation *pMemory, int32_t *pSize, UBool mayAlloc
ate, int32_t sizeNeeded); |
| 352 |
/*
 * helper macros for each allocated array in UBiDi
 *
 * Each one calls ubidi_getMemory() on the matching xxxMemory/xxxSize member
 * pair of pBiDi, passing the object's own mayAllocateText or mayAllocateRuns
 * flag.  For the text-sized arrays, length is passed on as is; for runs it
 * is multiplied by sizeof(Run).  That product has type size_t; it is cast
 * to the int32_t parameter type explicitly rather than converted silently.
 */
#define getDirPropsMemory(pBiDi, length) \
        ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->dirPropsMemory, \
                        &(pBiDi)->dirPropsSize, \
                        (pBiDi)->mayAllocateText, (length))

#define getLevelsMemory(pBiDi, length) \
        ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->levelsMemory, \
                        &(pBiDi)->levelsSize, \
                        (pBiDi)->mayAllocateText, (length))

#define getRunsMemory(pBiDi, length) \
        ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->runsMemory, \
                        &(pBiDi)->runsSize, \
                        (pBiDi)->mayAllocateRuns, (int32_t)((length)*sizeof(Run)))
| 365 |
/*
 * additional macros used by ubidi_open() - always allow allocation
 *
 * Same calls as the helper macros for each array, but with TRUE passed as
 * the mayAllocate argument.  For paras and runs, length is multiplied by the
 * element size; that size_t product is cast to the int32_t parameter type
 * explicitly rather than converted silently.
 */
#define getInitialDirPropsMemory(pBiDi, length) \
        ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->dirPropsMemory, \
                        &(pBiDi)->dirPropsSize, \
                        TRUE, (length))

#define getInitialLevelsMemory(pBiDi, length) \
        ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->levelsMemory, \
                        &(pBiDi)->levelsSize, \
                        TRUE, (length))

#define getInitialParasMemory(pBiDi, length) \
        ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->parasMemory, \
                        &(pBiDi)->parasSize, \
                        TRUE, (int32_t)((length)*sizeof(Para)))

#define getInitialRunsMemory(pBiDi, length) \
        ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->runsMemory, \
                        &(pBiDi)->runsSize, \
                        TRUE, (int32_t)((length)*sizeof(Run)))
| 382 |
| 383 #endif |
| 384 |
| 385 #endif |
OLD | NEW |