| Index: source/common/ubidi.c
|
| diff --git a/source/common/ubidi.c b/source/common/ubidi.c
|
| index edccccf1404e08c7b8db8e61eeb8834c3b0a4b9d..8c0d64b039886930eb4cef319d47d0357576c949 100644
|
| --- a/source/common/ubidi.c
|
| +++ b/source/common/ubidi.c
|
| @@ -1,7 +1,7 @@
|
| /*
|
| ******************************************************************************
|
| *
|
| -* Copyright (C) 1999-2013, International Business Machines
|
| +* Copyright (C) 1999-2014, International Business Machines
|
| * Corporation and others. All Rights Reserved.
|
| *
|
| ******************************************************************************
|
| @@ -29,8 +29,8 @@
|
| * General implementation notes:
|
| *
|
| * Throughout the implementation, there are comments like (W2) that refer to
|
| - * rules of the BiDi algorithm in its version 5, in this example to the second
|
| - * rule of the resolution of weak types.
|
| + * rules of the BiDi algorithm, in this example to the second rule of the
|
| + * resolution of weak types.
|
| *
|
| * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32)
|
| * character according to UTF-16, the second UChar gets the directional property of
|
| @@ -60,14 +60,12 @@
|
| * For the purpose of conformance, the levels of all these codes
|
| * do not matter.
|
| *
|
| - * Note that this implementation never modifies the dirProps
|
| - * after the initial setup, except for FSI which is changed to either
|
| - * LRI or RLI in getDirProps(), and paired brackets which may be changed
|
| - * to L or R according to N0.
|
| + * Note that this implementation modifies the dirProps
|
| + * after the initial setup, when applying X5c (replace FSI by LRI or RLI),
|
| + * X6, and N0 (replace paired brackets by L or R).
|
| *
|
| - *
|
| - * In this implementation, the resolution of weak types (Wn),
|
| - * neutrals (Nn), and the assignment of the resolved level (In)
|
| + * In this implementation, the resolution of weak types (W1 to W6),
|
| + * neutrals (N1 and N2), and the assignment of the resolved level (In)
|
| * are all done in one single loop, in resolveImplicitLevels().
|
| * Changes of dirProp values are done on the fly, without writing
|
| * them back to the dirProps array.
|
| @@ -114,11 +112,13 @@ static const Flags flagE[2]={ DIRPROP_FLAG(LRE), DIRPROP_FLAG(RLE) };
|
| static const Flags flagO[2]={ DIRPROP_FLAG(LRO), DIRPROP_FLAG(RLO) };
|
|
|
| #define DIRPROP_FLAG_LR(level) flagLR[(level)&1]
|
| -#define DIRPROP_FLAG_E(level) flagE[(level)&1]
|
| -#define DIRPROP_FLAG_O(level) flagO[(level)&1]
|
| +#define DIRPROP_FLAG_E(level) flagE[(level)&1]
|
| +#define DIRPROP_FLAG_O(level) flagO[(level)&1]
|
|
|
| #define DIR_FROM_STRONG(strong) ((strong)==L ? L : R)
|
|
|
| +#define NO_OVERRIDE(level) ((level)&~UBIDI_LEVEL_OVERRIDE)
|
| +
|
| /* UBiDi object management -------------------------------------------------- */
|
|
|
| U_CAPI UBiDi * U_EXPORT2
|
| @@ -403,12 +403,12 @@ static UBool
|
| checkParaCount(UBiDi *pBiDi) {
|
| int32_t count=pBiDi->paraCount;
|
| if(pBiDi->paras==pBiDi->simpleParas) {
|
| - if(count<=SIMPLE_PARAS_SIZE)
|
| + if(count<=SIMPLE_PARAS_COUNT)
|
| return TRUE;
|
| - if(!getInitialParasMemory(pBiDi, SIMPLE_PARAS_SIZE * 2))
|
| + if(!getInitialParasMemory(pBiDi, SIMPLE_PARAS_COUNT * 2))
|
| return FALSE;
|
| pBiDi->paras=pBiDi->parasMemory;
|
| - uprv_memcpy(pBiDi->parasMemory, pBiDi->simpleParas, SIMPLE_PARAS_SIZE * sizeof(Para));
|
| + uprv_memcpy(pBiDi->parasMemory, pBiDi->simpleParas, SIMPLE_PARAS_COUNT * sizeof(Para));
|
| return TRUE;
|
| }
|
| if(!getInitialParasMemory(pBiDi, count * 2))
|
| @@ -421,6 +421,9 @@ checkParaCount(UBiDi *pBiDi) {
|
| * Get the directional properties for the text, calculate the flags bit-set, and
|
| * determine the paragraph level if necessary (in pBiDi->paras[i].level).
|
| * FSI initiators are also resolved and their dirProp replaced with LRI or RLI.
|
| + * When encountering an FSI, it is initially replaced with an LRI, which is the
|
| + * default. Only if a strong R or AL is found within its scope will the LRI be
|
| + * replaced by an RLI.
|
| */
|
| static UBool
|
| getDirProps(UBiDi *pBiDi) {
|
| @@ -508,7 +511,8 @@ getDirProps(UBiDi *pBiDi) {
|
| }
|
| else if(state==SEEKING_STRONG_FOR_FSI) {
|
| if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
|
| - dirProps[isolateStartStack[stackLast]]=LRI;
|
| + /* no need for next statement, already set by default */
|
| + /* dirProps[isolateStartStack[stackLast]]=LRI; */
|
| flags|=DIRPROP_FLAG(LRI);
|
| }
|
| state=LOOKING_FOR_PDI;
|
| @@ -539,8 +543,10 @@ getDirProps(UBiDi *pBiDi) {
|
| isolateStartStack[stackLast]=i-1;
|
| previousStateStack[stackLast]=state;
|
| }
|
| - if(dirProp==FSI)
|
| + if(dirProp==FSI) {
|
| + dirProps[i-1]=LRI; /* default if no strong char */
|
| state=SEEKING_STRONG_FOR_FSI;
|
| + }
|
| else
|
| state=LOOKING_FOR_PDI;
|
| continue;
|
| @@ -548,7 +554,8 @@ getDirProps(UBiDi *pBiDi) {
|
| if(dirProp==PDI) {
|
| if(state==SEEKING_STRONG_FOR_FSI) {
|
| if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
|
| - dirProps[isolateStartStack[stackLast]]=LRI;
|
| + /* no need for next statement, already set by default */
|
| + /* dirProps[isolateStartStack[stackLast]]=LRI; */
|
| flags|=DIRPROP_FLAG(LRI);
|
| }
|
| }
|
| @@ -591,14 +598,15 @@ getDirProps(UBiDi *pBiDi) {
|
| /* Ignore still open isolate sequences with overflow */
|
| if(stackLast>UBIDI_MAX_EXPLICIT_LEVEL) {
|
| stackLast=UBIDI_MAX_EXPLICIT_LEVEL;
|
| - if(dirProps[previousStateStack[UBIDI_MAX_EXPLICIT_LEVEL]]!=FSI)
|
| - state=LOOKING_FOR_PDI;
|
| + state=SEEKING_STRONG_FOR_FSI; /* to be on the safe side */
|
| }
|
| /* Resolve direction of still unresolved open FSI sequences */
|
| while(stackLast>=0) {
|
| if(state==SEEKING_STRONG_FOR_FSI) {
|
| - dirProps[isolateStartStack[stackLast]]=LRI;
|
| + /* no need for next statement, already set by default */
|
| + /* dirProps[isolateStartStack[stackLast]]=LRI; */
|
| flags|=DIRPROP_FLAG(LRI);
|
| + break;
|
| }
|
| state=previousStateStack[stackLast];
|
| stackLast--;
|
| @@ -667,14 +675,14 @@ bracketInit(UBiDi *pBiDi, BracketData *bd) {
|
| bd->isoRuns[0].start=0;
|
| bd->isoRuns[0].limit=0;
|
| bd->isoRuns[0].level=GET_PARALEVEL(pBiDi, 0);
|
| - bd->isoRuns[0].lastStrong=bd->isoRuns[0].contextDir=GET_PARALEVEL(pBiDi, 0)&1;
|
| - bd->isoRuns[0].lastStrongPos=bd->isoRuns[0].contextPos=0;
|
| + bd->isoRuns[0].lastStrong=bd->isoRuns[0].lastBase=bd->isoRuns[0].contextDir=GET_PARALEVEL(pBiDi, 0)&1;
|
| + bd->isoRuns[0].contextPos=0;
|
| if(pBiDi->openingsMemory) {
|
| bd->openings=pBiDi->openingsMemory;
|
| bd->openingsCount=pBiDi->openingsSize / sizeof(Opening);
|
| } else {
|
| bd->openings=bd->simpleOpenings;
|
| - bd->openingsCount=SIMPLE_OPENINGS_SIZE;
|
| + bd->openingsCount=SIMPLE_OPENINGS_COUNT;
|
| }
|
| bd->isNumbersSpecial=bd->pBiDi->reorderingMode==UBIDI_REORDER_NUMBERS_SPECIAL ||
|
| bd->pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL;
|
| @@ -686,8 +694,8 @@ bracketProcessB(BracketData *bd, UBiDiLevel level) {
|
| bd->isoRunLast=0;
|
| bd->isoRuns[0].limit=0;
|
| bd->isoRuns[0].level=level;
|
| - bd->isoRuns[0].lastStrong=bd->isoRuns[0].contextDir=level&1;
|
| - bd->isoRuns[0].lastStrongPos=bd->isoRuns[0].contextPos=0;
|
| + bd->isoRuns[0].lastStrong=bd->isoRuns[0].lastBase=bd->isoRuns[0].contextDir=level&1;
|
| + bd->isoRuns[0].contextPos=0;
|
| }
|
|
|
| /* LRE, LRO, RLE, RLO, PDF */
|
| @@ -698,13 +706,12 @@ bracketProcessBoundary(BracketData *bd, int32_t lastCcPos,
|
| DirProp *dirProps=bd->pBiDi->dirProps;
|
| if(DIRPROP_FLAG(dirProps[lastCcPos])&MASK_ISO) /* after an isolate */
|
| return;
|
| - if((embeddingLevel&~UBIDI_LEVEL_OVERRIDE)>
|
| - (contextLevel&~UBIDI_LEVEL_OVERRIDE)) /* not a PDF */
|
| + if(NO_OVERRIDE(embeddingLevel)>NO_OVERRIDE(contextLevel)) /* not a PDF */
|
| contextLevel=embeddingLevel;
|
| pLastIsoRun->limit=pLastIsoRun->start;
|
| pLastIsoRun->level=embeddingLevel;
|
| - pLastIsoRun->lastStrong=pLastIsoRun->contextDir=contextLevel&1;
|
| - pLastIsoRun->lastStrongPos=pLastIsoRun->contextPos=lastCcPos;
|
| + pLastIsoRun->lastStrong=pLastIsoRun->lastBase=pLastIsoRun->contextDir=contextLevel&1;
|
| + pLastIsoRun->contextPos=lastCcPos;
|
| }
|
|
|
| /* LRI or RLI */
|
| @@ -712,19 +719,23 @@ static void
|
| bracketProcessLRI_RLI(BracketData *bd, UBiDiLevel level) {
|
| IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
|
| int16_t lastLimit;
|
| + pLastIsoRun->lastBase=ON;
|
| lastLimit=pLastIsoRun->limit;
|
| bd->isoRunLast++;
|
| pLastIsoRun++;
|
| pLastIsoRun->start=pLastIsoRun->limit=lastLimit;
|
| pLastIsoRun->level=level;
|
| - pLastIsoRun->lastStrong=pLastIsoRun->contextDir=level&1;
|
| - pLastIsoRun->lastStrongPos=pLastIsoRun->contextPos=0;
|
| + pLastIsoRun->lastStrong=pLastIsoRun->lastBase=pLastIsoRun->contextDir=level&1;
|
| + pLastIsoRun->contextPos=0;
|
| }
|
|
|
| /* PDI */
|
| static void
|
| bracketProcessPDI(BracketData *bd) {
|
| + IsoRun *pLastIsoRun;
|
| bd->isoRunLast--;
|
| + pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
|
| + pLastIsoRun->lastBase=ON;
|
| }
|
|
|
| /* newly found opening bracket: create an openings entry */
|
| @@ -738,7 +749,7 @@ bracketAddOpening(BracketData *bd, UChar match, int32_t position) {
|
| return FALSE;
|
| if(bd->openings==bd->simpleOpenings)
|
| uprv_memcpy(pBiDi->openingsMemory, bd->simpleOpenings,
|
| - SIMPLE_OPENINGS_SIZE * sizeof(Opening));
|
| + SIMPLE_OPENINGS_COUNT * sizeof(Opening));
|
| bd->openings=pBiDi->openingsMemory; /* may have changed */
|
| bd->openingsCount=pBiDi->openingsSize / sizeof(Opening);
|
| }
|
| @@ -770,159 +781,220 @@ fixN0c(BracketData *bd, int32_t openingIndex, int32_t newPropPosition, DirProp n
|
| if(newProp==qOpening->contextDir)
|
| break;
|
| openingPosition=qOpening->position;
|
| - dirProps[openingPosition]=dirProps[newPropPosition];
|
| + dirProps[openingPosition]=newProp;
|
| closingPosition=-(qOpening->match);
|
| - dirProps[closingPosition]= newProp; /* can never be AL */
|
| - qOpening->match=0; /* prevent further changes */
|
| + dirProps[closingPosition]=newProp;
|
| + qOpening->match=0; /* prevent further changes */
|
| fixN0c(bd, k, openingPosition, newProp);
|
| fixN0c(bd, k, closingPosition, newProp);
|
| }
|
| }
|
|
|
| -/* handle strong characters, digits and candidates for closing brackets */
|
| -static UBool /* return TRUE if success */
|
| -bracketProcessChar(BracketData *bd, int32_t position, DirProp dirProp) {
|
| - IsoRun *pLastIsoRun;
|
| +/* process closing bracket */
|
| +static DirProp /* return L or R if N0b or N0c, ON if N0d */
|
| +bracketProcessClosing(BracketData *bd, int32_t openIdx, int32_t position) {
|
| + IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
|
| Opening *pOpening, *qOpening;
|
| - DirProp *dirProps, newProp;
|
| UBiDiDirection direction;
|
| - uint16_t flag;
|
| - int32_t i, k;
|
| UBool stable;
|
| - UChar c, match;
|
| + DirProp newProp;
|
| + pOpening=&bd->openings[openIdx];
|
| + direction=pLastIsoRun->level&1;
|
| + stable=TRUE; /* assume stable until proved otherwise */
|
| +
|
| + /* The stable flag is set when brackets are paired and their
|
| + level is resolved and cannot be changed by what will be
|
| + found later in the source string.
|
| + An unstable match can occur only when applying N0c, where
|
| + the resolved level depends on the preceding context, and
|
| + this context may be affected by text occurring later.
|
| + Example: RTL paragraph containing: abc[(latin) HEBREW]
|
| + When the closing parenthesis is encountered, it appears
|
| + that N0c1 must be applied since 'abc' sets an opposite
|
| + direction context and both parentheses receive level 2.
|
| + However, when the closing square bracket is processed,
|
| + N0b applies because of 'HEBREW' being included within the
|
| + brackets, thus the square brackets are treated like R and
|
| + receive level 1. However, this changes the preceding
|
| + context of the opening parenthesis, and it now appears
|
| + that N0c2 must be applied to the parentheses rather than
|
| + N0c1. */
|
| +
|
| + if((direction==0 && pOpening->flags&FOUND_L) ||
|
| + (direction==1 && pOpening->flags&FOUND_R)) { /* N0b */
|
| + newProp=direction;
|
| + }
|
| + else if(pOpening->flags&(FOUND_L|FOUND_R)) { /* N0c */
|
| + /* it is stable if there is no containing pair; the other stable
|
| + cases are too complicated to detect, so they are treated as unstable */
|
| + stable=(openIdx==pLastIsoRun->start);
|
| + if(direction!=pOpening->contextDir)
|
| + newProp=pOpening->contextDir; /* N0c1 */
|
| + else
|
| + newProp=direction; /* N0c2 */
|
| + } else {
|
| + /* forget this and any brackets nested within this pair */
|
| + pLastIsoRun->limit=openIdx;
|
| + return ON; /* N0d */
|
| + }
|
| + bd->pBiDi->dirProps[pOpening->position]=newProp;
|
| + bd->pBiDi->dirProps[position]=newProp;
|
| + /* Update nested N0c pairs that may be affected */
|
| + fixN0c(bd, openIdx, pOpening->position, newProp);
|
| + if(stable) {
|
| + pLastIsoRun->limit=openIdx; /* forget any brackets nested within this pair */
|
| + /* remove lower located synonyms if any */
|
| + while(pLastIsoRun->limit>pLastIsoRun->start &&
|
| + bd->openings[pLastIsoRun->limit-1].position==pOpening->position)
|
| + pLastIsoRun->limit--;
|
| + } else {
|
| + int32_t k;
|
| + pOpening->match=-position;
|
| + /* neutralize lower located synonyms if any */
|
| + k=openIdx-1;
|
| + while(k>=pLastIsoRun->start &&
|
| + bd->openings[k].position==pOpening->position)
|
| + bd->openings[k--].match=0;
|
| + /* neutralize any unmatched opening between the current pair;
|
| + this will also neutralize higher located synonyms if any */
|
| + for(k=openIdx+1; k<pLastIsoRun->limit; k++) {
|
| + qOpening=&bd->openings[k];
|
| + if(qOpening->position>=position)
|
| + break;
|
| + if(qOpening->match>0)
|
| + qOpening->match=0;
|
| + }
|
| + }
|
| + return newProp;
|
| +}
|
| +
|
| +/* handle strong characters, digits and candidates for closing brackets */
|
| +static UBool /* return TRUE if success */
|
| +bracketProcessChar(BracketData *bd, int32_t position) {
|
| + IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
|
| + DirProp *dirProps, dirProp, newProp;
|
| + UBiDiLevel level;
|
| dirProps=bd->pBiDi->dirProps;
|
| - if(DIRPROP_FLAG(dirProp)&MASK_STRONG_EN_AN) { /* L, R, AL, EN or AN */
|
| - pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
|
| - /* AN after R or AL becomes R or AL; after L or L+AN, it is kept as-is */
|
| - if(dirProp==AN && (pLastIsoRun->lastStrong==R || pLastIsoRun->lastStrong==AL))
|
| - dirProp=pLastIsoRun->lastStrong;
|
| - /* EN after L or L+AN becomes L; after R or AL, it becomes R or AL */
|
| - if(dirProp==EN) {
|
| - if(pLastIsoRun->lastStrong==L || pLastIsoRun->lastStrong==AN) {
|
| - dirProp=L;
|
| - if(!bd->isNumbersSpecial)
|
| - dirProps[position]=ENL;
|
| + dirProp=dirProps[position];
|
| + if(dirProp==ON) {
|
| + UChar c, match;
|
| + int32_t idx;
|
| + /* First see if it is a matching closing bracket. Hopefully, this is
|
| + more efficient than checking if it is a closing bracket at all */
|
| + c=bd->pBiDi->text[position];
|
| + for(idx=pLastIsoRun->limit-1; idx>=pLastIsoRun->start; idx--) {
|
| + if(bd->openings[idx].match!=c)
|
| + continue;
|
| + /* We have a match */
|
| + newProp=bracketProcessClosing(bd, idx, position);
|
| + if(newProp==ON) { /* N0d */
|
| + c=0; /* prevent handling as an opening */
|
| + break;
|
| }
|
| - else {
|
| - dirProp=pLastIsoRun->lastStrong; /* may be R or AL */
|
| - if(!bd->isNumbersSpecial)
|
| - dirProps[position]= dirProp==AL ? AN : ENR;
|
| + pLastIsoRun->lastBase=ON;
|
| + pLastIsoRun->contextDir=newProp;
|
| + pLastIsoRun->contextPos=position;
|
| + level=bd->pBiDi->levels[position];
|
| + if(level&UBIDI_LEVEL_OVERRIDE) { /* X4, X5 */
|
| + uint16_t flag;
|
| + int32_t i;
|
| + newProp=level&1;
|
| + pLastIsoRun->lastStrong=newProp;
|
| + flag=DIRPROP_FLAG(newProp);
|
| + for(i=pLastIsoRun->start; i<idx; i++)
|
| + bd->openings[i].flags|=flag;
|
| + /* matching brackets are not overridden by LRO/RLO */
|
| + bd->pBiDi->levels[position]&=~UBIDI_LEVEL_OVERRIDE;
|
| }
|
| + /* matching brackets are not overridden by LRO/RLO */
|
| + bd->pBiDi->levels[bd->openings[idx].position]&=~UBIDI_LEVEL_OVERRIDE;
|
| + return TRUE;
|
| }
|
| - pLastIsoRun->lastStrong=dirProp;
|
| - pLastIsoRun->contextDir=DIR_FROM_STRONG(dirProp);
|
| - pLastIsoRun->lastStrongPos=pLastIsoRun->contextPos=position;
|
| - if(dirProp==AL || dirProp==AN)
|
| - dirProp=R;
|
| - flag=DIRPROP_FLAG(dirProp);
|
| - /* strong characters found after an unmatched opening bracket
|
| - must be noted for possibly applying N0b */
|
| - for(i=pLastIsoRun->start; i<pLastIsoRun->limit; i++)
|
| - bd->openings[i].flags|=flag;
|
| - return TRUE;
|
| - }
|
| - if(dirProp!=ON)
|
| - return TRUE;
|
| - /* First see if it is a matching closing bracket. Hopefully, this is more
|
| - efficient than checking if it is a closing bracket at all */
|
| - c=bd->pBiDi->text[position];
|
| - pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
|
| - for(i=pLastIsoRun->limit-1; i>=pLastIsoRun->start; i--) {
|
| - if(bd->openings[i].match!=c)
|
| - continue;
|
| - /* We have a match */
|
| - pOpening=&bd->openings[i];
|
| - direction=pLastIsoRun->level&1;
|
| - stable=TRUE; /* assume stable until proved otherwise */
|
| -
|
| - /* The stable flag is set when brackets are paired and their
|
| - level is resolved and cannot be changed by what will be
|
| - found later in the source string.
|
| - An unstable match can occur only when applying N0c, where
|
| - the resolved level depends on the preceding context, and
|
| - this context may be affected by text occurring later.
|
| - Example: RTL paragraph containing: abc[(latin) HEBREW]
|
| - When the closing parenthesis is encountered, it appears
|
| - that N0c1 must be applied since 'abc' sets an opposite
|
| - direction context and both parentheses receive level 2.
|
| - However, when the closing square bracket is processed,
|
| - N0b applies because of 'HEBREW' being included within the
|
| - brackets, thus the square brackets are treated like R and
|
| - receive level 1. However, this changes the preceding
|
| - context of the opening parenthesis, and it now appears
|
| - that N0c2 must be applied to the parentheses rather than
|
| - N0c1. */
|
| -
|
| - if((direction==0 && pOpening->flags&FOUND_L) ||
|
| - (direction==1 && pOpening->flags&FOUND_R)) { /* N0b */
|
| - newProp=direction;
|
| - }
|
| - else if(pOpening->flags&(FOUND_L|FOUND_R)) { /* N0c */
|
| - if(direction!=pOpening->contextDir) {
|
| - newProp=pOpening->contextDir; /* N0c1 */
|
| - /* it is stable if there is no preceding text or in
|
| - conditions too complicated and not worth checking */
|
| - stable=(i==pLastIsoRun->start);
|
| + /* We get here only if the ON character is not a matching closing
|
| + bracket or it is a case of N0d */
|
| + /* Now see if it is an opening bracket */
|
| + if(c)
|
| + match=u_getBidiPairedBracket(c); /* get the matching char */
|
| + else
|
| + match=0;
|
| + if(match!=c && /* has a matching char */
|
| + ubidi_getPairedBracketType(bd->pBiDi->bdp, c)==U_BPT_OPEN) { /* opening bracket */
|
| + /* special case: process synonyms
|
| + create an opening entry for each synonym */
|
| + if(match==0x232A) { /* RIGHT-POINTING ANGLE BRACKET */
|
| + if(!bracketAddOpening(bd, 0x3009, position))
|
| + return FALSE;
|
| }
|
| - else
|
| - newProp=direction; /* N0c2 */
|
| - }
|
| - else {
|
| - newProp=BN; /* N0d */
|
| + else if(match==0x3009) { /* RIGHT ANGLE BRACKET */
|
| + if(!bracketAddOpening(bd, 0x232A, position))
|
| + return FALSE;
|
| + }
|
| + if(!bracketAddOpening(bd, match, position))
|
| + return FALSE;
|
| }
|
| - if(newProp!=BN) {
|
| - dirProps[pOpening->position]=newProp;
|
| + }
|
| + level=bd->pBiDi->levels[position];
|
| + if(level&UBIDI_LEVEL_OVERRIDE) { /* X4, X5 */
|
| + newProp=level&1;
|
| + if(dirProp!=S && dirProp!=WS && dirProp!=ON)
|
| dirProps[position]=newProp;
|
| - pLastIsoRun->contextDir=newProp;
|
| + pLastIsoRun->lastBase=newProp;
|
| + pLastIsoRun->lastStrong=newProp;
|
| + pLastIsoRun->contextDir=newProp;
|
| + pLastIsoRun->contextPos=position;
|
| + }
|
| + else if(dirProp<=R || dirProp==AL) {
|
| + newProp=DIR_FROM_STRONG(dirProp);
|
| + pLastIsoRun->lastBase=dirProp;
|
| + pLastIsoRun->lastStrong=dirProp;
|
| + pLastIsoRun->contextDir=newProp;
|
| + pLastIsoRun->contextPos=position;
|
| + }
|
| + else if(dirProp==EN) {
|
| + pLastIsoRun->lastBase=EN;
|
| + if(pLastIsoRun->lastStrong==L) {
|
| + newProp=L; /* W7 */
|
| + if(!bd->isNumbersSpecial)
|
| + dirProps[position]=ENL;
|
| + pLastIsoRun->contextDir=L;
|
| pLastIsoRun->contextPos=position;
|
| }
|
| - /* Update nested N0c pairs that may be affected */
|
| - if(newProp==direction)
|
| - fixN0c(bd, i, pOpening->position, newProp);
|
| - if(stable) {
|
| - pLastIsoRun->limit=i; /* forget any brackets nested within this pair */
|
| - /* remove lower located synonyms if any */
|
| - while(pLastIsoRun->limit>pLastIsoRun->start &&
|
| - bd->openings[pLastIsoRun->limit-1].position==pOpening->position)
|
| - pLastIsoRun->limit--;
|
| - }
|
| else {
|
| - pOpening->match=-position;
|
| - /* neutralize lower located synonyms if any */
|
| - k=i-1;
|
| - while(k>=pLastIsoRun->start &&
|
| - bd->openings[k].position==pOpening->position)
|
| - bd->openings[k--].match=0;
|
| - /* neutralize any unmatched opening between the current pair;
|
| - this will also neutralize higher located synonyms if any */
|
| - for(k=i+1; k<pLastIsoRun->limit; k++) {
|
| - qOpening=&bd->openings[k];
|
| - if(qOpening->position>=position)
|
| - break;
|
| - if(qOpening->match>0)
|
| - qOpening->match=0;
|
| - }
|
| + newProp=R; /* N0 */
|
| + if(pLastIsoRun->lastStrong==AL)
|
| + dirProps[position]=AN; /* W2 */
|
| + else
|
| + dirProps[position]=ENR;
|
| + pLastIsoRun->contextDir=R;
|
| + pLastIsoRun->contextPos=position;
|
| }
|
| - return TRUE;
|
| }
|
| - /* We get here only if the ON character was not a matching closing bracket */
|
| - /* Now see if it is an opening bracket */
|
| - match=u_getBidiPairedBracket(c); /* get the matching char */
|
| - if(match==c) /* if no matching char */
|
| - return TRUE;
|
| - if(ubidi_getPairedBracketType(bd->pBiDi->bdp, c)!=U_BPT_OPEN)
|
| - return TRUE; /* not an opening bracket */
|
| - /* special case: process synonyms
|
| - create an opening entry for each synonym */
|
| - if(match==0x232A) { /* RIGHT-POINTING ANGLE BRACKET */
|
| - if(!bracketAddOpening(bd, 0x3009, position))
|
| - return FALSE;
|
| + else if(dirProp==AN) {
|
| + newProp=R; /* N0 */
|
| + pLastIsoRun->lastBase=AN;
|
| + pLastIsoRun->contextDir=R;
|
| + pLastIsoRun->contextPos=position;
|
| }
|
| - else if(match==0x3009) { /* RIGHT ANGLE BRACKET */
|
| - if(!bracketAddOpening(bd, 0x232A, position))
|
| - return FALSE;
|
| + else if(dirProp==NSM) {
|
| + /* if the last real char was ON, change NSM to ON so that it
|
| + will stay ON even if the last real char is a bracket which
|
| + may be changed to L or R */
|
| + newProp=pLastIsoRun->lastBase;
|
| + if(newProp==ON)
|
| + dirProps[position]=newProp;
|
| + }
|
| + else {
|
| + newProp=dirProp;
|
| + pLastIsoRun->lastBase=dirProp;
|
| + }
|
| + if(newProp<=R || newProp==AL) {
|
| + int32_t i;
|
| + uint16_t flag=DIRPROP_FLAG(DIR_FROM_STRONG(newProp));
|
| + for(i=pLastIsoRun->start; i<pLastIsoRun->limit; i++)
|
| + if(position>bd->openings[i].position)
|
| + bd->openings[i].flags|=flag;
|
| }
|
| - return bracketAddOpening(bd, match, position);
|
| + return TRUE;
|
| }
|
|
|
| /* perform (X1)..(X9) ------------------------------------------------------- */
|
| @@ -980,7 +1052,7 @@ directionFromFlags(UBiDi *pBiDi) {
|
| * Handling the stack of explicit levels (Xn):
|
| *
|
| * With the BiDi stack of explicit levels, as pushed with each
|
| - * LRE, RLE, LRO, RLO, LRI, RLI and FSO and popped with each PDF and PDI,
|
| + * LRE, RLE, LRO, RLO, LRI, RLI and FSI and popped with each PDF and PDI,
|
| * the explicit level must never exceed UBIDI_MAX_EXPLICIT_LEVEL.
|
| *
|
| * In order to have a correct push-pop semantics even in the case of overflows,
|
| @@ -988,6 +1060,9 @@ directionFromFlags(UBiDi *pBiDi) {
|
| * section 3.3.2 "Explicit Levels and Directions".
|
| *
|
| * This implementation assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd.
|
| + *
|
| + * Normally returns the direction; returns -1 if there was a memory shortage.
|
| + *
|
| */
|
| static UBiDiDirection
|
| resolveExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
|
| @@ -1044,6 +1119,8 @@ resolveExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
|
| for(i=start; i<limit; i++) {
|
| levels[i]=level;
|
| dirProp=dirProps[i];
|
| + if(dirProp==BN)
|
| + continue;
|
| if(dirProp==B) {
|
| if((i+1)<length) {
|
| if(text[i]==CR && text[i+1]==LF)
|
| @@ -1052,7 +1129,7 @@ resolveExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
|
| }
|
| continue;
|
| }
|
| - if(!bracketProcessChar(&bracketData, i, dirProp)) {
|
| + if(!bracketProcessChar(&bracketData, i)) {
|
| *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
|
| return UBIDI_LTR;
|
| }
|
| @@ -1069,6 +1146,8 @@ resolveExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
|
| UBiDiLevel previousLevel=level; /* previous level for regular (not CC) characters */
|
| int32_t lastCcPos=0; /* index of last effective LRx,RLx, PDx */
|
|
|
| + /* The following stack remembers the embedding level and the ISOLATE flag of level runs.
|
| + stackLast points to its current entry. */
|
| uint16_t stack[UBIDI_MAX_EXPLICIT_LEVEL+2]; /* we never push anything >=UBIDI_MAX_EXPLICIT_LEVEL
|
| but we need one more entry as base */
|
| uint32_t stackLast=0;
|
| @@ -1091,10 +1170,13 @@ resolveExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
|
| case RLO:
|
| /* (X2, X3, X4, X5) */
|
| flags|=DIRPROP_FLAG(BN);
|
| + levels[i]=previousLevel;
|
| if (dirProp==LRE || dirProp==LRO)
|
| - newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1)); /* least greater even level */
|
| + /* least greater even level */
|
| + newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1));
|
| else
|
| - newLevel=(UBiDiLevel)(((embeddingLevel&~UBIDI_LEVEL_OVERRIDE)+1)|1); /* least greater odd level */
|
| + /* least greater odd level */
|
| + newLevel=(UBiDiLevel)((NO_OVERRIDE(embeddingLevel)+1)|1);
|
| if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL && overflowIsolateCount==0 &&
|
| overflowEmbeddingCount==0) {
|
| lastCcPos=i;
|
| @@ -1108,7 +1190,6 @@ resolveExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
|
| the source for embeddingLevel.
|
| */
|
| } else {
|
| - dirProps[i]|=IGNORE_CC;
|
| if(overflowIsolateCount==0)
|
| overflowEmbeddingCount++;
|
| }
|
| @@ -1116,13 +1197,12 @@ resolveExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
|
| case PDF:
|
| /* (X7) */
|
| flags|=DIRPROP_FLAG(BN);
|
| + levels[i]=previousLevel;
|
| /* handle all the overflow cases first */
|
| if(overflowIsolateCount) {
|
| - dirProps[i]|=IGNORE_CC;
|
| break;
|
| }
|
| if(overflowEmbeddingCount) {
|
| - dirProps[i]|=IGNORE_CC;
|
| overflowEmbeddingCount--;
|
| break;
|
| }
|
| @@ -1130,50 +1210,58 @@ resolveExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
|
| lastCcPos=i;
|
| stackLast--;
|
| embeddingLevel=(UBiDiLevel)stack[stackLast];
|
| - } else
|
| - dirProps[i]|=IGNORE_CC;
|
| + }
|
| break;
|
| case LRI:
|
| case RLI:
|
| - if(embeddingLevel!=previousLevel) {
|
| + flags|=(DIRPROP_FLAG(ON)|DIRPROP_FLAG_LR(embeddingLevel));
|
| + levels[i]=NO_OVERRIDE(embeddingLevel);
|
| + if(NO_OVERRIDE(embeddingLevel)!=NO_OVERRIDE(previousLevel)) {
|
| bracketProcessBoundary(&bracketData, lastCcPos,
|
| previousLevel, embeddingLevel);
|
| - previousLevel=embeddingLevel;
|
| + flags|=DIRPROP_FLAG_MULTI_RUNS;
|
| }
|
| + previousLevel=embeddingLevel;
|
| /* (X5a, X5b) */
|
| - flags|= DIRPROP_FLAG(ON) | DIRPROP_FLAG(BN) | DIRPROP_FLAG_LR(embeddingLevel);
|
| - level=embeddingLevel;
|
| if(dirProp==LRI)
|
| - newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1)); /* least greater even level */
|
| + /* least greater even level */
|
| + newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1));
|
| else
|
| - newLevel=(UBiDiLevel)(((embeddingLevel&~UBIDI_LEVEL_OVERRIDE)+1)|1); /* least greater odd level */
|
| + /* least greater odd level */
|
| + newLevel=(UBiDiLevel)((NO_OVERRIDE(embeddingLevel)+1)|1);
|
| if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL && overflowIsolateCount==0 &&
|
| overflowEmbeddingCount==0) {
|
| + flags|=DIRPROP_FLAG(dirProp);
|
| lastCcPos=i;
|
| - previousLevel=embeddingLevel;
|
| validIsolateCount++;
|
| if(validIsolateCount>pBiDi->isolateCount)
|
| pBiDi->isolateCount=validIsolateCount;
|
| embeddingLevel=newLevel;
|
| + /* we can increment stackLast without checking because newLevel
|
| + will exceed UBIDI_MAX_EXPLICIT_LEVEL before stackLast overflows */
|
| stackLast++;
|
| stack[stackLast]=embeddingLevel+ISOLATE;
|
| bracketProcessLRI_RLI(&bracketData, embeddingLevel);
|
| } else {
|
| - dirProps[i]|=IGNORE_CC;
|
| + /* make it WS so that it is handled by adjustWSLevels() */
|
| + dirProps[i]=WS;
|
| overflowIsolateCount++;
|
| }
|
| break;
|
| case PDI:
|
| - if(embeddingLevel!=previousLevel) {
|
| + if(NO_OVERRIDE(embeddingLevel)!=NO_OVERRIDE(previousLevel)) {
|
| bracketProcessBoundary(&bracketData, lastCcPos,
|
| previousLevel, embeddingLevel);
|
| + flags|=DIRPROP_FLAG_MULTI_RUNS;
|
| }
|
| /* (X6a) */
|
| if(overflowIsolateCount) {
|
| - dirProps[i]|=IGNORE_CC;
|
| overflowIsolateCount--;
|
| + /* make it WS so that it is handled by adjustWSLevels() */
|
| + dirProps[i]=WS;
|
| }
|
| else if(validIsolateCount) {
|
| + flags|=DIRPROP_FLAG(PDI);
|
| lastCcPos=i;
|
| overflowEmbeddingCount=0;
|
| while(stack[stackLast]<ISOLATE) /* pop embedding entries */
|
| @@ -1182,71 +1270,57 @@ resolveExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
|
| validIsolateCount--;
|
| bracketProcessPDI(&bracketData);
|
| } else
|
| - dirProps[i]|=IGNORE_CC;
|
| + /* make it WS so that it is handled by adjustWSLevels() */
|
| + dirProps[i]=WS;
|
| embeddingLevel=(UBiDiLevel)stack[stackLast]&~ISOLATE;
|
| - previousLevel=level=embeddingLevel;
|
| - flags|= DIRPROP_FLAG(ON) | DIRPROP_FLAG(BN) | DIRPROP_FLAG_LR(embeddingLevel);
|
| + flags|=(DIRPROP_FLAG(ON)|DIRPROP_FLAG_LR(embeddingLevel));
|
| + previousLevel=embeddingLevel;
|
| + levels[i]=NO_OVERRIDE(embeddingLevel);
|
| break;
|
| case B:
|
| - level=GET_PARALEVEL(pBiDi, i);
|
| + flags|=DIRPROP_FLAG(B);
|
| + levels[i]=GET_PARALEVEL(pBiDi, i);
|
| if((i+1)<length) {
|
| if(text[i]==CR && text[i+1]==LF)
|
| break; /* skip CR when followed by LF */
|
| overflowEmbeddingCount=overflowIsolateCount=0;
|
| validIsolateCount=0;
|
| stackLast=0;
|
| - stack[0]=level; /* initialize base entry to para level, no override, no isolate */
|
| previousLevel=embeddingLevel=GET_PARALEVEL(pBiDi, i+1);
|
| + stack[0]=embeddingLevel; /* initialize base entry to para level, no override, no isolate */
|
| bracketProcessB(&bracketData, embeddingLevel);
|
| }
|
| - flags|=DIRPROP_FLAG(B);
|
| break;
|
| case BN:
|
| /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */
|
| /* they will get their levels set correctly in adjustWSLevels() */
|
| + levels[i]=previousLevel;
|
| flags|=DIRPROP_FLAG(BN);
|
| break;
|
| default:
|
| - /* all other types get the "real" level */
|
| - level=embeddingLevel;
|
| - if(embeddingLevel!=previousLevel) {
|
| + /* all other types are normal characters and get the "real" level */
|
| + if(NO_OVERRIDE(embeddingLevel)!=NO_OVERRIDE(previousLevel)) {
|
| bracketProcessBoundary(&bracketData, lastCcPos,
|
| previousLevel, embeddingLevel);
|
| - previousLevel=embeddingLevel;
|
| + flags|=DIRPROP_FLAG_MULTI_RUNS;
|
| + if(embeddingLevel&UBIDI_LEVEL_OVERRIDE)
|
| + flags|=DIRPROP_FLAG_O(embeddingLevel);
|
| + else
|
| + flags|=DIRPROP_FLAG_E(embeddingLevel);
|
| }
|
| - if(level&UBIDI_LEVEL_OVERRIDE)
|
| - flags|=DIRPROP_FLAG_LR(level);
|
| - else
|
| - flags|=DIRPROP_FLAG(dirProp);
|
| - if(!bracketProcessChar(&bracketData, i, dirProp))
|
| + previousLevel=embeddingLevel;
|
| + levels[i]=embeddingLevel;
|
| + if(!bracketProcessChar(&bracketData, i))
|
| return -1;
|
| + /* the dirProp may have been changed in bracketProcessChar() */
|
| + flags|=DIRPROP_FLAG(dirProps[i]);
|
| break;
|
| }
|
| -
|
| - /*
|
| - * We need to set reasonable levels even on BN codes and
|
| - * explicit codes because we will later look at same-level runs (X10).
|
| - */
|
| - levels[i]=level;
|
| - if(i>0 && levels[i-1]!=level) {
|
| - flags|=DIRPROP_FLAG_MULTI_RUNS;
|
| - if(level&UBIDI_LEVEL_OVERRIDE)
|
| - flags|=DIRPROP_FLAG_O(level);
|
| - else
|
| - flags|=DIRPROP_FLAG_E(level);
|
| - }
|
| - if(DIRPROP_FLAG(dirProp)&MASK_ISO)
|
| - level=embeddingLevel;
|
| }
|
| - if(flags&MASK_EMBEDDING) {
|
| + if(flags&MASK_EMBEDDING)
|
| flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
|
| - }
|
| - if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) {
|
| + if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B)))
|
| flags|=DIRPROP_FLAG(L);
|
| - }
|
| -
|
| - /* subsequently, ignore the explicit codes and BN (X9) */
|
| -
|
| /* again, determine if the text is mixed-directional or single-directional */
|
| pBiDi->flags=flags;
|
| direction=directionFromFlags(pBiDi);
|
| @@ -1304,10 +1378,8 @@ checkExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
|
| return UBIDI_LTR;
|
| }
|
| }
|
| - if(flags&MASK_EMBEDDING) {
|
| + if(flags&MASK_EMBEDDING)
|
| flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
|
| - }
|
| -
|
| /* determine if the text is mixed-directional or single-directional */
|
| pBiDi->flags=flags;
|
| return directionFromFlags(pBiDi);
|
| @@ -1407,7 +1479,7 @@ static const uint8_t impTabProps[][IMPTABPROPS_COLUMNS] =
|
| /*23 ENR+ET */ { s(1,1), s(1,2), 21 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 23 , s(1,7), 23 , 23 , s(1,3), 18 , 21 , DirProp_AN }
|
| };
|
|
|
| -/* we must undef macro s because the levels table have a different
|
| +/* we must undef macro s because the levels tables have a different
|
| * structure (4 bits for action and 4 bits for next state.
|
| */
|
| #undef s
|
| @@ -1486,7 +1558,7 @@ typedef struct ImpTabPair {
|
| */
|
|
|
| static const ImpTab impTabL_DEFAULT = /* Even paragraph level */
|
| -/* In this table, conditional sequences receive the higher possible level
|
| +/* In this table, conditional sequences receive the lower possible level
|
| until proven otherwise.
|
| */
|
| {
|
| @@ -1495,8 +1567,8 @@ static const ImpTab impTabL_DEFAULT = /* Even paragraph level */
|
| /* 1 : R */ { 0 , 1 , 3 , 3 , s(1,4), s(1,4), 0 , 1 },
|
| /* 2 : AN */ { 0 , 1 , 0 , 2 , s(1,5), s(1,5), 0 , 2 },
|
| /* 3 : R+EN/AN */ { 0 , 1 , 3 , 3 , s(1,4), s(1,4), 0 , 2 },
|
| -/* 4 : R+ON */ { s(2,0), 1 , 3 , 3 , 4 , 4 , s(2,0), 1 },
|
| -/* 5 : AN+ON */ { s(2,0), 1 , s(2,0), 2 , 5 , 5 , s(2,0), 1 }
|
| +/* 4 : R+ON */ { 0 , s(2,1), s(3,3), s(3,3), 4 , 4 , 0 , 0 },
|
| +/* 5 : AN+ON */ { 0 , s(2,1), 0 , s(3,2), 5 , 5 , 0 , 0 }
|
| };
|
| static const ImpTab impTabR_DEFAULT = /* Odd paragraph level */
|
| /* In this table, conditional sequences receive the lower possible level
|
| @@ -1511,23 +1583,23 @@ static const ImpTab impTabR_DEFAULT = /* Odd paragraph level */
|
| /* 4 : L+ON */ { s(2,1), 0 , s(2,1), 3 , 4 , 4 , 0 , 0 },
|
| /* 5 : L+AN+ON */ { 1 , 0 , 1 , 3 , 5 , 5 , 0 , 0 }
|
| };
|
| -static const ImpAct impAct0 = {0,1,2,3,4,5,6};
|
| +static const ImpAct impAct0 = {0,1,2,3,4};
|
| static const ImpTabPair impTab_DEFAULT = {{&impTabL_DEFAULT,
|
| &impTabR_DEFAULT},
|
| {&impAct0, &impAct0}};
|
|
|
| static const ImpTab impTabL_NUMBERS_SPECIAL = /* Even paragraph level */
|
| -/* In this table, conditional sequences receive the higher possible level
|
| +/* In this table, conditional sequences receive the lower possible level
|
| until proven otherwise.
|
| */
|
| {
|
| /* L , R , EN , AN , ON , S , B , Res */
|
| -/* 0 : init */ { 0 , 2 , 1 , 1 , 0 , 0 , 0 , 0 },
|
| -/* 1 : L+EN/AN */ { 0 , 2 , 1 , 1 , 0 , 0 , 0 , 2 },
|
| -/* 2 : R */ { 0 , 2 , 4 , 4 , s(1,3), 0 , 0 , 1 },
|
| -/* 3 : R+ON */ { s(2,0), 2 , 4 , 4 , 3 , 3 , s(2,0), 1 },
|
| -/* 4 : R+EN/AN */ { 0 , 2 , 4 , 4 , s(1,3), s(1,3), 0 , 2 }
|
| - };
|
| +/* 0 : init */ { 0 , 2 , s(1,1), s(1,1), 0 , 0 , 0 , 0 },
|
| +/* 1 : L+EN/AN */ { 0 , s(4,2), 1 , 1 , 0 , 0 , 0 , 0 },
|
| +/* 2 : R */ { 0 , 2 , 4 , 4 , s(1,3), s(1,3), 0 , 1 },
|
| +/* 3 : R+ON */ { 0 , s(2,2), s(3,4), s(3,4), 3 , 3 , 0 , 0 },
|
| +/* 4 : R+EN/AN */ { 0 , 2 , 4 , 4 , s(1,3), s(1,3), 0 , 2 }
|
| +};
|
| static const ImpTabPair impTab_NUMBERS_SPECIAL = {{&impTabL_NUMBERS_SPECIAL,
|
| &impTabR_DEFAULT},
|
| {&impAct0, &impAct0}};
|
| @@ -1608,7 +1680,7 @@ static const ImpTab impTabR_INVERSE_LIKE_DIRECT = /* Odd paragraph level */
|
| /* 5 : L+AN+ON */ { s(2,1), s(3,0), 6 , 4 , 5 , 5 , s(3,0), 2 },
|
| /* 6 : L+ON+EN */ { s(2,1), s(3,0), 6 , 4 , 3 , 3 , s(3,0), 1 }
|
| };
|
| -static const ImpAct impAct1 = {0,1,11,12};
|
| +static const ImpAct impAct1 = {0,1,13,14};
|
| /* FOOD FOR THOUGHT: in LTR table below, check case "JKL 123abc"
|
| */
|
| static const ImpTabPair impTab_INVERSE_LIKE_DIRECT = {
|
| @@ -1643,11 +1715,12 @@ static const ImpTab impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS =
|
| /* 5 : L+ON+EN */ { s(5,3), s(4,0), 5 , s(3,6), 4 , s(4,0), s(4,0), 1 },
|
| /* 6 : L+AN */ { s(5,3), s(4,0), 6 , 6 , 4 , s(4,0), s(4,0), 3 }
|
| };
|
| -static const ImpAct impAct2 = {0,1,7,8,9,10};
|
| +static const ImpAct impAct2 = {0,1,2,5,6,7,8};
|
| +static const ImpAct impAct3 = {0,1,9,10,11,12};
|
| static const ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS = {
|
| {&impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS,
|
| &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS},
|
| - {&impAct0, &impAct2}};
|
| + {&impAct2, &impAct3}};
|
|
|
| static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = {
|
| {&impTabL_NUMBERS_SPECIAL,
|
| @@ -1668,7 +1741,7 @@ static const ImpTab impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS =
|
| static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = {
|
| {&impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS,
|
| &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS},
|
| - {&impAct0, &impAct2}};
|
| + {&impAct2, &impAct3}};
|
|
|
| #undef s
|
|
|
| @@ -1725,6 +1798,23 @@ addPoint(UBiDi *pBiDi, int32_t pos, int32_t flag)
|
| #undef FIRSTALLOC
|
| }
|
|
|
| +static void /* helper called from processPropertySeq(): bulk level assignment that skips nested isolate content */
|
| +setLevelsOutsideIsolates(UBiDi *pBiDi, int32_t start, int32_t limit, UBiDiLevel level)
|
| +{ /* sets pBiDi->levels[k]=level for each k in [start, limit) at which the local isolate nesting counter is 0; returns nothing */
|
| + DirProp *dirProps=pBiDi->dirProps, dirProp; /* dirProp holds the class of the position being visited */
|
| + UBiDiLevel *levels=pBiDi->levels; /* written in place */
|
| + int32_t isolateCount=0, k; /* nesting depth counted from start, not from the beginning of the text */
|
| + for(k=start; k<limit; k++) {
|
| + dirProp=dirProps[k];
|
| + if(dirProp==PDI) /* decrement before the test below, so a PDI that closes back to depth 0 receives the level itself */
|
| + isolateCount--; /* NOTE(review): a PDI met at depth 0 makes the counter -1 and later positions are skipped until an LRI/RLI raises it again; presumably callers never pass such a range - confirm */
|
| + if(isolateCount==0) /* only positions outside any isolate opened within [start, limit) are touched */
|
| + levels[k]=level;
|
| + if(dirProp==LRI || dirProp==RLI) /* increment after the test above, so the initiator itself receives the level; FSI is not tested here - presumably it was already replaced by LRI/RLI in dirProps, TODO confirm */
|
| + isolateCount++;
|
| + }
|
| +}
|
| +
|
| /* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */
|
|
|
| /*
|
| @@ -1768,7 +1858,17 @@ processPropertySeq(UBiDi *pBiDi, LevState *pLevState, uint8_t _prop,
|
| start=pLevState->startON;
|
| break;
|
|
|
| - case 3: /* L or S after possible relevant EN/AN */
|
| + case 3: /* EN/AN after R+ON */
|
| + level=pLevState->runLevel+1;
|
| + setLevelsOutsideIsolates(pBiDi, pLevState->startON, start0, level);
|
| + break;
|
| +
|
| + case 4: /* EN/AN before R for NUMBERS_SPECIAL */
|
| + level=pLevState->runLevel+2;
|
| + setLevelsOutsideIsolates(pBiDi, pLevState->startON, start0, level);
|
| + break;
|
| +
|
| + case 5: /* L or S after possible relevant EN/AN */
|
| /* check if we had EN after R/AL */
|
| if (pLevState->startL2EN >= 0) {
|
| addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE);
|
| @@ -1809,7 +1909,7 @@ processPropertySeq(UBiDi *pBiDi, LevState *pLevState, uint8_t _prop,
|
| }
|
| break;
|
|
|
| - case 4: /* R/AL after possible relevant EN/AN */
|
| + case 6: /* R/AL after possible relevant EN/AN */
|
| /* just clean up */
|
| pInsertPoints=&(pBiDi->insertPoints);
|
| if (pInsertPoints->capacity > 0)
|
| @@ -1820,7 +1920,7 @@ processPropertySeq(UBiDi *pBiDi, LevState *pLevState, uint8_t _prop,
|
| pLevState->lastStrongRTL=limit - 1;
|
| break;
|
|
|
| - case 5: /* EN/AN after R/AL + possible cont */
|
| + case 7: /* EN/AN after R/AL + possible cont */
|
| /* check for real AN */
|
| if ((_prop == DirProp_AN) && (pBiDi->dirProps[start0] == AN) &&
|
| (pBiDi->reorderingMode!=UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))
|
| @@ -1847,12 +1947,12 @@ processPropertySeq(UBiDi *pBiDi, LevState *pLevState, uint8_t _prop,
|
| }
|
| break;
|
|
|
| - case 6: /* note location of latest R/AL */
|
| + case 8: /* note location of latest R/AL */
|
| pLevState->lastStrongRTL=limit - 1;
|
| pLevState->startON=-1;
|
| break;
|
|
|
| - case 7: /* L after R+ON/EN/AN */
|
| + case 9: /* L after R+ON/EN/AN */
|
| /* include possible adjacent number on the left */
|
| for (k=start0-1; k>=0 && !(levels[k]&1); k--);
|
| if(k>=0) {
|
| @@ -1863,14 +1963,14 @@ processPropertySeq(UBiDi *pBiDi, LevState *pLevState, uint8_t _prop,
|
| pLevState->startON=start0;
|
| break;
|
|
|
| - case 8: /* AN after L */
|
| + case 10: /* AN after L */
|
| /* AN numbers between L text on both sides may be trouble. */
|
| /* tentatively bracket with LRMs; will be confirmed if followed by L */
|
| addPoint(pBiDi, start0, LRM_BEFORE); /* add LRM before */
|
| addPoint(pBiDi, start0, LRM_AFTER); /* add LRM after */
|
| break;
|
|
|
| - case 9: /* R after L+ON/EN/AN */
|
| + case 11: /* R after L+ON/EN/AN */
|
| /* false alert, infirm LRMs around previous AN */
|
| pInsertPoints=&(pBiDi->insertPoints);
|
| pInsertPoints->size=pInsertPoints->confirmed;
|
| @@ -1881,7 +1981,7 @@ processPropertySeq(UBiDi *pBiDi, LevState *pLevState, uint8_t _prop,
|
| }
|
| break;
|
|
|
| - case 10: /* L after L+ON/AN */
|
| + case 12: /* L after L+ON/AN */
|
| level=pLevState->runLevel + addLevel;
|
| for(k=pLevState->startON; k<start0; k++) {
|
| if (levels[k]<level)
|
| @@ -1892,7 +1992,7 @@ processPropertySeq(UBiDi *pBiDi, LevState *pLevState, uint8_t _prop,
|
| pLevState->startON=start0;
|
| break;
|
|
|
| - case 11: /* L after L+ON+EN/AN/ON */
|
| + case 13: /* L after L+ON+EN/AN/ON */
|
| level=pLevState->runLevel;
|
| for(k=start0-1; k>=pLevState->startON; k--) {
|
| if(levels[k]==level+3) {
|
| @@ -1911,7 +2011,7 @@ processPropertySeq(UBiDi *pBiDi, LevState *pLevState, uint8_t _prop,
|
| }
|
| break;
|
|
|
| - case 12: /* R after L+ON+EN/AN/ON */
|
| + case 14: /* R after L+ON+EN/AN/ON */
|
| level=pLevState->runLevel+1;
|
| for(k=start0-1; k>=pLevState->startON; k--) {
|
| if(levels[k]>level) {
|
| @@ -1932,17 +2032,7 @@ processPropertySeq(UBiDi *pBiDi, LevState *pLevState, uint8_t _prop,
|
| levels[k]=level;
|
| }
|
| } else {
|
| - DirProp *dirProps=pBiDi->dirProps, dirProp;
|
| - int32_t isolateCount=0;
|
| - for(k=start; k<limit; k++) {
|
| - dirProp=dirProps[k];
|
| - if(dirProp==PDI)
|
| - isolateCount--;
|
| - if(isolateCount==0)
|
| - levels[k]=level;
|
| - if(dirProp==LRI || dirProp==RLI)
|
| - isolateCount++;
|
| - }
|
| + setLevelsOutsideIsolates(pBiDi, start, limit, level);
|
| }
|
| }
|
| }
|
| @@ -2033,7 +2123,6 @@ resolveImplicitLevels(UBiDi *pBiDi,
|
| pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL));
|
|
|
| /* initialize for property and levels state tables */
|
| - levState.startON=-1;
|
| levState.startL2EN=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
|
| levState.lastStrongRTL=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
|
| levState.runStart=start;
|
| @@ -2050,11 +2139,13 @@ resolveImplicitLevels(UBiDi *pBiDi,
|
| resume the bidi algorithm in the same state as it was
|
| when it was interrupted by an isolate sequence. */
|
| if(dirProps[start]==PDI) {
|
| + levState.startON=pBiDi->isolates[pBiDi->isolateCount].startON;
|
| start1=pBiDi->isolates[pBiDi->isolateCount].start1;
|
| stateImp=pBiDi->isolates[pBiDi->isolateCount].stateImp;
|
| levState.state=pBiDi->isolates[pBiDi->isolateCount].state;
|
| pBiDi->isolateCount--;
|
| } else {
|
| + levState.startON=-1;
|
| start1=start;
|
| if(dirProps[start]==NSM)
|
| stateImp = 1 + sor;
|
| @@ -2063,19 +2154,22 @@ resolveImplicitLevels(UBiDi *pBiDi,
|
| levState.state=0;
|
| processPropertySeq(pBiDi, &levState, sor, start, start);
|
| }
|
| - start2=start;
|
| + start2=start; /* to make Java compiler happy */
|
|
|
| for(i=start; i<=limit; i++) {
|
| if(i>=limit) {
|
| - if(limit>start) {
|
| - dirProp=pBiDi->dirProps[limit-1];
|
| - if(dirProp==LRI || dirProp==RLI)
|
| - break; /* no forced closing for sequence ending with LRI/RLI */
|
| - }
|
| + int32_t k;
|
| + for(k=limit-1; k>start&&(DIRPROP_FLAG(dirProps[k])&MASK_BN_EXPLICIT); k--);
|
| + dirProp=dirProps[k];
|
| + if(dirProp==LRI || dirProp==RLI)
|
| + break; /* no forced closing for sequence ending with LRI/RLI */
|
| gprop=eor;
|
| } else {
|
| DirProp prop, prop1;
|
| - prop=PURE_DIRPROP(dirProps[i]);
|
| + prop=dirProps[i];
|
| + if(prop==B) {
|
| + pBiDi->isolateCount=-1; /* current isolates stack entry == none */
|
| + }
|
| if(inverseRTL) {
|
| if(prop==AL) {
|
| /* AL before EN does not make it AN */
|
| @@ -2145,12 +2239,15 @@ resolveImplicitLevels(UBiDi *pBiDi,
|
| }
|
| }
|
|
|
| - dirProp=dirProps[limit-1];
|
| + /* look for the last char not a BN or LRE/RLE/LRO/RLO/PDF */
|
| + for(i=limit-1; i>start&&(DIRPROP_FLAG(dirProps[i])&MASK_BN_EXPLICIT); i--);
|
| + dirProp=dirProps[i];
|
| if((dirProp==LRI || dirProp==RLI) && limit<pBiDi->length) {
|
| pBiDi->isolateCount++;
|
| pBiDi->isolates[pBiDi->isolateCount].stateImp=stateImp;
|
| pBiDi->isolates[pBiDi->isolateCount].state=levState.state;
|
| pBiDi->isolates[pBiDi->isolateCount].start1=start1;
|
| + pBiDi->isolates[pBiDi->isolateCount].startON=levState.startON;
|
| }
|
| else
|
| processPropertySeq(pBiDi, &levState, eor, limit, limit);
|
| @@ -2177,7 +2274,7 @@ adjustWSLevels(UBiDi *pBiDi) {
|
| i=pBiDi->trailingWSStart;
|
| while(i>0) {
|
| /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */
|
| - while(i>0 && (flag=DIRPROP_FLAG(PURE_DIRPROP(dirProps[--i])))&MASK_WS) {
|
| + while(i>0 && (flag=DIRPROP_FLAG(dirProps[--i]))&MASK_WS) {
|
| if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
|
| levels[i]=0;
|
| } else {
|
| @@ -2188,7 +2285,7 @@ adjustWSLevels(UBiDi *pBiDi) {
|
| /* reset BN to the next character's paraLevel until B/S, which restarts above loop */
|
| /* here, i+1 is guaranteed to be <length */
|
| while(i>0) {
|
| - flag=DIRPROP_FLAG(PURE_DIRPROP(dirProps[--i]));
|
| + flag=DIRPROP_FLAG(dirProps[--i]);
|
| if(flag&MASK_BN_EXPLICIT) {
|
| levels[i]=levels[i+1];
|
| } else if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
|
| @@ -2433,6 +2530,7 @@ ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length,
|
| UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels,
|
| UErrorCode *pErrorCode) {
|
| UBiDiDirection direction;
|
| + DirProp *dirProps;
|
|
|
| /* check the argument values */
|
| RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
|
| @@ -2511,6 +2609,7 @@ ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length,
|
| *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
|
| return;
|
| }
|
| + dirProps=pBiDi->dirProps;
|
| /* the processed length may have changed if UBIDI_OPTION_STREAMING */
|
| length= pBiDi->length;
|
| pBiDi->trailingWSStart=length; /* the levels[] will reflect the WS run */
|
| @@ -2538,10 +2637,10 @@ ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length,
|
| }
|
|
|
| /* allocate isolate memory */
|
| - if(pBiDi->isolateCount<=SIMPLE_ISOLATES_SIZE)
|
| + if(pBiDi->isolateCount<=SIMPLE_ISOLATES_COUNT)
|
| pBiDi->isolates=pBiDi->simpleIsolates;
|
| else
|
| - if(pBiDi->isolateCount<=pBiDi->isolatesSize)
|
| + if((int32_t)(pBiDi->isolateCount*sizeof(Isolate))<=pBiDi->isolatesSize)
|
| pBiDi->isolates=pBiDi->isolatesMemory;
|
| else {
|
| if(getInitialIsolatesMemory(pBiDi, pBiDi->isolateCount)) {
|
| @@ -2560,16 +2659,10 @@ ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length,
|
| pBiDi->direction=direction;
|
| switch(direction) {
|
| case UBIDI_LTR:
|
| - /* make sure paraLevel is even */
|
| - pBiDi->paraLevel=(UBiDiLevel)((pBiDi->paraLevel+1)&~1);
|
| -
|
| /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
|
| pBiDi->trailingWSStart=0;
|
| break;
|
| case UBIDI_RTL:
|
| - /* make sure paraLevel is odd */
|
| - pBiDi->paraLevel|=1;
|
| -
|
| /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
|
| pBiDi->trailingWSStart=0;
|
| break;
|
| @@ -2647,7 +2740,7 @@ ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length,
|
| /* the values for this run's start are the same as for the previous run's end */
|
| start=limit;
|
| level=nextLevel;
|
| - if((start>0) && (pBiDi->dirProps[start-1]==B)) {
|
| + if((start>0) && (dirProps[start-1]==B)) {
|
| /* except if this is a new paragraph, then set sor = para level */
|
| sor=GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, start));
|
| } else {
|
| @@ -2655,7 +2748,9 @@ ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length,
|
| }
|
|
|
| /* search for the limit of this run */
|
| - while(++limit<length && levels[limit]==level) {}
|
| + while((++limit<length) &&
|
| + ((levels[limit]==level) ||
|
| + (DIRPROP_FLAG(dirProps[limit])&MASK_BN_EXPLICIT))) {}
|
|
|
| /* get the correct level of the next run */
|
| if(limit<length) {
|
| @@ -2665,7 +2760,7 @@ ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length,
|
| }
|
|
|
| /* determine eor from max(level, nextLevel); sor is last run's eor */
|
| - if((level&~UBIDI_LEVEL_OVERRIDE)<(nextLevel&~UBIDI_LEVEL_OVERRIDE)) {
|
| + if(NO_OVERRIDE(level)<NO_OVERRIDE(nextLevel)) {
|
| eor=GET_LR_FROM_LEVEL(nextLevel);
|
| } else {
|
| eor=GET_LR_FROM_LEVEL(level);
|
| @@ -2710,10 +2805,10 @@ ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length,
|
| continue; /* LTR paragraph */
|
| start= i==0 ? 0 : pBiDi->paras[i-1].limit;
|
| for(j=last; j>=start; j--) {
|
| - dirProp=pBiDi->dirProps[j];
|
| + dirProp=dirProps[j];
|
| if(dirProp==L) {
|
| if(j<last) {
|
| - while(pBiDi->dirProps[last]==B) {
|
| + while(dirProps[last]==B) {
|
| last--;
|
| }
|
| }
|
|
|