| Index: third_party/sqlite/src/ext/fts3/fts3_expr.c | 
| diff --git a/third_party/sqlite/src/ext/fts3/fts3_expr.c b/third_party/sqlite/src/ext/fts3/fts3_expr.c | 
| index 43f6d84a8409773ac32e76032a164eb91900ff44..2ba786ce8092147b9b371b26929dda6f167ef3be 100644 | 
| --- a/third_party/sqlite/src/ext/fts3/fts3_expr.c | 
| +++ b/third_party/sqlite/src/ext/fts3/fts3_expr.c | 
| @@ -15,6 +15,7 @@ | 
| ** syntax is relatively simple, the whole tokenizer/parser system is | 
| ** hand-coded. | 
| */ | 
| +#include "fts3Int.h" | 
| #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) | 
|  | 
| /* | 
| @@ -77,16 +78,26 @@ int sqlite3_fts3_enable_parentheses = 0; | 
| */ | 
| #define SQLITE_FTS3_DEFAULT_NEAR_PARAM 10 | 
|  | 
| -#include "fts3Int.h" | 
| #include <string.h> | 
| #include <assert.h> | 
|  | 
| +/* | 
| +** isNot: | 
| +**   This variable is used by function getNextNode(). When getNextNode() is | 
| +**   called, it sets ParseContext.isNot to true if the 'next node' is a | 
| +**   FTSQUERY_PHRASE with a unary "-" attached to it. i.e. "mysql" in the | 
| +**   FTS3 query "sqlite -mysql". Otherwise, ParseContext.isNot is set to | 
| +**   zero. | 
| +*/ | 
| typedef struct ParseContext ParseContext; | 
| struct ParseContext { | 
| sqlite3_tokenizer *pTokenizer;      /* Tokenizer module */ | 
| +  int iLangid;                        /* Language id used with tokenizer */ | 
| const char **azCol;                 /* Array of column names for fts3 table */ | 
| +  int bFts4;                          /* True to allow FTS4-only syntax */ | 
| int nCol;                           /* Number of entries in azCol[] */ | 
| int iDefaultCol;                    /* Default column to query */ | 
| +  int isNot;                          /* True if getNextNode() sees a unary - */ | 
| sqlite3_context *pCtx;              /* Write error message here */ | 
| int nNest;                          /* Number of nested brackets */ | 
| }; | 
| @@ -95,7 +106,7 @@ struct ParseContext { | 
| ** This function is equivalent to the standard isspace() function. | 
| ** | 
| ** The standard isspace() can be awkward to use safely, because although it | 
| -** is defined to accept an argument of type int, its behaviour when passed | 
| +** is defined to accept an argument of type int, its behavior when passed | 
| ** an integer that falls outside of the range of the unsigned char type | 
| ** is undefined (and sometimes, "undefined" means segfault). This wrapper | 
| ** is defined to accept an argument of type char, and always returns 0 for | 
| @@ -117,6 +128,38 @@ static void *fts3MallocZero(int nByte){ | 
| return pRet; | 
| } | 
|  | 
| +int sqlite3Fts3OpenTokenizer( | 
| +  sqlite3_tokenizer *pTokenizer, | 
| +  int iLangid, | 
| +  const char *z, | 
| +  int n, | 
| +  sqlite3_tokenizer_cursor **ppCsr | 
| +){ | 
| +  sqlite3_tokenizer_module const *pModule = pTokenizer->pModule; | 
| +  sqlite3_tokenizer_cursor *pCsr = 0; | 
| +  int rc; | 
| + | 
| +  rc = pModule->xOpen(pTokenizer, z, n, &pCsr); | 
| +  assert( rc==SQLITE_OK || pCsr==0 ); | 
| +  if( rc==SQLITE_OK ){ | 
| +    pCsr->pTokenizer = pTokenizer; | 
| +    if( pModule->iVersion>=1 ){ | 
| +      rc = pModule->xLanguageid(pCsr, iLangid); | 
| +      if( rc!=SQLITE_OK ){ | 
| +        pModule->xClose(pCsr); | 
| +        pCsr = 0; | 
| +      } | 
| +    } | 
| +  } | 
| +  *ppCsr = pCsr; | 
| +  return rc; | 
| +} | 
| + | 
| +/* | 
| +** Function getNextNode(), which is called by fts3ExprParse(), may itself | 
| +** call fts3ExprParse(). So this forward declaration is required. | 
| +*/ | 
| +static int fts3ExprParse(ParseContext *, const char *, int, Fts3Expr **, int *); | 
|  | 
| /* | 
| ** Extract the next token from buffer z (length n) using the tokenizer | 
| @@ -142,17 +185,22 @@ static int getNextToken( | 
| int rc; | 
| sqlite3_tokenizer_cursor *pCursor; | 
| Fts3Expr *pRet = 0; | 
| -  int nConsumed = 0; | 
| +  int i = 0; | 
|  | 
| -  rc = pModule->xOpen(pTokenizer, z, n, &pCursor); | 
| +  /* Set variable i to the maximum number of bytes of input to tokenize. */ | 
| +  for(i=0; i<n; i++){ | 
| +    if( sqlite3_fts3_enable_parentheses && (z[i]=='(' || z[i]==')') ) break; | 
| +    if( z[i]=='"' ) break; | 
| +  } | 
| + | 
| +  *pnConsumed = i; | 
| +  rc = sqlite3Fts3OpenTokenizer(pTokenizer, pParse->iLangid, z, i, &pCursor); | 
| if( rc==SQLITE_OK ){ | 
| const char *zToken; | 
| -    int nToken, iStart, iEnd, iPosition; | 
| +    int nToken = 0, iStart = 0, iEnd = 0, iPosition = 0; | 
| int nByte;                               /* total space to allocate */ | 
|  | 
| -    pCursor->pTokenizer = pTokenizer; | 
| rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition); | 
| - | 
| if( rc==SQLITE_OK ){ | 
| nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken; | 
| pRet = (Fts3Expr *)fts3MallocZero(nByte); | 
| @@ -171,17 +219,30 @@ static int getNextToken( | 
| pRet->pPhrase->aToken[0].isPrefix = 1; | 
| iEnd++; | 
| } | 
| -        if( !sqlite3_fts3_enable_parentheses && iStart>0 && z[iStart-1]=='-' ){ | 
| -          pRet->pPhrase->isNot = 1; | 
| + | 
| +        while( 1 ){ | 
| +          if( !sqlite3_fts3_enable_parentheses | 
| +           && iStart>0 && z[iStart-1]=='-' | 
| +          ){ | 
| +            pParse->isNot = 1; | 
| +            iStart--; | 
| +          }else if( pParse->bFts4 && iStart>0 && z[iStart-1]=='^' ){ | 
| +            pRet->pPhrase->aToken[0].bFirst = 1; | 
| +            iStart--; | 
| +          }else{ | 
| +            break; | 
| +          } | 
| } | 
| + | 
| } | 
| -      nConsumed = iEnd; | 
| +      *pnConsumed = iEnd; | 
| +    }else if( i && rc==SQLITE_DONE ){ | 
| +      rc = SQLITE_OK; | 
| } | 
|  | 
| pModule->xClose(pCursor); | 
| } | 
|  | 
| -  *pnConsumed = nConsumed; | 
| *ppExpr = pRet; | 
| return rc; | 
| } | 
| @@ -224,36 +285,56 @@ static int getNextString( | 
| char *zTemp = 0; | 
| int nTemp = 0; | 
|  | 
| -  rc = pModule->xOpen(pTokenizer, zInput, nInput, &pCursor); | 
| +  const int nSpace = sizeof(Fts3Expr) + sizeof(Fts3Phrase); | 
| +  int nToken = 0; | 
| + | 
| +  /* The final Fts3Expr data structure, including the Fts3Phrase, | 
| +  ** Fts3PhraseToken structures token buffers are all stored as a single | 
| +  ** allocation so that the expression can be freed with a single call to | 
| +  ** sqlite3_free(). Setting this up requires a two pass approach. | 
| +  ** | 
| +  ** The first pass, in the block below, uses a tokenizer cursor to iterate | 
| +  ** through the tokens in the expression. This pass uses fts3ReallocOrFree() | 
| +  ** to assemble data in two dynamic buffers: | 
| +  ** | 
| +  **   Buffer p: Points to the Fts3Expr structure, followed by the Fts3Phrase | 
| +  **             structure, followed by the array of Fts3PhraseToken | 
| +  **             structures. This pass only populates the Fts3PhraseToken array. | 
| +  ** | 
| +  **   Buffer zTemp: Contains copies of all tokens. | 
| +  ** | 
| +  ** The second pass, in the block that begins "if( rc==SQLITE_DONE )" below, | 
| +  ** appends buffer zTemp to buffer p, and fills in the Fts3Expr and Fts3Phrase | 
| +  ** structures. | 
| +  */ | 
| +  rc = sqlite3Fts3OpenTokenizer( | 
| +      pTokenizer, pParse->iLangid, zInput, nInput, &pCursor); | 
| if( rc==SQLITE_OK ){ | 
| int ii; | 
| -    pCursor->pTokenizer = pTokenizer; | 
| for(ii=0; rc==SQLITE_OK; ii++){ | 
| -      const char *zToken; | 
| -      int nToken, iBegin, iEnd, iPos; | 
| -      rc = pModule->xNext(pCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos); | 
| +      const char *zByte; | 
| +      int nByte = 0, iBegin = 0, iEnd = 0, iPos = 0; | 
| +      rc = pModule->xNext(pCursor, &zByte, &nByte, &iBegin, &iEnd, &iPos); | 
| if( rc==SQLITE_OK ){ | 
| -        int nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase); | 
| -        p = fts3ReallocOrFree(p, nByte+ii*sizeof(Fts3PhraseToken)); | 
| -        zTemp = fts3ReallocOrFree(zTemp, nTemp + nToken); | 
| -        if( !p || !zTemp ){ | 
| -          goto no_mem; | 
| -        } | 
| -        if( ii==0 ){ | 
| -          memset(p, 0, nByte); | 
| -          p->pPhrase = (Fts3Phrase *)&p[1]; | 
| -        } | 
| -        p->pPhrase = (Fts3Phrase *)&p[1]; | 
| -        memset(&p->pPhrase->aToken[ii], 0, sizeof(Fts3PhraseToken)); | 
| -        p->pPhrase->nToken = ii+1; | 
| -        p->pPhrase->aToken[ii].n = nToken; | 
| -        memcpy(&zTemp[nTemp], zToken, nToken); | 
| -        nTemp += nToken; | 
| -        if( iEnd<nInput && zInput[iEnd]=='*' ){ | 
| -          p->pPhrase->aToken[ii].isPrefix = 1; | 
| -        }else{ | 
| -          p->pPhrase->aToken[ii].isPrefix = 0; | 
| -        } | 
| +        Fts3PhraseToken *pToken; | 
| + | 
| +        p = fts3ReallocOrFree(p, nSpace + ii*sizeof(Fts3PhraseToken)); | 
| +        if( !p ) goto no_mem; | 
| + | 
| +        zTemp = fts3ReallocOrFree(zTemp, nTemp + nByte); | 
| +        if( !zTemp ) goto no_mem; | 
| + | 
| +        assert( nToken==ii ); | 
| +        pToken = &((Fts3Phrase *)(&p[1]))->aToken[ii]; | 
| +        memset(pToken, 0, sizeof(Fts3PhraseToken)); | 
| + | 
| +        memcpy(&zTemp[nTemp], zByte, nByte); | 
| +        nTemp += nByte; | 
| + | 
| +        pToken->n = nByte; | 
| +        pToken->isPrefix = (iEnd<nInput && zInput[iEnd]=='*'); | 
| +        pToken->bFirst = (iBegin>0 && zInput[iBegin-1]=='^'); | 
| +        nToken = ii+1; | 
| } | 
| } | 
|  | 
| @@ -263,28 +344,28 @@ static int getNextString( | 
|  | 
| if( rc==SQLITE_DONE ){ | 
| int jj; | 
| -    char *zNew = NULL; | 
| -    int nNew = 0; | 
| -    int nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase); | 
| -    nByte += (p?(p->pPhrase->nToken-1):0) * sizeof(Fts3PhraseToken); | 
| -    p = fts3ReallocOrFree(p, nByte + nTemp); | 
| -    if( !p ){ | 
| -      goto no_mem; | 
| -    } | 
| +    char *zBuf = 0; | 
| + | 
| +    p = fts3ReallocOrFree(p, nSpace + nToken*sizeof(Fts3PhraseToken) + nTemp); | 
| +    if( !p ) goto no_mem; | 
| +    memset(p, 0, (char *)&(((Fts3Phrase *)&p[1])->aToken[0])-(char *)p); | 
| +    p->eType = FTSQUERY_PHRASE; | 
| +    p->pPhrase = (Fts3Phrase *)&p[1]; | 
| +    p->pPhrase->iColumn = pParse->iDefaultCol; | 
| +    p->pPhrase->nToken = nToken; | 
| + | 
| +    zBuf = (char *)&p->pPhrase->aToken[nToken]; | 
| if( zTemp ){ | 
| -      zNew = &(((char *)p)[nByte]); | 
| -      memcpy(zNew, zTemp, nTemp); | 
| +      memcpy(zBuf, zTemp, nTemp); | 
| +      sqlite3_free(zTemp); | 
| }else{ | 
| -      memset(p, 0, nByte+nTemp); | 
| +      assert( nTemp==0 ); | 
| } | 
| -    p->pPhrase = (Fts3Phrase *)&p[1]; | 
| + | 
| for(jj=0; jj<p->pPhrase->nToken; jj++){ | 
| -      p->pPhrase->aToken[jj].z = &zNew[nNew]; | 
| -      nNew += p->pPhrase->aToken[jj].n; | 
| +      p->pPhrase->aToken[jj].z = zBuf; | 
| +      zBuf += p->pPhrase->aToken[jj].n; | 
| } | 
| -    sqlite3_free(zTemp); | 
| -    p->eType = FTSQUERY_PHRASE; | 
| -    p->pPhrase->iColumn = pParse->iDefaultCol; | 
| rc = SQLITE_OK; | 
| } | 
|  | 
| @@ -302,12 +383,6 @@ no_mem: | 
| } | 
|  | 
| /* | 
| -** Function getNextNode(), which is called by fts3ExprParse(), may itself | 
| -** call fts3ExprParse(). So this forward declaration is required. | 
| -*/ | 
| -static int fts3ExprParse(ParseContext *, const char *, int, Fts3Expr **, int *); | 
| - | 
| -/* | 
| ** The output variable *ppExpr is populated with an allocated Fts3Expr | 
| ** structure, or set to 0 if the end of the input buffer is reached. | 
| ** | 
| @@ -341,6 +416,8 @@ static int getNextNode( | 
| const char *zInput = z; | 
| int nInput = n; | 
|  | 
| +  pParse->isNot = 0; | 
| + | 
| /* Skip over any whitespace before checking for a keyword, an open or | 
| ** close bracket, or a quoted string. | 
| */ | 
| @@ -401,27 +478,6 @@ static int getNextNode( | 
| } | 
| } | 
|  | 
| -  /* Check for an open bracket. */ | 
| -  if( sqlite3_fts3_enable_parentheses ){ | 
| -    if( *zInput=='(' ){ | 
| -      int nConsumed; | 
| -      pParse->nNest++; | 
| -      rc = fts3ExprParse(pParse, &zInput[1], nInput-1, ppExpr, &nConsumed); | 
| -      if( rc==SQLITE_OK && !*ppExpr ){ | 
| -        rc = SQLITE_DONE; | 
| -      } | 
| -      *pnConsumed = (int)((zInput - z) + 1 + nConsumed); | 
| -      return rc; | 
| -    } | 
| - | 
| -    /* Check for a close bracket. */ | 
| -    if( *zInput==')' ){ | 
| -      pParse->nNest--; | 
| -      *pnConsumed = (int)((zInput - z) + 1); | 
| -      return SQLITE_DONE; | 
| -    } | 
| -  } | 
| - | 
| /* See if we are dealing with a quoted phrase. If this is the case, then | 
| ** search for the closing quote and pass the whole string to getNextString() | 
| ** for processing. This is easy to do, as fts3 has no syntax for escaping | 
| @@ -436,6 +492,21 @@ static int getNextNode( | 
| return getNextString(pParse, &zInput[1], ii-1, ppExpr); | 
| } | 
|  | 
| +  if( sqlite3_fts3_enable_parentheses ){ | 
| +    if( *zInput=='(' ){ | 
| +      int nConsumed = 0; | 
| +      pParse->nNest++; | 
| +      rc = fts3ExprParse(pParse, zInput+1, nInput-1, ppExpr, &nConsumed); | 
| +      if( rc==SQLITE_OK && !*ppExpr ){ rc = SQLITE_DONE; } | 
| +      *pnConsumed = (int)(zInput - z) + 1 + nConsumed; | 
| +      return rc; | 
| +    }else if( *zInput==')' ){ | 
| +      pParse->nNest--; | 
| +      *pnConsumed = (int)((zInput - z) + 1); | 
| +      *ppExpr = 0; | 
| +      return SQLITE_DONE; | 
| +    } | 
| +  } | 
|  | 
| /* If control flows to this point, this must be a regular token, or | 
| ** the end of the input. Read a regular token using the sqlite3_tokenizer | 
| @@ -554,95 +625,100 @@ static int fts3ExprParse( | 
| while( rc==SQLITE_OK ){ | 
| Fts3Expr *p = 0; | 
| int nByte = 0; | 
| + | 
| rc = getNextNode(pParse, zIn, nIn, &p, &nByte); | 
| +    assert( nByte>0 || (rc!=SQLITE_OK && p==0) ); | 
| if( rc==SQLITE_OK ){ | 
| -      int isPhrase; | 
| - | 
| -      if( !sqlite3_fts3_enable_parentheses | 
| -       && p->eType==FTSQUERY_PHRASE && p->pPhrase->isNot | 
| -      ){ | 
| -        /* Create an implicit NOT operator. */ | 
| -        Fts3Expr *pNot = fts3MallocZero(sizeof(Fts3Expr)); | 
| -        if( !pNot ){ | 
| -          sqlite3Fts3ExprFree(p); | 
| -          rc = SQLITE_NOMEM; | 
| -          goto exprparse_out; | 
| -        } | 
| -        pNot->eType = FTSQUERY_NOT; | 
| -        pNot->pRight = p; | 
| -        if( pNotBranch ){ | 
| -          pNot->pLeft = pNotBranch; | 
| -        } | 
| -        pNotBranch = pNot; | 
| -        p = pPrev; | 
| -      }else{ | 
| -        int eType = p->eType; | 
| -        assert( eType!=FTSQUERY_PHRASE || !p->pPhrase->isNot ); | 
| -        isPhrase = (eType==FTSQUERY_PHRASE || p->pLeft); | 
| - | 
| -        /* The isRequirePhrase variable is set to true if a phrase or | 
| -        ** an expression contained in parenthesis is required. If a | 
| -        ** binary operator (AND, OR, NOT or NEAR) is encounted when | 
| -        ** isRequirePhrase is set, this is a syntax error. | 
| -        */ | 
| -        if( !isPhrase && isRequirePhrase ){ | 
| -          sqlite3Fts3ExprFree(p); | 
| -          rc = SQLITE_ERROR; | 
| -          goto exprparse_out; | 
| -        } | 
| - | 
| -        if( isPhrase && !isRequirePhrase ){ | 
| -          /* Insert an implicit AND operator. */ | 
| -          Fts3Expr *pAnd; | 
| -          assert( pRet && pPrev ); | 
| -          pAnd = fts3MallocZero(sizeof(Fts3Expr)); | 
| -          if( !pAnd ){ | 
| +      if( p ){ | 
| +        int isPhrase; | 
| + | 
| +        if( !sqlite3_fts3_enable_parentheses | 
| +            && p->eType==FTSQUERY_PHRASE && pParse->isNot | 
| +        ){ | 
| +          /* Create an implicit NOT operator. */ | 
| +          Fts3Expr *pNot = fts3MallocZero(sizeof(Fts3Expr)); | 
| +          if( !pNot ){ | 
| sqlite3Fts3ExprFree(p); | 
| rc = SQLITE_NOMEM; | 
| goto exprparse_out; | 
| } | 
| -          pAnd->eType = FTSQUERY_AND; | 
| -          insertBinaryOperator(&pRet, pPrev, pAnd); | 
| -          pPrev = pAnd; | 
| -        } | 
| +          pNot->eType = FTSQUERY_NOT; | 
| +          pNot->pRight = p; | 
| +          p->pParent = pNot; | 
| +          if( pNotBranch ){ | 
| +            pNot->pLeft = pNotBranch; | 
| +            pNotBranch->pParent = pNot; | 
| +          } | 
| +          pNotBranch = pNot; | 
| +          p = pPrev; | 
| +        }else{ | 
| +          int eType = p->eType; | 
| +          isPhrase = (eType==FTSQUERY_PHRASE || p->pLeft); | 
| + | 
| +          /* The isRequirePhrase variable is set to true if a phrase or | 
| +          ** an expression contained in parenthesis is required. If a | 
| +          ** binary operator (AND, OR, NOT or NEAR) is encounted when | 
| +          ** isRequirePhrase is set, this is a syntax error. | 
| +          */ | 
| +          if( !isPhrase && isRequirePhrase ){ | 
| +            sqlite3Fts3ExprFree(p); | 
| +            rc = SQLITE_ERROR; | 
| +            goto exprparse_out; | 
| +          } | 
| + | 
| +          if( isPhrase && !isRequirePhrase ){ | 
| +            /* Insert an implicit AND operator. */ | 
| +            Fts3Expr *pAnd; | 
| +            assert( pRet && pPrev ); | 
| +            pAnd = fts3MallocZero(sizeof(Fts3Expr)); | 
| +            if( !pAnd ){ | 
| +              sqlite3Fts3ExprFree(p); | 
| +              rc = SQLITE_NOMEM; | 
| +              goto exprparse_out; | 
| +            } | 
| +            pAnd->eType = FTSQUERY_AND; | 
| +            insertBinaryOperator(&pRet, pPrev, pAnd); | 
| +            pPrev = pAnd; | 
| +          } | 
|  | 
| -        /* This test catches attempts to make either operand of a NEAR | 
| -        ** operator something other than a phrase. For example, either of | 
| -        ** the following: | 
| -        ** | 
| -        **    (bracketed expression) NEAR phrase | 
| -        **    phrase NEAR (bracketed expression) | 
| -        ** | 
| -        ** Return an error in either case. | 
| -        */ | 
| -        if( pPrev && ( | 
| +          /* This test catches attempts to make either operand of a NEAR | 
| +           ** operator something other than a phrase. For example, either of | 
| +           ** the following: | 
| +           ** | 
| +           **    (bracketed expression) NEAR phrase | 
| +           **    phrase NEAR (bracketed expression) | 
| +           ** | 
| +           ** Return an error in either case. | 
| +           */ | 
| +          if( pPrev && ( | 
| (eType==FTSQUERY_NEAR && !isPhrase && pPrev->eType!=FTSQUERY_PHRASE) | 
| || (eType!=FTSQUERY_PHRASE && isPhrase && pPrev->eType==FTSQUERY_NEAR) | 
| -        )){ | 
| -          sqlite3Fts3ExprFree(p); | 
| -          rc = SQLITE_ERROR; | 
| -          goto exprparse_out; | 
| -        } | 
| - | 
| -        if( isPhrase ){ | 
| -          if( pRet ){ | 
| -            assert( pPrev && pPrev->pLeft && pPrev->pRight==0 ); | 
| -            pPrev->pRight = p; | 
| -            p->pParent = pPrev; | 
| +          )){ | 
| +            sqlite3Fts3ExprFree(p); | 
| +            rc = SQLITE_ERROR; | 
| +            goto exprparse_out; | 
| +          } | 
| + | 
| +          if( isPhrase ){ | 
| +            if( pRet ){ | 
| +              assert( pPrev && pPrev->pLeft && pPrev->pRight==0 ); | 
| +              pPrev->pRight = p; | 
| +              p->pParent = pPrev; | 
| +            }else{ | 
| +              pRet = p; | 
| +            } | 
| }else{ | 
| -            pRet = p; | 
| +            insertBinaryOperator(&pRet, pPrev, p); | 
| } | 
| -        }else{ | 
| -          insertBinaryOperator(&pRet, pPrev, p); | 
| +          isRequirePhrase = !isPhrase; | 
| } | 
| -        isRequirePhrase = !isPhrase; | 
| +        pPrev = p; | 
| } | 
| assert( nByte>0 ); | 
| } | 
| assert( rc!=SQLITE_OK || (nByte>0 && nByte<=nIn) ); | 
| nIn -= nByte; | 
| zIn += nByte; | 
| -    pPrev = p; | 
| } | 
|  | 
| if( rc==SQLITE_DONE && pRet && isRequirePhrase ){ | 
| @@ -660,6 +736,7 @@ static int fts3ExprParse( | 
| pIter = pIter->pLeft; | 
| } | 
| pIter->pLeft = pRet; | 
| +        pRet->pParent = pIter; | 
| pRet = pNotBranch; | 
| } | 
| } | 
| @@ -677,6 +754,223 @@ exprparse_out: | 
| } | 
|  | 
| /* | 
| +** Return SQLITE_ERROR if the maximum depth of the expression tree passed | 
| +** as the only argument is more than nMaxDepth. | 
| +*/ | 
| +static int fts3ExprCheckDepth(Fts3Expr *p, int nMaxDepth){ | 
| +  int rc = SQLITE_OK; | 
| +  if( p ){ | 
| +    if( nMaxDepth<0 ){ | 
| +      rc = SQLITE_TOOBIG; | 
| +    }else{ | 
| +      rc = fts3ExprCheckDepth(p->pLeft, nMaxDepth-1); | 
| +      if( rc==SQLITE_OK ){ | 
| +        rc = fts3ExprCheckDepth(p->pRight, nMaxDepth-1); | 
| +      } | 
| +    } | 
| +  } | 
| +  return rc; | 
| +} | 
| + | 
| +/* | 
| +** This function attempts to transform the expression tree at (*pp) to | 
| +** an equivalent but more balanced form. The tree is modified in place. | 
| +** If successful, SQLITE_OK is returned and (*pp) set to point to the | 
| +** new root expression node. | 
| +** | 
| +** nMaxDepth is the maximum allowable depth of the balanced sub-tree. | 
| +** | 
| +** Otherwise, if an error occurs, an SQLite error code is returned and | 
| +** expression (*pp) freed. | 
| +*/ | 
| +static int fts3ExprBalance(Fts3Expr **pp, int nMaxDepth){ | 
| +  int rc = SQLITE_OK;             /* Return code */ | 
| +  Fts3Expr *pRoot = *pp;          /* Initial root node */ | 
| +  Fts3Expr *pFree = 0;            /* List of free nodes. Linked by pParent. */ | 
| +  int eType = pRoot->eType;       /* Type of node in this tree */ | 
| + | 
| +  if( nMaxDepth==0 ){ | 
| +    rc = SQLITE_ERROR; | 
| +  } | 
| + | 
| +  if( rc==SQLITE_OK && (eType==FTSQUERY_AND || eType==FTSQUERY_OR) ){ | 
| +    Fts3Expr **apLeaf; | 
| +    apLeaf = (Fts3Expr **)sqlite3_malloc(sizeof(Fts3Expr *) * nMaxDepth); | 
| +    if( 0==apLeaf ){ | 
| +      rc = SQLITE_NOMEM; | 
| +    }else{ | 
| +      memset(apLeaf, 0, sizeof(Fts3Expr *) * nMaxDepth); | 
| +    } | 
| + | 
| +    if( rc==SQLITE_OK ){ | 
| +      int i; | 
| +      Fts3Expr *p; | 
| + | 
| +      /* Set $p to point to the left-most leaf in the tree of eType nodes. */ | 
| +      for(p=pRoot; p->eType==eType; p=p->pLeft){ | 
| +        assert( p->pParent==0 || p->pParent->pLeft==p ); | 
| +        assert( p->pLeft && p->pRight ); | 
| +      } | 
| + | 
| +      /* This loop runs once for each leaf in the tree of eType nodes. */ | 
| +      while( 1 ){ | 
| +        int iLvl; | 
| +        Fts3Expr *pParent = p->pParent;     /* Current parent of p */ | 
| + | 
| +        assert( pParent==0 || pParent->pLeft==p ); | 
| +        p->pParent = 0; | 
| +        if( pParent ){ | 
| +          pParent->pLeft = 0; | 
| +        }else{ | 
| +          pRoot = 0; | 
| +        } | 
| +        rc = fts3ExprBalance(&p, nMaxDepth-1); | 
| +        if( rc!=SQLITE_OK ) break; | 
| + | 
| +        for(iLvl=0; p && iLvl<nMaxDepth; iLvl++){ | 
| +          if( apLeaf[iLvl]==0 ){ | 
| +            apLeaf[iLvl] = p; | 
| +            p = 0; | 
| +          }else{ | 
| +            assert( pFree ); | 
| +            pFree->pLeft = apLeaf[iLvl]; | 
| +            pFree->pRight = p; | 
| +            pFree->pLeft->pParent = pFree; | 
| +            pFree->pRight->pParent = pFree; | 
| + | 
| +            p = pFree; | 
| +            pFree = pFree->pParent; | 
| +            p->pParent = 0; | 
| +            apLeaf[iLvl] = 0; | 
| +          } | 
| +        } | 
| +        if( p ){ | 
| +          sqlite3Fts3ExprFree(p); | 
| +          rc = SQLITE_TOOBIG; | 
| +          break; | 
| +        } | 
| + | 
| +        /* If that was the last leaf node, break out of the loop */ | 
| +        if( pParent==0 ) break; | 
| + | 
| +        /* Set $p to point to the next leaf in the tree of eType nodes */ | 
| +        for(p=pParent->pRight; p->eType==eType; p=p->pLeft); | 
| + | 
| +        /* Remove pParent from the original tree. */ | 
| +        assert( pParent->pParent==0 || pParent->pParent->pLeft==pParent ); | 
| +        pParent->pRight->pParent = pParent->pParent; | 
| +        if( pParent->pParent ){ | 
| +          pParent->pParent->pLeft = pParent->pRight; | 
| +        }else{ | 
| +          assert( pParent==pRoot ); | 
| +          pRoot = pParent->pRight; | 
| +        } | 
| + | 
| +        /* Link pParent into the free node list. It will be used as an | 
| +        ** internal node of the new tree.  */ | 
| +        pParent->pParent = pFree; | 
| +        pFree = pParent; | 
| +      } | 
| + | 
| +      if( rc==SQLITE_OK ){ | 
| +        p = 0; | 
| +        for(i=0; i<nMaxDepth; i++){ | 
| +          if( apLeaf[i] ){ | 
| +            if( p==0 ){ | 
| +              p = apLeaf[i]; | 
| +              p->pParent = 0; | 
| +            }else{ | 
| +              assert( pFree!=0 ); | 
| +              pFree->pRight = p; | 
| +              pFree->pLeft = apLeaf[i]; | 
| +              pFree->pLeft->pParent = pFree; | 
| +              pFree->pRight->pParent = pFree; | 
| + | 
| +              p = pFree; | 
| +              pFree = pFree->pParent; | 
| +              p->pParent = 0; | 
| +            } | 
| +          } | 
| +        } | 
| +        pRoot = p; | 
| +      }else{ | 
| +        /* An error occurred. Delete the contents of the apLeaf[] array | 
| +        ** and pFree list. Everything else is cleaned up by the call to | 
| +        ** sqlite3Fts3ExprFree(pRoot) below.  */ | 
| +        Fts3Expr *pDel; | 
| +        for(i=0; i<nMaxDepth; i++){ | 
| +          sqlite3Fts3ExprFree(apLeaf[i]); | 
| +        } | 
| +        while( (pDel=pFree)!=0 ){ | 
| +          pFree = pDel->pParent; | 
| +          sqlite3_free(pDel); | 
| +        } | 
| +      } | 
| + | 
| +      assert( pFree==0 ); | 
| +      sqlite3_free( apLeaf ); | 
| +    } | 
| +  } | 
| + | 
| +  if( rc!=SQLITE_OK ){ | 
| +    sqlite3Fts3ExprFree(pRoot); | 
| +    pRoot = 0; | 
| +  } | 
| +  *pp = pRoot; | 
| +  return rc; | 
| +} | 
| + | 
| +/* | 
| +** This function is similar to sqlite3Fts3ExprParse(), with the following | 
| +** differences: | 
| +** | 
| +**   1. It does not do expression rebalancing. | 
| +**   2. It does not check that the expression does not exceed the | 
| +**      maximum allowable depth. | 
| +**   3. Even if it fails, *ppExpr may still be set to point to an | 
| +**      expression tree. It should be deleted using sqlite3Fts3ExprFree() | 
| +**      in this case. | 
| +*/ | 
| +static int fts3ExprParseUnbalanced( | 
| +  sqlite3_tokenizer *pTokenizer,      /* Tokenizer module */ | 
| +  int iLangid,                        /* Language id for tokenizer */ | 
| +  char **azCol,                       /* Array of column names for fts3 table */ | 
| +  int bFts4,                          /* True to allow FTS4-only syntax */ | 
| +  int nCol,                           /* Number of entries in azCol[] */ | 
| +  int iDefaultCol,                    /* Default column to query */ | 
| +  const char *z, int n,               /* Text of MATCH query */ | 
| +  Fts3Expr **ppExpr                   /* OUT: Parsed query structure */ | 
| +){ | 
| +  int nParsed; | 
| +  int rc; | 
| +  ParseContext sParse; | 
| + | 
| +  memset(&sParse, 0, sizeof(ParseContext)); | 
| +  sParse.pTokenizer = pTokenizer; | 
| +  sParse.iLangid = iLangid; | 
| +  sParse.azCol = (const char **)azCol; | 
| +  sParse.nCol = nCol; | 
| +  sParse.iDefaultCol = iDefaultCol; | 
| +  sParse.bFts4 = bFts4; | 
| +  if( z==0 ){ | 
| +    *ppExpr = 0; | 
| +    return SQLITE_OK; | 
| +  } | 
| +  if( n<0 ){ | 
| +    n = (int)strlen(z); | 
| +  } | 
| +  rc = fts3ExprParse(&sParse, z, n, ppExpr, &nParsed); | 
| +  assert( rc==SQLITE_OK || *ppExpr==0 ); | 
| + | 
| +  /* Check for mismatched parenthesis */ | 
| +  if( rc==SQLITE_OK && sParse.nNest ){ | 
| +    rc = SQLITE_ERROR; | 
| +  } | 
| + | 
| +  return rc; | 
| +} | 
| + | 
| +/* | 
| ** Parameters z and n contain a pointer to and length of a buffer containing | 
| ** an fts3 query expression, respectively. This function attempts to parse the | 
| ** query expression and create a tree of Fts3Expr structures representing the | 
| @@ -702,48 +996,80 @@ exprparse_out: | 
| */ | 
| int sqlite3Fts3ExprParse( | 
| sqlite3_tokenizer *pTokenizer,      /* Tokenizer module */ | 
| +  int iLangid,                        /* Language id for tokenizer */ | 
| char **azCol,                       /* Array of column names for fts3 table */ | 
| +  int bFts4,                          /* True to allow FTS4-only syntax */ | 
| int nCol,                           /* Number of entries in azCol[] */ | 
| int iDefaultCol,                    /* Default column to query */ | 
| const char *z, int n,               /* Text of MATCH query */ | 
| -  Fts3Expr **ppExpr                   /* OUT: Parsed query structure */ | 
| +  Fts3Expr **ppExpr,                  /* OUT: Parsed query structure */ | 
| +  char **pzErr                        /* OUT: Error message (sqlite3_malloc) */ | 
| ){ | 
| -  int nParsed; | 
| -  int rc; | 
| -  ParseContext sParse; | 
| -  sParse.pTokenizer = pTokenizer; | 
| -  sParse.azCol = (const char **)azCol; | 
| -  sParse.nCol = nCol; | 
| -  sParse.iDefaultCol = iDefaultCol; | 
| -  sParse.nNest = 0; | 
| -  if( z==0 ){ | 
| -    *ppExpr = 0; | 
| -    return SQLITE_OK; | 
| -  } | 
| -  if( n<0 ){ | 
| -    n = (int)strlen(z); | 
| +  int rc = fts3ExprParseUnbalanced( | 
| +      pTokenizer, iLangid, azCol, bFts4, nCol, iDefaultCol, z, n, ppExpr | 
| +  ); | 
| + | 
| +  /* Rebalance the expression. And check that its depth does not exceed | 
| +  ** SQLITE_FTS3_MAX_EXPR_DEPTH.  */ | 
| +  if( rc==SQLITE_OK && *ppExpr ){ | 
| +    rc = fts3ExprBalance(ppExpr, SQLITE_FTS3_MAX_EXPR_DEPTH); | 
| +    if( rc==SQLITE_OK ){ | 
| +      rc = fts3ExprCheckDepth(*ppExpr, SQLITE_FTS3_MAX_EXPR_DEPTH); | 
| +    } | 
| } | 
| -  rc = fts3ExprParse(&sParse, z, n, ppExpr, &nParsed); | 
|  | 
| -  /* Check for mismatched parenthesis */ | 
| -  if( rc==SQLITE_OK && sParse.nNest ){ | 
| -    rc = SQLITE_ERROR; | 
| +  if( rc!=SQLITE_OK ){ | 
| sqlite3Fts3ExprFree(*ppExpr); | 
| *ppExpr = 0; | 
| +    if( rc==SQLITE_TOOBIG ){ | 
| +      *pzErr = sqlite3_mprintf( | 
| +          "FTS expression tree is too large (maximum depth %d)", | 
| +          SQLITE_FTS3_MAX_EXPR_DEPTH | 
| +      ); | 
| +      rc = SQLITE_ERROR; | 
| +    }else if( rc==SQLITE_ERROR ){ | 
| +      *pzErr = sqlite3_mprintf("malformed MATCH expression: [%s]", z); | 
| +    } | 
| } | 
|  | 
| return rc; | 
| } | 
|  | 
| /* | 
| +** Free a single node of an expression tree. | 
| +*/ | 
| +static void fts3FreeExprNode(Fts3Expr *p){ | 
| +  assert( p->eType==FTSQUERY_PHRASE || p->pPhrase==0 ); | 
| +  sqlite3Fts3EvalPhraseCleanup(p->pPhrase); | 
| +  sqlite3_free(p->aMI); | 
| +  sqlite3_free(p); | 
| +} | 
| + | 
| +/* | 
| ** Free a parsed fts3 query expression allocated by sqlite3Fts3ExprParse(). | 
| +** | 
| +** This function would be simpler if it recursively called itself. But | 
| +** that would mean passing a sufficiently large expression to ExprParse() | 
| +** could cause a stack overflow. | 
| */ | 
| -void sqlite3Fts3ExprFree(Fts3Expr *p){ | 
| -  if( p ){ | 
| -    sqlite3Fts3ExprFree(p->pLeft); | 
| -    sqlite3Fts3ExprFree(p->pRight); | 
| -    sqlite3_free(p->aDoclist); | 
| -    sqlite3_free(p); | 
| +void sqlite3Fts3ExprFree(Fts3Expr *pDel){ | 
| +  Fts3Expr *p; | 
| +  assert( pDel==0 || pDel->pParent==0 ); | 
| +  for(p=pDel; p && (p->pLeft||p->pRight); p=(p->pLeft ? p->pLeft : p->pRight)){ | 
| +    assert( p->pParent==0 || p==p->pParent->pRight || p==p->pParent->pLeft ); | 
| +  } | 
| +  while( p ){ | 
| +    Fts3Expr *pParent = p->pParent; | 
| +    fts3FreeExprNode(p); | 
| +    if( pParent && p==pParent->pLeft && pParent->pRight ){ | 
| +      p = pParent->pRight; | 
| +      while( p && (p->pLeft || p->pRight) ){ | 
| +        assert( p==p->pParent->pRight || p==p->pParent->pLeft ); | 
| +        p = (p->pLeft ? p->pLeft : p->pRight); | 
| +      } | 
| +    }else{ | 
| +      p = pParent; | 
| +    } | 
| } | 
| } | 
|  | 
| @@ -795,12 +1121,15 @@ static int queryTestTokenizer( | 
| ** the returned expression text and then freed using sqlite3_free(). | 
| */ | 
| static char *exprToString(Fts3Expr *pExpr, char *zBuf){ | 
| +  if( pExpr==0 ){ | 
| +    return sqlite3_mprintf(""); | 
| +  } | 
| switch( pExpr->eType ){ | 
| case FTSQUERY_PHRASE: { | 
| Fts3Phrase *pPhrase = pExpr->pPhrase; | 
| int i; | 
| zBuf = sqlite3_mprintf( | 
| -          "%zPHRASE %d %d", zBuf, pPhrase->iColumn, pPhrase->isNot); | 
| +          "%zPHRASE %d 0", zBuf, pPhrase->iColumn); | 
| for(i=0; zBuf && i<pPhrase->nToken; i++){ | 
| zBuf = sqlite3_mprintf("%z %.*s%s", zBuf, | 
| pPhrase->aToken[i].n, pPhrase->aToken[i].z, | 
| @@ -902,10 +1231,21 @@ static void fts3ExprTest( | 
| azCol[ii] = (char *)sqlite3_value_text(argv[ii+2]); | 
| } | 
|  | 
| -  rc = sqlite3Fts3ExprParse( | 
| -      pTokenizer, azCol, nCol, nCol, zExpr, nExpr, &pExpr | 
| -  ); | 
| +  if( sqlite3_user_data(context) ){ | 
| +    char *zDummy = 0; | 
| +    rc = sqlite3Fts3ExprParse( | 
| +        pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr, &zDummy | 
| +    ); | 
| +    assert( rc==SQLITE_OK || pExpr==0 ); | 
| +    sqlite3_free(zDummy); | 
| +  }else{ | 
| +    rc = fts3ExprParseUnbalanced( | 
| +        pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr | 
| +    ); | 
| +  } | 
| + | 
| if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM ){ | 
| +    sqlite3Fts3ExprFree(pExpr); | 
| sqlite3_result_error(context, "Error parsing expression", -1); | 
| }else if( rc==SQLITE_NOMEM || !(zBuf = exprToString(pExpr, 0)) ){ | 
| sqlite3_result_error_nomem(context); | 
| @@ -928,9 +1268,15 @@ exprtest_out: | 
| ** with database connection db. | 
| */ | 
| int sqlite3Fts3ExprInitTestInterface(sqlite3* db){ | 
| -  return sqlite3_create_function( | 
| +  int rc = sqlite3_create_function( | 
| db, "fts3_exprtest", -1, SQLITE_UTF8, 0, fts3ExprTest, 0, 0 | 
| ); | 
| +  if( rc==SQLITE_OK ){ | 
| +    rc = sqlite3_create_function(db, "fts3_exprtest_rebalance", | 
| +        -1, SQLITE_UTF8, (void *)1, fts3ExprTest, 0, 0 | 
| +    ); | 
| +  } | 
| +  return rc; | 
| } | 
|  | 
| #endif | 
|  |