| Index: third_party/sqlite/src/ext/fts3/fts3_expr.c
|
| diff --git a/third_party/sqlite/src/ext/fts3/fts3_expr.c b/third_party/sqlite/src/ext/fts3/fts3_expr.c
|
| index 43f6d84a8409773ac32e76032a164eb91900ff44..2ba786ce8092147b9b371b26929dda6f167ef3be 100644
|
| --- a/third_party/sqlite/src/ext/fts3/fts3_expr.c
|
| +++ b/third_party/sqlite/src/ext/fts3/fts3_expr.c
|
| @@ -15,6 +15,7 @@
|
| ** syntax is relatively simple, the whole tokenizer/parser system is
|
| ** hand-coded.
|
| */
|
| +#include "fts3Int.h"
|
| #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
|
|
|
| /*
|
| @@ -77,16 +78,26 @@ int sqlite3_fts3_enable_parentheses = 0;
|
| */
|
| #define SQLITE_FTS3_DEFAULT_NEAR_PARAM 10
|
|
|
| -#include "fts3Int.h"
|
| #include <string.h>
|
| #include <assert.h>
|
|
|
| +/*
|
| +** isNot:
|
| +** This variable is used by function getNextNode(). When getNextNode() is
|
| +** called, it sets ParseContext.isNot to true if the 'next node' is a
|
| +** FTSQUERY_PHRASE with a unary "-" attached to it. i.e. "mysql" in the
|
| +** FTS3 query "sqlite -mysql". Otherwise, ParseContext.isNot is set to
|
| +** zero.
|
| +*/
|
| typedef struct ParseContext ParseContext;
|
| struct ParseContext {
|
| sqlite3_tokenizer *pTokenizer; /* Tokenizer module */
|
| + int iLangid; /* Language id used with tokenizer */
|
| const char **azCol; /* Array of column names for fts3 table */
|
| + int bFts4; /* True to allow FTS4-only syntax */
|
| int nCol; /* Number of entries in azCol[] */
|
| int iDefaultCol; /* Default column to query */
|
| + int isNot; /* True if getNextNode() sees a unary - */
|
| sqlite3_context *pCtx; /* Write error message here */
|
| int nNest; /* Number of nested brackets */
|
| };
|
| @@ -95,7 +106,7 @@ struct ParseContext {
|
| ** This function is equivalent to the standard isspace() function.
|
| **
|
| ** The standard isspace() can be awkward to use safely, because although it
|
| -** is defined to accept an argument of type int, its behaviour when passed
|
| +** is defined to accept an argument of type int, its behavior when passed
|
| ** an integer that falls outside of the range of the unsigned char type
|
| ** is undefined (and sometimes, "undefined" means segfault). This wrapper
|
| ** is defined to accept an argument of type char, and always returns 0 for
|
| @@ -117,6 +128,38 @@ static void *fts3MallocZero(int nByte){
|
| return pRet;
|
| }
|
|
|
| +int sqlite3Fts3OpenTokenizer(
|
| + sqlite3_tokenizer *pTokenizer,
|
| + int iLangid,
|
| + const char *z,
|
| + int n,
|
| + sqlite3_tokenizer_cursor **ppCsr
|
| +){
|
| + sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
|
| + sqlite3_tokenizer_cursor *pCsr = 0;
|
| + int rc;
|
| +
|
| + rc = pModule->xOpen(pTokenizer, z, n, &pCsr);
|
| + assert( rc==SQLITE_OK || pCsr==0 );
|
| + if( rc==SQLITE_OK ){
|
| + pCsr->pTokenizer = pTokenizer;
|
| + if( pModule->iVersion>=1 ){
|
| + rc = pModule->xLanguageid(pCsr, iLangid);
|
| + if( rc!=SQLITE_OK ){
|
| + pModule->xClose(pCsr);
|
| + pCsr = 0;
|
| + }
|
| + }
|
| + }
|
| + *ppCsr = pCsr;
|
| + return rc;
|
| +}
|
| +
|
| +/*
|
| +** Function getNextNode(), which is called by fts3ExprParse(), may itself
|
| +** call fts3ExprParse(). So this forward declaration is required.
|
| +*/
|
| +static int fts3ExprParse(ParseContext *, const char *, int, Fts3Expr **, int *);
|
|
|
| /*
|
| ** Extract the next token from buffer z (length n) using the tokenizer
|
| @@ -142,17 +185,22 @@ static int getNextToken(
|
| int rc;
|
| sqlite3_tokenizer_cursor *pCursor;
|
| Fts3Expr *pRet = 0;
|
| - int nConsumed = 0;
|
| + int i = 0;
|
|
|
| - rc = pModule->xOpen(pTokenizer, z, n, &pCursor);
|
| + /* Set variable i to the maximum number of bytes of input to tokenize. */
|
| + for(i=0; i<n; i++){
|
| + if( sqlite3_fts3_enable_parentheses && (z[i]=='(' || z[i]==')') ) break;
|
| + if( z[i]=='"' ) break;
|
| + }
|
| +
|
| + *pnConsumed = i;
|
| + rc = sqlite3Fts3OpenTokenizer(pTokenizer, pParse->iLangid, z, i, &pCursor);
|
| if( rc==SQLITE_OK ){
|
| const char *zToken;
|
| - int nToken, iStart, iEnd, iPosition;
|
| + int nToken = 0, iStart = 0, iEnd = 0, iPosition = 0;
|
| int nByte; /* total space to allocate */
|
|
|
| - pCursor->pTokenizer = pTokenizer;
|
| rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition);
|
| -
|
| if( rc==SQLITE_OK ){
|
| nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken;
|
| pRet = (Fts3Expr *)fts3MallocZero(nByte);
|
| @@ -171,17 +219,30 @@ static int getNextToken(
|
| pRet->pPhrase->aToken[0].isPrefix = 1;
|
| iEnd++;
|
| }
|
| - if( !sqlite3_fts3_enable_parentheses && iStart>0 && z[iStart-1]=='-' ){
|
| - pRet->pPhrase->isNot = 1;
|
| +
|
| + while( 1 ){
|
| + if( !sqlite3_fts3_enable_parentheses
|
| + && iStart>0 && z[iStart-1]=='-'
|
| + ){
|
| + pParse->isNot = 1;
|
| + iStart--;
|
| + }else if( pParse->bFts4 && iStart>0 && z[iStart-1]=='^' ){
|
| + pRet->pPhrase->aToken[0].bFirst = 1;
|
| + iStart--;
|
| + }else{
|
| + break;
|
| + }
|
| }
|
| +
|
| }
|
| - nConsumed = iEnd;
|
| + *pnConsumed = iEnd;
|
| + }else if( i && rc==SQLITE_DONE ){
|
| + rc = SQLITE_OK;
|
| }
|
|
|
| pModule->xClose(pCursor);
|
| }
|
|
|
| - *pnConsumed = nConsumed;
|
| *ppExpr = pRet;
|
| return rc;
|
| }
|
| @@ -224,36 +285,56 @@ static int getNextString(
|
| char *zTemp = 0;
|
| int nTemp = 0;
|
|
|
| - rc = pModule->xOpen(pTokenizer, zInput, nInput, &pCursor);
|
| + const int nSpace = sizeof(Fts3Expr) + sizeof(Fts3Phrase);
|
| + int nToken = 0;
|
| +
|
| + /* The final Fts3Expr data structure, including the Fts3Phrase,
|
| + ** Fts3PhraseToken structures token buffers are all stored as a single
|
| + ** allocation so that the expression can be freed with a single call to
|
| + ** sqlite3_free(). Setting this up requires a two pass approach.
|
| + **
|
| + ** The first pass, in the block below, uses a tokenizer cursor to iterate
|
| + ** through the tokens in the expression. This pass uses fts3ReallocOrFree()
|
| + ** to assemble data in two dynamic buffers:
|
| + **
|
| + ** Buffer p: Points to the Fts3Expr structure, followed by the Fts3Phrase
|
| + ** structure, followed by the array of Fts3PhraseToken
|
| + ** structures. This pass only populates the Fts3PhraseToken array.
|
| + **
|
| + ** Buffer zTemp: Contains copies of all tokens.
|
| + **
|
| + ** The second pass, in the block that begins "if( rc==SQLITE_DONE )" below,
|
| + ** appends buffer zTemp to buffer p, and fills in the Fts3Expr and Fts3Phrase
|
| + ** structures.
|
| + */
|
| + rc = sqlite3Fts3OpenTokenizer(
|
| + pTokenizer, pParse->iLangid, zInput, nInput, &pCursor);
|
| if( rc==SQLITE_OK ){
|
| int ii;
|
| - pCursor->pTokenizer = pTokenizer;
|
| for(ii=0; rc==SQLITE_OK; ii++){
|
| - const char *zToken;
|
| - int nToken, iBegin, iEnd, iPos;
|
| - rc = pModule->xNext(pCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos);
|
| + const char *zByte;
|
| + int nByte = 0, iBegin = 0, iEnd = 0, iPos = 0;
|
| + rc = pModule->xNext(pCursor, &zByte, &nByte, &iBegin, &iEnd, &iPos);
|
| if( rc==SQLITE_OK ){
|
| - int nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase);
|
| - p = fts3ReallocOrFree(p, nByte+ii*sizeof(Fts3PhraseToken));
|
| - zTemp = fts3ReallocOrFree(zTemp, nTemp + nToken);
|
| - if( !p || !zTemp ){
|
| - goto no_mem;
|
| - }
|
| - if( ii==0 ){
|
| - memset(p, 0, nByte);
|
| - p->pPhrase = (Fts3Phrase *)&p[1];
|
| - }
|
| - p->pPhrase = (Fts3Phrase *)&p[1];
|
| - memset(&p->pPhrase->aToken[ii], 0, sizeof(Fts3PhraseToken));
|
| - p->pPhrase->nToken = ii+1;
|
| - p->pPhrase->aToken[ii].n = nToken;
|
| - memcpy(&zTemp[nTemp], zToken, nToken);
|
| - nTemp += nToken;
|
| - if( iEnd<nInput && zInput[iEnd]=='*' ){
|
| - p->pPhrase->aToken[ii].isPrefix = 1;
|
| - }else{
|
| - p->pPhrase->aToken[ii].isPrefix = 0;
|
| - }
|
| + Fts3PhraseToken *pToken;
|
| +
|
| + p = fts3ReallocOrFree(p, nSpace + ii*sizeof(Fts3PhraseToken));
|
| + if( !p ) goto no_mem;
|
| +
|
| + zTemp = fts3ReallocOrFree(zTemp, nTemp + nByte);
|
| + if( !zTemp ) goto no_mem;
|
| +
|
| + assert( nToken==ii );
|
| + pToken = &((Fts3Phrase *)(&p[1]))->aToken[ii];
|
| + memset(pToken, 0, sizeof(Fts3PhraseToken));
|
| +
|
| + memcpy(&zTemp[nTemp], zByte, nByte);
|
| + nTemp += nByte;
|
| +
|
| + pToken->n = nByte;
|
| + pToken->isPrefix = (iEnd<nInput && zInput[iEnd]=='*');
|
| + pToken->bFirst = (iBegin>0 && zInput[iBegin-1]=='^');
|
| + nToken = ii+1;
|
| }
|
| }
|
|
|
| @@ -263,28 +344,28 @@ static int getNextString(
|
|
|
| if( rc==SQLITE_DONE ){
|
| int jj;
|
| - char *zNew = NULL;
|
| - int nNew = 0;
|
| - int nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase);
|
| - nByte += (p?(p->pPhrase->nToken-1):0) * sizeof(Fts3PhraseToken);
|
| - p = fts3ReallocOrFree(p, nByte + nTemp);
|
| - if( !p ){
|
| - goto no_mem;
|
| - }
|
| + char *zBuf = 0;
|
| +
|
| + p = fts3ReallocOrFree(p, nSpace + nToken*sizeof(Fts3PhraseToken) + nTemp);
|
| + if( !p ) goto no_mem;
|
| + memset(p, 0, (char *)&(((Fts3Phrase *)&p[1])->aToken[0])-(char *)p);
|
| + p->eType = FTSQUERY_PHRASE;
|
| + p->pPhrase = (Fts3Phrase *)&p[1];
|
| + p->pPhrase->iColumn = pParse->iDefaultCol;
|
| + p->pPhrase->nToken = nToken;
|
| +
|
| + zBuf = (char *)&p->pPhrase->aToken[nToken];
|
| if( zTemp ){
|
| - zNew = &(((char *)p)[nByte]);
|
| - memcpy(zNew, zTemp, nTemp);
|
| + memcpy(zBuf, zTemp, nTemp);
|
| + sqlite3_free(zTemp);
|
| }else{
|
| - memset(p, 0, nByte+nTemp);
|
| + assert( nTemp==0 );
|
| }
|
| - p->pPhrase = (Fts3Phrase *)&p[1];
|
| +
|
| for(jj=0; jj<p->pPhrase->nToken; jj++){
|
| - p->pPhrase->aToken[jj].z = &zNew[nNew];
|
| - nNew += p->pPhrase->aToken[jj].n;
|
| + p->pPhrase->aToken[jj].z = zBuf;
|
| + zBuf += p->pPhrase->aToken[jj].n;
|
| }
|
| - sqlite3_free(zTemp);
|
| - p->eType = FTSQUERY_PHRASE;
|
| - p->pPhrase->iColumn = pParse->iDefaultCol;
|
| rc = SQLITE_OK;
|
| }
|
|
|
| @@ -302,12 +383,6 @@ no_mem:
|
| }
|
|
|
| /*
|
| -** Function getNextNode(), which is called by fts3ExprParse(), may itself
|
| -** call fts3ExprParse(). So this forward declaration is required.
|
| -*/
|
| -static int fts3ExprParse(ParseContext *, const char *, int, Fts3Expr **, int *);
|
| -
|
| -/*
|
| ** The output variable *ppExpr is populated with an allocated Fts3Expr
|
| ** structure, or set to 0 if the end of the input buffer is reached.
|
| **
|
| @@ -341,6 +416,8 @@ static int getNextNode(
|
| const char *zInput = z;
|
| int nInput = n;
|
|
|
| + pParse->isNot = 0;
|
| +
|
| /* Skip over any whitespace before checking for a keyword, an open or
|
| ** close bracket, or a quoted string.
|
| */
|
| @@ -401,27 +478,6 @@ static int getNextNode(
|
| }
|
| }
|
|
|
| - /* Check for an open bracket. */
|
| - if( sqlite3_fts3_enable_parentheses ){
|
| - if( *zInput=='(' ){
|
| - int nConsumed;
|
| - pParse->nNest++;
|
| - rc = fts3ExprParse(pParse, &zInput[1], nInput-1, ppExpr, &nConsumed);
|
| - if( rc==SQLITE_OK && !*ppExpr ){
|
| - rc = SQLITE_DONE;
|
| - }
|
| - *pnConsumed = (int)((zInput - z) + 1 + nConsumed);
|
| - return rc;
|
| - }
|
| -
|
| - /* Check for a close bracket. */
|
| - if( *zInput==')' ){
|
| - pParse->nNest--;
|
| - *pnConsumed = (int)((zInput - z) + 1);
|
| - return SQLITE_DONE;
|
| - }
|
| - }
|
| -
|
| /* See if we are dealing with a quoted phrase. If this is the case, then
|
| ** search for the closing quote and pass the whole string to getNextString()
|
| ** for processing. This is easy to do, as fts3 has no syntax for escaping
|
| @@ -436,6 +492,21 @@ static int getNextNode(
|
| return getNextString(pParse, &zInput[1], ii-1, ppExpr);
|
| }
|
|
|
| + if( sqlite3_fts3_enable_parentheses ){
|
| + if( *zInput=='(' ){
|
| + int nConsumed = 0;
|
| + pParse->nNest++;
|
| + rc = fts3ExprParse(pParse, zInput+1, nInput-1, ppExpr, &nConsumed);
|
| + if( rc==SQLITE_OK && !*ppExpr ){ rc = SQLITE_DONE; }
|
| + *pnConsumed = (int)(zInput - z) + 1 + nConsumed;
|
| + return rc;
|
| + }else if( *zInput==')' ){
|
| + pParse->nNest--;
|
| + *pnConsumed = (int)((zInput - z) + 1);
|
| + *ppExpr = 0;
|
| + return SQLITE_DONE;
|
| + }
|
| + }
|
|
|
| /* If control flows to this point, this must be a regular token, or
|
| ** the end of the input. Read a regular token using the sqlite3_tokenizer
|
| @@ -554,95 +625,100 @@ static int fts3ExprParse(
|
| while( rc==SQLITE_OK ){
|
| Fts3Expr *p = 0;
|
| int nByte = 0;
|
| +
|
| rc = getNextNode(pParse, zIn, nIn, &p, &nByte);
|
| + assert( nByte>0 || (rc!=SQLITE_OK && p==0) );
|
| if( rc==SQLITE_OK ){
|
| - int isPhrase;
|
| -
|
| - if( !sqlite3_fts3_enable_parentheses
|
| - && p->eType==FTSQUERY_PHRASE && p->pPhrase->isNot
|
| - ){
|
| - /* Create an implicit NOT operator. */
|
| - Fts3Expr *pNot = fts3MallocZero(sizeof(Fts3Expr));
|
| - if( !pNot ){
|
| - sqlite3Fts3ExprFree(p);
|
| - rc = SQLITE_NOMEM;
|
| - goto exprparse_out;
|
| - }
|
| - pNot->eType = FTSQUERY_NOT;
|
| - pNot->pRight = p;
|
| - if( pNotBranch ){
|
| - pNot->pLeft = pNotBranch;
|
| - }
|
| - pNotBranch = pNot;
|
| - p = pPrev;
|
| - }else{
|
| - int eType = p->eType;
|
| - assert( eType!=FTSQUERY_PHRASE || !p->pPhrase->isNot );
|
| - isPhrase = (eType==FTSQUERY_PHRASE || p->pLeft);
|
| -
|
| - /* The isRequirePhrase variable is set to true if a phrase or
|
| - ** an expression contained in parenthesis is required. If a
|
| - ** binary operator (AND, OR, NOT or NEAR) is encounted when
|
| - ** isRequirePhrase is set, this is a syntax error.
|
| - */
|
| - if( !isPhrase && isRequirePhrase ){
|
| - sqlite3Fts3ExprFree(p);
|
| - rc = SQLITE_ERROR;
|
| - goto exprparse_out;
|
| - }
|
| -
|
| - if( isPhrase && !isRequirePhrase ){
|
| - /* Insert an implicit AND operator. */
|
| - Fts3Expr *pAnd;
|
| - assert( pRet && pPrev );
|
| - pAnd = fts3MallocZero(sizeof(Fts3Expr));
|
| - if( !pAnd ){
|
| + if( p ){
|
| + int isPhrase;
|
| +
|
| + if( !sqlite3_fts3_enable_parentheses
|
| + && p->eType==FTSQUERY_PHRASE && pParse->isNot
|
| + ){
|
| + /* Create an implicit NOT operator. */
|
| + Fts3Expr *pNot = fts3MallocZero(sizeof(Fts3Expr));
|
| + if( !pNot ){
|
| sqlite3Fts3ExprFree(p);
|
| rc = SQLITE_NOMEM;
|
| goto exprparse_out;
|
| }
|
| - pAnd->eType = FTSQUERY_AND;
|
| - insertBinaryOperator(&pRet, pPrev, pAnd);
|
| - pPrev = pAnd;
|
| - }
|
| + pNot->eType = FTSQUERY_NOT;
|
| + pNot->pRight = p;
|
| + p->pParent = pNot;
|
| + if( pNotBranch ){
|
| + pNot->pLeft = pNotBranch;
|
| + pNotBranch->pParent = pNot;
|
| + }
|
| + pNotBranch = pNot;
|
| + p = pPrev;
|
| + }else{
|
| + int eType = p->eType;
|
| + isPhrase = (eType==FTSQUERY_PHRASE || p->pLeft);
|
| +
|
| + /* The isRequirePhrase variable is set to true if a phrase or
|
| + ** an expression contained in parenthesis is required. If a
|
| + ** binary operator (AND, OR, NOT or NEAR) is encounted when
|
| + ** isRequirePhrase is set, this is a syntax error.
|
| + */
|
| + if( !isPhrase && isRequirePhrase ){
|
| + sqlite3Fts3ExprFree(p);
|
| + rc = SQLITE_ERROR;
|
| + goto exprparse_out;
|
| + }
|
| +
|
| + if( isPhrase && !isRequirePhrase ){
|
| + /* Insert an implicit AND operator. */
|
| + Fts3Expr *pAnd;
|
| + assert( pRet && pPrev );
|
| + pAnd = fts3MallocZero(sizeof(Fts3Expr));
|
| + if( !pAnd ){
|
| + sqlite3Fts3ExprFree(p);
|
| + rc = SQLITE_NOMEM;
|
| + goto exprparse_out;
|
| + }
|
| + pAnd->eType = FTSQUERY_AND;
|
| + insertBinaryOperator(&pRet, pPrev, pAnd);
|
| + pPrev = pAnd;
|
| + }
|
|
|
| - /* This test catches attempts to make either operand of a NEAR
|
| - ** operator something other than a phrase. For example, either of
|
| - ** the following:
|
| - **
|
| - ** (bracketed expression) NEAR phrase
|
| - ** phrase NEAR (bracketed expression)
|
| - **
|
| - ** Return an error in either case.
|
| - */
|
| - if( pPrev && (
|
| + /* This test catches attempts to make either operand of a NEAR
|
| + ** operator something other than a phrase. For example, either of
|
| + ** the following:
|
| + **
|
| + ** (bracketed expression) NEAR phrase
|
| + ** phrase NEAR (bracketed expression)
|
| + **
|
| + ** Return an error in either case.
|
| + */
|
| + if( pPrev && (
|
| (eType==FTSQUERY_NEAR && !isPhrase && pPrev->eType!=FTSQUERY_PHRASE)
|
| || (eType!=FTSQUERY_PHRASE && isPhrase && pPrev->eType==FTSQUERY_NEAR)
|
| - )){
|
| - sqlite3Fts3ExprFree(p);
|
| - rc = SQLITE_ERROR;
|
| - goto exprparse_out;
|
| - }
|
| -
|
| - if( isPhrase ){
|
| - if( pRet ){
|
| - assert( pPrev && pPrev->pLeft && pPrev->pRight==0 );
|
| - pPrev->pRight = p;
|
| - p->pParent = pPrev;
|
| + )){
|
| + sqlite3Fts3ExprFree(p);
|
| + rc = SQLITE_ERROR;
|
| + goto exprparse_out;
|
| + }
|
| +
|
| + if( isPhrase ){
|
| + if( pRet ){
|
| + assert( pPrev && pPrev->pLeft && pPrev->pRight==0 );
|
| + pPrev->pRight = p;
|
| + p->pParent = pPrev;
|
| + }else{
|
| + pRet = p;
|
| + }
|
| }else{
|
| - pRet = p;
|
| + insertBinaryOperator(&pRet, pPrev, p);
|
| }
|
| - }else{
|
| - insertBinaryOperator(&pRet, pPrev, p);
|
| + isRequirePhrase = !isPhrase;
|
| }
|
| - isRequirePhrase = !isPhrase;
|
| + pPrev = p;
|
| }
|
| assert( nByte>0 );
|
| }
|
| assert( rc!=SQLITE_OK || (nByte>0 && nByte<=nIn) );
|
| nIn -= nByte;
|
| zIn += nByte;
|
| - pPrev = p;
|
| }
|
|
|
| if( rc==SQLITE_DONE && pRet && isRequirePhrase ){
|
| @@ -660,6 +736,7 @@ static int fts3ExprParse(
|
| pIter = pIter->pLeft;
|
| }
|
| pIter->pLeft = pRet;
|
| + pRet->pParent = pIter;
|
| pRet = pNotBranch;
|
| }
|
| }
|
| @@ -677,6 +754,223 @@ exprparse_out:
|
| }
|
|
|
| /*
|
| +** Return SQLITE_ERROR if the maximum depth of the expression tree passed
|
| +** as the only argument is more than nMaxDepth.
|
| +*/
|
| +static int fts3ExprCheckDepth(Fts3Expr *p, int nMaxDepth){
|
| + int rc = SQLITE_OK;
|
| + if( p ){
|
| + if( nMaxDepth<0 ){
|
| + rc = SQLITE_TOOBIG;
|
| + }else{
|
| + rc = fts3ExprCheckDepth(p->pLeft, nMaxDepth-1);
|
| + if( rc==SQLITE_OK ){
|
| + rc = fts3ExprCheckDepth(p->pRight, nMaxDepth-1);
|
| + }
|
| + }
|
| + }
|
| + return rc;
|
| +}
|
| +
|
| +/*
|
| +** This function attempts to transform the expression tree at (*pp) to
|
| +** an equivalent but more balanced form. The tree is modified in place.
|
| +** If successful, SQLITE_OK is returned and (*pp) set to point to the
|
| +** new root expression node.
|
| +**
|
| +** nMaxDepth is the maximum allowable depth of the balanced sub-tree.
|
| +**
|
| +** Otherwise, if an error occurs, an SQLite error code is returned and
|
| +** expression (*pp) freed.
|
| +*/
|
| +static int fts3ExprBalance(Fts3Expr **pp, int nMaxDepth){
|
| + int rc = SQLITE_OK; /* Return code */
|
| + Fts3Expr *pRoot = *pp; /* Initial root node */
|
| + Fts3Expr *pFree = 0; /* List of free nodes. Linked by pParent. */
|
| + int eType = pRoot->eType; /* Type of node in this tree */
|
| +
|
| + if( nMaxDepth==0 ){
|
| + rc = SQLITE_ERROR;
|
| + }
|
| +
|
| + if( rc==SQLITE_OK && (eType==FTSQUERY_AND || eType==FTSQUERY_OR) ){
|
| + Fts3Expr **apLeaf;
|
| + apLeaf = (Fts3Expr **)sqlite3_malloc(sizeof(Fts3Expr *) * nMaxDepth);
|
| + if( 0==apLeaf ){
|
| + rc = SQLITE_NOMEM;
|
| + }else{
|
| + memset(apLeaf, 0, sizeof(Fts3Expr *) * nMaxDepth);
|
| + }
|
| +
|
| + if( rc==SQLITE_OK ){
|
| + int i;
|
| + Fts3Expr *p;
|
| +
|
| + /* Set $p to point to the left-most leaf in the tree of eType nodes. */
|
| + for(p=pRoot; p->eType==eType; p=p->pLeft){
|
| + assert( p->pParent==0 || p->pParent->pLeft==p );
|
| + assert( p->pLeft && p->pRight );
|
| + }
|
| +
|
| + /* This loop runs once for each leaf in the tree of eType nodes. */
|
| + while( 1 ){
|
| + int iLvl;
|
| + Fts3Expr *pParent = p->pParent; /* Current parent of p */
|
| +
|
| + assert( pParent==0 || pParent->pLeft==p );
|
| + p->pParent = 0;
|
| + if( pParent ){
|
| + pParent->pLeft = 0;
|
| + }else{
|
| + pRoot = 0;
|
| + }
|
| + rc = fts3ExprBalance(&p, nMaxDepth-1);
|
| + if( rc!=SQLITE_OK ) break;
|
| +
|
| + for(iLvl=0; p && iLvl<nMaxDepth; iLvl++){
|
| + if( apLeaf[iLvl]==0 ){
|
| + apLeaf[iLvl] = p;
|
| + p = 0;
|
| + }else{
|
| + assert( pFree );
|
| + pFree->pLeft = apLeaf[iLvl];
|
| + pFree->pRight = p;
|
| + pFree->pLeft->pParent = pFree;
|
| + pFree->pRight->pParent = pFree;
|
| +
|
| + p = pFree;
|
| + pFree = pFree->pParent;
|
| + p->pParent = 0;
|
| + apLeaf[iLvl] = 0;
|
| + }
|
| + }
|
| + if( p ){
|
| + sqlite3Fts3ExprFree(p);
|
| + rc = SQLITE_TOOBIG;
|
| + break;
|
| + }
|
| +
|
| + /* If that was the last leaf node, break out of the loop */
|
| + if( pParent==0 ) break;
|
| +
|
| + /* Set $p to point to the next leaf in the tree of eType nodes */
|
| + for(p=pParent->pRight; p->eType==eType; p=p->pLeft);
|
| +
|
| + /* Remove pParent from the original tree. */
|
| + assert( pParent->pParent==0 || pParent->pParent->pLeft==pParent );
|
| + pParent->pRight->pParent = pParent->pParent;
|
| + if( pParent->pParent ){
|
| + pParent->pParent->pLeft = pParent->pRight;
|
| + }else{
|
| + assert( pParent==pRoot );
|
| + pRoot = pParent->pRight;
|
| + }
|
| +
|
| + /* Link pParent into the free node list. It will be used as an
|
| + ** internal node of the new tree. */
|
| + pParent->pParent = pFree;
|
| + pFree = pParent;
|
| + }
|
| +
|
| + if( rc==SQLITE_OK ){
|
| + p = 0;
|
| + for(i=0; i<nMaxDepth; i++){
|
| + if( apLeaf[i] ){
|
| + if( p==0 ){
|
| + p = apLeaf[i];
|
| + p->pParent = 0;
|
| + }else{
|
| + assert( pFree!=0 );
|
| + pFree->pRight = p;
|
| + pFree->pLeft = apLeaf[i];
|
| + pFree->pLeft->pParent = pFree;
|
| + pFree->pRight->pParent = pFree;
|
| +
|
| + p = pFree;
|
| + pFree = pFree->pParent;
|
| + p->pParent = 0;
|
| + }
|
| + }
|
| + }
|
| + pRoot = p;
|
| + }else{
|
| + /* An error occurred. Delete the contents of the apLeaf[] array
|
| + ** and pFree list. Everything else is cleaned up by the call to
|
| + ** sqlite3Fts3ExprFree(pRoot) below. */
|
| + Fts3Expr *pDel;
|
| + for(i=0; i<nMaxDepth; i++){
|
| + sqlite3Fts3ExprFree(apLeaf[i]);
|
| + }
|
| + while( (pDel=pFree)!=0 ){
|
| + pFree = pDel->pParent;
|
| + sqlite3_free(pDel);
|
| + }
|
| + }
|
| +
|
| + assert( pFree==0 );
|
| + sqlite3_free( apLeaf );
|
| + }
|
| + }
|
| +
|
| + if( rc!=SQLITE_OK ){
|
| + sqlite3Fts3ExprFree(pRoot);
|
| + pRoot = 0;
|
| + }
|
| + *pp = pRoot;
|
| + return rc;
|
| +}
|
| +
|
| +/*
|
| +** This function is similar to sqlite3Fts3ExprParse(), with the following
|
| +** differences:
|
| +**
|
| +** 1. It does not do expression rebalancing.
|
| +** 2. It does not check that the expression does not exceed the
|
| +** maximum allowable depth.
|
| +** 3. Even if it fails, *ppExpr may still be set to point to an
|
| +** expression tree. It should be deleted using sqlite3Fts3ExprFree()
|
| +** in this case.
|
| +*/
|
| +static int fts3ExprParseUnbalanced(
|
| + sqlite3_tokenizer *pTokenizer, /* Tokenizer module */
|
| + int iLangid, /* Language id for tokenizer */
|
| + char **azCol, /* Array of column names for fts3 table */
|
| + int bFts4, /* True to allow FTS4-only syntax */
|
| + int nCol, /* Number of entries in azCol[] */
|
| + int iDefaultCol, /* Default column to query */
|
| + const char *z, int n, /* Text of MATCH query */
|
| + Fts3Expr **ppExpr /* OUT: Parsed query structure */
|
| +){
|
| + int nParsed;
|
| + int rc;
|
| + ParseContext sParse;
|
| +
|
| + memset(&sParse, 0, sizeof(ParseContext));
|
| + sParse.pTokenizer = pTokenizer;
|
| + sParse.iLangid = iLangid;
|
| + sParse.azCol = (const char **)azCol;
|
| + sParse.nCol = nCol;
|
| + sParse.iDefaultCol = iDefaultCol;
|
| + sParse.bFts4 = bFts4;
|
| + if( z==0 ){
|
| + *ppExpr = 0;
|
| + return SQLITE_OK;
|
| + }
|
| + if( n<0 ){
|
| + n = (int)strlen(z);
|
| + }
|
| + rc = fts3ExprParse(&sParse, z, n, ppExpr, &nParsed);
|
| + assert( rc==SQLITE_OK || *ppExpr==0 );
|
| +
|
| + /* Check for mismatched parenthesis */
|
| + if( rc==SQLITE_OK && sParse.nNest ){
|
| + rc = SQLITE_ERROR;
|
| + }
|
| +
|
| + return rc;
|
| +}
|
| +
|
| +/*
|
| ** Parameters z and n contain a pointer to and length of a buffer containing
|
| ** an fts3 query expression, respectively. This function attempts to parse the
|
| ** query expression and create a tree of Fts3Expr structures representing the
|
| @@ -702,48 +996,80 @@ exprparse_out:
|
| */
|
| int sqlite3Fts3ExprParse(
|
| sqlite3_tokenizer *pTokenizer, /* Tokenizer module */
|
| + int iLangid, /* Language id for tokenizer */
|
| char **azCol, /* Array of column names for fts3 table */
|
| + int bFts4, /* True to allow FTS4-only syntax */
|
| int nCol, /* Number of entries in azCol[] */
|
| int iDefaultCol, /* Default column to query */
|
| const char *z, int n, /* Text of MATCH query */
|
| - Fts3Expr **ppExpr /* OUT: Parsed query structure */
|
| + Fts3Expr **ppExpr, /* OUT: Parsed query structure */
|
| + char **pzErr /* OUT: Error message (sqlite3_malloc) */
|
| ){
|
| - int nParsed;
|
| - int rc;
|
| - ParseContext sParse;
|
| - sParse.pTokenizer = pTokenizer;
|
| - sParse.azCol = (const char **)azCol;
|
| - sParse.nCol = nCol;
|
| - sParse.iDefaultCol = iDefaultCol;
|
| - sParse.nNest = 0;
|
| - if( z==0 ){
|
| - *ppExpr = 0;
|
| - return SQLITE_OK;
|
| - }
|
| - if( n<0 ){
|
| - n = (int)strlen(z);
|
| + int rc = fts3ExprParseUnbalanced(
|
| + pTokenizer, iLangid, azCol, bFts4, nCol, iDefaultCol, z, n, ppExpr
|
| + );
|
| +
|
| + /* Rebalance the expression. And check that its depth does not exceed
|
| + ** SQLITE_FTS3_MAX_EXPR_DEPTH. */
|
| + if( rc==SQLITE_OK && *ppExpr ){
|
| + rc = fts3ExprBalance(ppExpr, SQLITE_FTS3_MAX_EXPR_DEPTH);
|
| + if( rc==SQLITE_OK ){
|
| + rc = fts3ExprCheckDepth(*ppExpr, SQLITE_FTS3_MAX_EXPR_DEPTH);
|
| + }
|
| }
|
| - rc = fts3ExprParse(&sParse, z, n, ppExpr, &nParsed);
|
|
|
| - /* Check for mismatched parenthesis */
|
| - if( rc==SQLITE_OK && sParse.nNest ){
|
| - rc = SQLITE_ERROR;
|
| + if( rc!=SQLITE_OK ){
|
| sqlite3Fts3ExprFree(*ppExpr);
|
| *ppExpr = 0;
|
| + if( rc==SQLITE_TOOBIG ){
|
| + *pzErr = sqlite3_mprintf(
|
| + "FTS expression tree is too large (maximum depth %d)",
|
| + SQLITE_FTS3_MAX_EXPR_DEPTH
|
| + );
|
| + rc = SQLITE_ERROR;
|
| + }else if( rc==SQLITE_ERROR ){
|
| + *pzErr = sqlite3_mprintf("malformed MATCH expression: [%s]", z);
|
| + }
|
| }
|
|
|
| return rc;
|
| }
|
|
|
| /*
|
| +** Free a single node of an expression tree.
|
| +*/
|
| +static void fts3FreeExprNode(Fts3Expr *p){
|
| + assert( p->eType==FTSQUERY_PHRASE || p->pPhrase==0 );
|
| + sqlite3Fts3EvalPhraseCleanup(p->pPhrase);
|
| + sqlite3_free(p->aMI);
|
| + sqlite3_free(p);
|
| +}
|
| +
|
| +/*
|
| ** Free a parsed fts3 query expression allocated by sqlite3Fts3ExprParse().
|
| +**
|
| +** This function would be simpler if it recursively called itself. But
|
| +** that would mean passing a sufficiently large expression to ExprParse()
|
| +** could cause a stack overflow.
|
| */
|
| -void sqlite3Fts3ExprFree(Fts3Expr *p){
|
| - if( p ){
|
| - sqlite3Fts3ExprFree(p->pLeft);
|
| - sqlite3Fts3ExprFree(p->pRight);
|
| - sqlite3_free(p->aDoclist);
|
| - sqlite3_free(p);
|
| +void sqlite3Fts3ExprFree(Fts3Expr *pDel){
|
| + Fts3Expr *p;
|
| + assert( pDel==0 || pDel->pParent==0 );
|
| + for(p=pDel; p && (p->pLeft||p->pRight); p=(p->pLeft ? p->pLeft : p->pRight)){
|
| + assert( p->pParent==0 || p==p->pParent->pRight || p==p->pParent->pLeft );
|
| + }
|
| + while( p ){
|
| + Fts3Expr *pParent = p->pParent;
|
| + fts3FreeExprNode(p);
|
| + if( pParent && p==pParent->pLeft && pParent->pRight ){
|
| + p = pParent->pRight;
|
| + while( p && (p->pLeft || p->pRight) ){
|
| + assert( p==p->pParent->pRight || p==p->pParent->pLeft );
|
| + p = (p->pLeft ? p->pLeft : p->pRight);
|
| + }
|
| + }else{
|
| + p = pParent;
|
| + }
|
| }
|
| }
|
|
|
| @@ -795,12 +1121,15 @@ static int queryTestTokenizer(
|
| ** the returned expression text and then freed using sqlite3_free().
|
| */
|
| static char *exprToString(Fts3Expr *pExpr, char *zBuf){
|
| + if( pExpr==0 ){
|
| + return sqlite3_mprintf("");
|
| + }
|
| switch( pExpr->eType ){
|
| case FTSQUERY_PHRASE: {
|
| Fts3Phrase *pPhrase = pExpr->pPhrase;
|
| int i;
|
| zBuf = sqlite3_mprintf(
|
| - "%zPHRASE %d %d", zBuf, pPhrase->iColumn, pPhrase->isNot);
|
| + "%zPHRASE %d 0", zBuf, pPhrase->iColumn);
|
| for(i=0; zBuf && i<pPhrase->nToken; i++){
|
| zBuf = sqlite3_mprintf("%z %.*s%s", zBuf,
|
| pPhrase->aToken[i].n, pPhrase->aToken[i].z,
|
| @@ -902,10 +1231,21 @@ static void fts3ExprTest(
|
| azCol[ii] = (char *)sqlite3_value_text(argv[ii+2]);
|
| }
|
|
|
| - rc = sqlite3Fts3ExprParse(
|
| - pTokenizer, azCol, nCol, nCol, zExpr, nExpr, &pExpr
|
| - );
|
| + if( sqlite3_user_data(context) ){
|
| + char *zDummy = 0;
|
| + rc = sqlite3Fts3ExprParse(
|
| + pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr, &zDummy
|
| + );
|
| + assert( rc==SQLITE_OK || pExpr==0 );
|
| + sqlite3_free(zDummy);
|
| + }else{
|
| + rc = fts3ExprParseUnbalanced(
|
| + pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr
|
| + );
|
| + }
|
| +
|
| if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM ){
|
| + sqlite3Fts3ExprFree(pExpr);
|
| sqlite3_result_error(context, "Error parsing expression", -1);
|
| }else if( rc==SQLITE_NOMEM || !(zBuf = exprToString(pExpr, 0)) ){
|
| sqlite3_result_error_nomem(context);
|
| @@ -928,9 +1268,15 @@ exprtest_out:
|
| ** with database connection db.
|
| */
|
| int sqlite3Fts3ExprInitTestInterface(sqlite3* db){
|
| - return sqlite3_create_function(
|
| + int rc = sqlite3_create_function(
|
| db, "fts3_exprtest", -1, SQLITE_UTF8, 0, fts3ExprTest, 0, 0
|
| );
|
| + if( rc==SQLITE_OK ){
|
| + rc = sqlite3_create_function(db, "fts3_exprtest_rebalance",
|
| + -1, SQLITE_UTF8, (void *)1, fts3ExprTest, 0, 0
|
| + );
|
| + }
|
| + return rc;
|
| }
|
|
|
| #endif
|
|
|