Index: third_party/sqlite/src/ext/fts3/fts3_expr.c |
diff --git a/third_party/sqlite/src/ext/fts3/fts3_expr.c b/third_party/sqlite/src/ext/fts3/fts3_expr.c |
index 43f6d84a8409773ac32e76032a164eb91900ff44..2ba786ce8092147b9b371b26929dda6f167ef3be 100644 |
--- a/third_party/sqlite/src/ext/fts3/fts3_expr.c |
+++ b/third_party/sqlite/src/ext/fts3/fts3_expr.c |
@@ -15,6 +15,7 @@ |
** syntax is relatively simple, the whole tokenizer/parser system is |
** hand-coded. |
*/ |
+#include "fts3Int.h" |
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) |
/* |
@@ -77,16 +78,26 @@ int sqlite3_fts3_enable_parentheses = 0; |
*/ |
#define SQLITE_FTS3_DEFAULT_NEAR_PARAM 10 |
-#include "fts3Int.h" |
#include <string.h> |
#include <assert.h> |
+/* |
+** isNot: |
+** This variable is used by function getNextNode(). When getNextNode() is |
+** called, it sets ParseContext.isNot to true if the 'next node' is a |
+** FTSQUERY_PHRASE with a unary "-" attached to it. i.e. "mysql" in the |
+** FTS3 query "sqlite -mysql". Otherwise, ParseContext.isNot is set to |
+** zero. |
+*/ |
typedef struct ParseContext ParseContext; |
struct ParseContext { |
sqlite3_tokenizer *pTokenizer; /* Tokenizer module */ |
+ int iLangid; /* Language id used with tokenizer */ |
const char **azCol; /* Array of column names for fts3 table */ |
+ int bFts4; /* True to allow FTS4-only syntax */ |
int nCol; /* Number of entries in azCol[] */ |
int iDefaultCol; /* Default column to query */ |
+ int isNot; /* True if getNextNode() sees a unary - */ |
sqlite3_context *pCtx; /* Write error message here */ |
int nNest; /* Number of nested brackets */ |
}; |
@@ -95,7 +106,7 @@ struct ParseContext { |
** This function is equivalent to the standard isspace() function. |
** |
** The standard isspace() can be awkward to use safely, because although it |
-** is defined to accept an argument of type int, its behaviour when passed |
+** is defined to accept an argument of type int, its behavior when passed |
** an integer that falls outside of the range of the unsigned char type |
** is undefined (and sometimes, "undefined" means segfault). This wrapper |
** is defined to accept an argument of type char, and always returns 0 for |
@@ -117,6 +128,38 @@ static void *fts3MallocZero(int nByte){ |
return pRet; |
} |
+int sqlite3Fts3OpenTokenizer( |
+ sqlite3_tokenizer *pTokenizer, |
+ int iLangid, |
+ const char *z, |
+ int n, |
+ sqlite3_tokenizer_cursor **ppCsr |
+){ |
+ sqlite3_tokenizer_module const *pModule = pTokenizer->pModule; |
+ sqlite3_tokenizer_cursor *pCsr = 0; |
+ int rc; |
+ |
+ rc = pModule->xOpen(pTokenizer, z, n, &pCsr); |
+ assert( rc==SQLITE_OK || pCsr==0 ); |
+ if( rc==SQLITE_OK ){ |
+ pCsr->pTokenizer = pTokenizer; |
+ if( pModule->iVersion>=1 ){ |
+ rc = pModule->xLanguageid(pCsr, iLangid); |
+ if( rc!=SQLITE_OK ){ |
+ pModule->xClose(pCsr); |
+ pCsr = 0; |
+ } |
+ } |
+ } |
+ *ppCsr = pCsr; |
+ return rc; |
+} |
+ |
+/* |
+** Function getNextNode(), which is called by fts3ExprParse(), may itself |
+** call fts3ExprParse(). So this forward declaration is required. |
+*/ |
+static int fts3ExprParse(ParseContext *, const char *, int, Fts3Expr **, int *); |
/* |
** Extract the next token from buffer z (length n) using the tokenizer |
@@ -142,17 +185,22 @@ static int getNextToken( |
int rc; |
sqlite3_tokenizer_cursor *pCursor; |
Fts3Expr *pRet = 0; |
- int nConsumed = 0; |
+ int i = 0; |
- rc = pModule->xOpen(pTokenizer, z, n, &pCursor); |
+ /* Set variable i to the maximum number of bytes of input to tokenize. */ |
+ for(i=0; i<n; i++){ |
+ if( sqlite3_fts3_enable_parentheses && (z[i]=='(' || z[i]==')') ) break; |
+ if( z[i]=='"' ) break; |
+ } |
+ |
+ *pnConsumed = i; |
+ rc = sqlite3Fts3OpenTokenizer(pTokenizer, pParse->iLangid, z, i, &pCursor); |
if( rc==SQLITE_OK ){ |
const char *zToken; |
- int nToken, iStart, iEnd, iPosition; |
+ int nToken = 0, iStart = 0, iEnd = 0, iPosition = 0; |
int nByte; /* total space to allocate */ |
- pCursor->pTokenizer = pTokenizer; |
rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition); |
- |
if( rc==SQLITE_OK ){ |
nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken; |
pRet = (Fts3Expr *)fts3MallocZero(nByte); |
@@ -171,17 +219,30 @@ static int getNextToken( |
pRet->pPhrase->aToken[0].isPrefix = 1; |
iEnd++; |
} |
- if( !sqlite3_fts3_enable_parentheses && iStart>0 && z[iStart-1]=='-' ){ |
- pRet->pPhrase->isNot = 1; |
+ |
+ while( 1 ){ |
+ if( !sqlite3_fts3_enable_parentheses |
+ && iStart>0 && z[iStart-1]=='-' |
+ ){ |
+ pParse->isNot = 1; |
+ iStart--; |
+ }else if( pParse->bFts4 && iStart>0 && z[iStart-1]=='^' ){ |
+ pRet->pPhrase->aToken[0].bFirst = 1; |
+ iStart--; |
+ }else{ |
+ break; |
+ } |
} |
+ |
} |
- nConsumed = iEnd; |
+ *pnConsumed = iEnd; |
+ }else if( i && rc==SQLITE_DONE ){ |
+ rc = SQLITE_OK; |
} |
pModule->xClose(pCursor); |
} |
- *pnConsumed = nConsumed; |
*ppExpr = pRet; |
return rc; |
} |
@@ -224,36 +285,56 @@ static int getNextString( |
char *zTemp = 0; |
int nTemp = 0; |
- rc = pModule->xOpen(pTokenizer, zInput, nInput, &pCursor); |
+ const int nSpace = sizeof(Fts3Expr) + sizeof(Fts3Phrase); |
+ int nToken = 0; |
+ |
+ /* The final Fts3Expr data structure, including the Fts3Phrase, |
+ ** Fts3PhraseToken structures token buffers are all stored as a single |
+ ** allocation so that the expression can be freed with a single call to |
+ ** sqlite3_free(). Setting this up requires a two pass approach. |
+ ** |
+ ** The first pass, in the block below, uses a tokenizer cursor to iterate |
+ ** through the tokens in the expression. This pass uses fts3ReallocOrFree() |
+ ** to assemble data in two dynamic buffers: |
+ ** |
+ ** Buffer p: Points to the Fts3Expr structure, followed by the Fts3Phrase |
+ ** structure, followed by the array of Fts3PhraseToken |
+ ** structures. This pass only populates the Fts3PhraseToken array. |
+ ** |
+ ** Buffer zTemp: Contains copies of all tokens. |
+ ** |
+ ** The second pass, in the block that begins "if( rc==SQLITE_DONE )" below, |
+ ** appends buffer zTemp to buffer p, and fills in the Fts3Expr and Fts3Phrase |
+ ** structures. |
+ */ |
+ rc = sqlite3Fts3OpenTokenizer( |
+ pTokenizer, pParse->iLangid, zInput, nInput, &pCursor); |
if( rc==SQLITE_OK ){ |
int ii; |
- pCursor->pTokenizer = pTokenizer; |
for(ii=0; rc==SQLITE_OK; ii++){ |
- const char *zToken; |
- int nToken, iBegin, iEnd, iPos; |
- rc = pModule->xNext(pCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos); |
+ const char *zByte; |
+ int nByte = 0, iBegin = 0, iEnd = 0, iPos = 0; |
+ rc = pModule->xNext(pCursor, &zByte, &nByte, &iBegin, &iEnd, &iPos); |
if( rc==SQLITE_OK ){ |
- int nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase); |
- p = fts3ReallocOrFree(p, nByte+ii*sizeof(Fts3PhraseToken)); |
- zTemp = fts3ReallocOrFree(zTemp, nTemp + nToken); |
- if( !p || !zTemp ){ |
- goto no_mem; |
- } |
- if( ii==0 ){ |
- memset(p, 0, nByte); |
- p->pPhrase = (Fts3Phrase *)&p[1]; |
- } |
- p->pPhrase = (Fts3Phrase *)&p[1]; |
- memset(&p->pPhrase->aToken[ii], 0, sizeof(Fts3PhraseToken)); |
- p->pPhrase->nToken = ii+1; |
- p->pPhrase->aToken[ii].n = nToken; |
- memcpy(&zTemp[nTemp], zToken, nToken); |
- nTemp += nToken; |
- if( iEnd<nInput && zInput[iEnd]=='*' ){ |
- p->pPhrase->aToken[ii].isPrefix = 1; |
- }else{ |
- p->pPhrase->aToken[ii].isPrefix = 0; |
- } |
+ Fts3PhraseToken *pToken; |
+ |
+ p = fts3ReallocOrFree(p, nSpace + ii*sizeof(Fts3PhraseToken)); |
+ if( !p ) goto no_mem; |
+ |
+ zTemp = fts3ReallocOrFree(zTemp, nTemp + nByte); |
+ if( !zTemp ) goto no_mem; |
+ |
+ assert( nToken==ii ); |
+ pToken = &((Fts3Phrase *)(&p[1]))->aToken[ii]; |
+ memset(pToken, 0, sizeof(Fts3PhraseToken)); |
+ |
+ memcpy(&zTemp[nTemp], zByte, nByte); |
+ nTemp += nByte; |
+ |
+ pToken->n = nByte; |
+ pToken->isPrefix = (iEnd<nInput && zInput[iEnd]=='*'); |
+ pToken->bFirst = (iBegin>0 && zInput[iBegin-1]=='^'); |
+ nToken = ii+1; |
} |
} |
@@ -263,28 +344,28 @@ static int getNextString( |
if( rc==SQLITE_DONE ){ |
int jj; |
- char *zNew = NULL; |
- int nNew = 0; |
- int nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase); |
- nByte += (p?(p->pPhrase->nToken-1):0) * sizeof(Fts3PhraseToken); |
- p = fts3ReallocOrFree(p, nByte + nTemp); |
- if( !p ){ |
- goto no_mem; |
- } |
+ char *zBuf = 0; |
+ |
+ p = fts3ReallocOrFree(p, nSpace + nToken*sizeof(Fts3PhraseToken) + nTemp); |
+ if( !p ) goto no_mem; |
+ memset(p, 0, (char *)&(((Fts3Phrase *)&p[1])->aToken[0])-(char *)p); |
+ p->eType = FTSQUERY_PHRASE; |
+ p->pPhrase = (Fts3Phrase *)&p[1]; |
+ p->pPhrase->iColumn = pParse->iDefaultCol; |
+ p->pPhrase->nToken = nToken; |
+ |
+ zBuf = (char *)&p->pPhrase->aToken[nToken]; |
if( zTemp ){ |
- zNew = &(((char *)p)[nByte]); |
- memcpy(zNew, zTemp, nTemp); |
+ memcpy(zBuf, zTemp, nTemp); |
+ sqlite3_free(zTemp); |
}else{ |
- memset(p, 0, nByte+nTemp); |
+ assert( nTemp==0 ); |
} |
- p->pPhrase = (Fts3Phrase *)&p[1]; |
+ |
for(jj=0; jj<p->pPhrase->nToken; jj++){ |
- p->pPhrase->aToken[jj].z = &zNew[nNew]; |
- nNew += p->pPhrase->aToken[jj].n; |
+ p->pPhrase->aToken[jj].z = zBuf; |
+ zBuf += p->pPhrase->aToken[jj].n; |
} |
- sqlite3_free(zTemp); |
- p->eType = FTSQUERY_PHRASE; |
- p->pPhrase->iColumn = pParse->iDefaultCol; |
rc = SQLITE_OK; |
} |
@@ -302,12 +383,6 @@ no_mem: |
} |
/* |
-** Function getNextNode(), which is called by fts3ExprParse(), may itself |
-** call fts3ExprParse(). So this forward declaration is required. |
-*/ |
-static int fts3ExprParse(ParseContext *, const char *, int, Fts3Expr **, int *); |
- |
-/* |
** The output variable *ppExpr is populated with an allocated Fts3Expr |
** structure, or set to 0 if the end of the input buffer is reached. |
** |
@@ -341,6 +416,8 @@ static int getNextNode( |
const char *zInput = z; |
int nInput = n; |
+ pParse->isNot = 0; |
+ |
/* Skip over any whitespace before checking for a keyword, an open or |
** close bracket, or a quoted string. |
*/ |
@@ -401,27 +478,6 @@ static int getNextNode( |
} |
} |
- /* Check for an open bracket. */ |
- if( sqlite3_fts3_enable_parentheses ){ |
- if( *zInput=='(' ){ |
- int nConsumed; |
- pParse->nNest++; |
- rc = fts3ExprParse(pParse, &zInput[1], nInput-1, ppExpr, &nConsumed); |
- if( rc==SQLITE_OK && !*ppExpr ){ |
- rc = SQLITE_DONE; |
- } |
- *pnConsumed = (int)((zInput - z) + 1 + nConsumed); |
- return rc; |
- } |
- |
- /* Check for a close bracket. */ |
- if( *zInput==')' ){ |
- pParse->nNest--; |
- *pnConsumed = (int)((zInput - z) + 1); |
- return SQLITE_DONE; |
- } |
- } |
- |
/* See if we are dealing with a quoted phrase. If this is the case, then |
** search for the closing quote and pass the whole string to getNextString() |
** for processing. This is easy to do, as fts3 has no syntax for escaping |
@@ -436,6 +492,21 @@ static int getNextNode( |
return getNextString(pParse, &zInput[1], ii-1, ppExpr); |
} |
+ if( sqlite3_fts3_enable_parentheses ){ |
+ if( *zInput=='(' ){ |
+ int nConsumed = 0; |
+ pParse->nNest++; |
+ rc = fts3ExprParse(pParse, zInput+1, nInput-1, ppExpr, &nConsumed); |
+ if( rc==SQLITE_OK && !*ppExpr ){ rc = SQLITE_DONE; } |
+ *pnConsumed = (int)(zInput - z) + 1 + nConsumed; |
+ return rc; |
+ }else if( *zInput==')' ){ |
+ pParse->nNest--; |
+ *pnConsumed = (int)((zInput - z) + 1); |
+ *ppExpr = 0; |
+ return SQLITE_DONE; |
+ } |
+ } |
/* If control flows to this point, this must be a regular token, or |
** the end of the input. Read a regular token using the sqlite3_tokenizer |
@@ -554,95 +625,100 @@ static int fts3ExprParse( |
while( rc==SQLITE_OK ){ |
Fts3Expr *p = 0; |
int nByte = 0; |
+ |
rc = getNextNode(pParse, zIn, nIn, &p, &nByte); |
+ assert( nByte>0 || (rc!=SQLITE_OK && p==0) ); |
if( rc==SQLITE_OK ){ |
- int isPhrase; |
- |
- if( !sqlite3_fts3_enable_parentheses |
- && p->eType==FTSQUERY_PHRASE && p->pPhrase->isNot |
- ){ |
- /* Create an implicit NOT operator. */ |
- Fts3Expr *pNot = fts3MallocZero(sizeof(Fts3Expr)); |
- if( !pNot ){ |
- sqlite3Fts3ExprFree(p); |
- rc = SQLITE_NOMEM; |
- goto exprparse_out; |
- } |
- pNot->eType = FTSQUERY_NOT; |
- pNot->pRight = p; |
- if( pNotBranch ){ |
- pNot->pLeft = pNotBranch; |
- } |
- pNotBranch = pNot; |
- p = pPrev; |
- }else{ |
- int eType = p->eType; |
- assert( eType!=FTSQUERY_PHRASE || !p->pPhrase->isNot ); |
- isPhrase = (eType==FTSQUERY_PHRASE || p->pLeft); |
- |
- /* The isRequirePhrase variable is set to true if a phrase or |
- ** an expression contained in parenthesis is required. If a |
- ** binary operator (AND, OR, NOT or NEAR) is encounted when |
- ** isRequirePhrase is set, this is a syntax error. |
- */ |
- if( !isPhrase && isRequirePhrase ){ |
- sqlite3Fts3ExprFree(p); |
- rc = SQLITE_ERROR; |
- goto exprparse_out; |
- } |
- |
- if( isPhrase && !isRequirePhrase ){ |
- /* Insert an implicit AND operator. */ |
- Fts3Expr *pAnd; |
- assert( pRet && pPrev ); |
- pAnd = fts3MallocZero(sizeof(Fts3Expr)); |
- if( !pAnd ){ |
+ if( p ){ |
+ int isPhrase; |
+ |
+ if( !sqlite3_fts3_enable_parentheses |
+ && p->eType==FTSQUERY_PHRASE && pParse->isNot |
+ ){ |
+ /* Create an implicit NOT operator. */ |
+ Fts3Expr *pNot = fts3MallocZero(sizeof(Fts3Expr)); |
+ if( !pNot ){ |
sqlite3Fts3ExprFree(p); |
rc = SQLITE_NOMEM; |
goto exprparse_out; |
} |
- pAnd->eType = FTSQUERY_AND; |
- insertBinaryOperator(&pRet, pPrev, pAnd); |
- pPrev = pAnd; |
- } |
+ pNot->eType = FTSQUERY_NOT; |
+ pNot->pRight = p; |
+ p->pParent = pNot; |
+ if( pNotBranch ){ |
+ pNot->pLeft = pNotBranch; |
+ pNotBranch->pParent = pNot; |
+ } |
+ pNotBranch = pNot; |
+ p = pPrev; |
+ }else{ |
+ int eType = p->eType; |
+ isPhrase = (eType==FTSQUERY_PHRASE || p->pLeft); |
+ |
+ /* The isRequirePhrase variable is set to true if a phrase or |
+ ** an expression contained in parenthesis is required. If a |
+ ** binary operator (AND, OR, NOT or NEAR) is encounted when |
+ ** isRequirePhrase is set, this is a syntax error. |
+ */ |
+ if( !isPhrase && isRequirePhrase ){ |
+ sqlite3Fts3ExprFree(p); |
+ rc = SQLITE_ERROR; |
+ goto exprparse_out; |
+ } |
+ |
+ if( isPhrase && !isRequirePhrase ){ |
+ /* Insert an implicit AND operator. */ |
+ Fts3Expr *pAnd; |
+ assert( pRet && pPrev ); |
+ pAnd = fts3MallocZero(sizeof(Fts3Expr)); |
+ if( !pAnd ){ |
+ sqlite3Fts3ExprFree(p); |
+ rc = SQLITE_NOMEM; |
+ goto exprparse_out; |
+ } |
+ pAnd->eType = FTSQUERY_AND; |
+ insertBinaryOperator(&pRet, pPrev, pAnd); |
+ pPrev = pAnd; |
+ } |
- /* This test catches attempts to make either operand of a NEAR |
- ** operator something other than a phrase. For example, either of |
- ** the following: |
- ** |
- ** (bracketed expression) NEAR phrase |
- ** phrase NEAR (bracketed expression) |
- ** |
- ** Return an error in either case. |
- */ |
- if( pPrev && ( |
+ /* This test catches attempts to make either operand of a NEAR |
+ ** operator something other than a phrase. For example, either of |
+ ** the following: |
+ ** |
+ ** (bracketed expression) NEAR phrase |
+ ** phrase NEAR (bracketed expression) |
+ ** |
+ ** Return an error in either case. |
+ */ |
+ if( pPrev && ( |
(eType==FTSQUERY_NEAR && !isPhrase && pPrev->eType!=FTSQUERY_PHRASE) |
|| (eType!=FTSQUERY_PHRASE && isPhrase && pPrev->eType==FTSQUERY_NEAR) |
- )){ |
- sqlite3Fts3ExprFree(p); |
- rc = SQLITE_ERROR; |
- goto exprparse_out; |
- } |
- |
- if( isPhrase ){ |
- if( pRet ){ |
- assert( pPrev && pPrev->pLeft && pPrev->pRight==0 ); |
- pPrev->pRight = p; |
- p->pParent = pPrev; |
+ )){ |
+ sqlite3Fts3ExprFree(p); |
+ rc = SQLITE_ERROR; |
+ goto exprparse_out; |
+ } |
+ |
+ if( isPhrase ){ |
+ if( pRet ){ |
+ assert( pPrev && pPrev->pLeft && pPrev->pRight==0 ); |
+ pPrev->pRight = p; |
+ p->pParent = pPrev; |
+ }else{ |
+ pRet = p; |
+ } |
}else{ |
- pRet = p; |
+ insertBinaryOperator(&pRet, pPrev, p); |
} |
- }else{ |
- insertBinaryOperator(&pRet, pPrev, p); |
+ isRequirePhrase = !isPhrase; |
} |
- isRequirePhrase = !isPhrase; |
+ pPrev = p; |
} |
assert( nByte>0 ); |
} |
assert( rc!=SQLITE_OK || (nByte>0 && nByte<=nIn) ); |
nIn -= nByte; |
zIn += nByte; |
- pPrev = p; |
} |
if( rc==SQLITE_DONE && pRet && isRequirePhrase ){ |
@@ -660,6 +736,7 @@ static int fts3ExprParse( |
pIter = pIter->pLeft; |
} |
pIter->pLeft = pRet; |
+ pRet->pParent = pIter; |
pRet = pNotBranch; |
} |
} |
@@ -677,6 +754,223 @@ exprparse_out: |
} |
/* |
+** Return SQLITE_ERROR if the maximum depth of the expression tree passed |
+** as the only argument is more than nMaxDepth. |
+*/ |
+static int fts3ExprCheckDepth(Fts3Expr *p, int nMaxDepth){ |
+ int rc = SQLITE_OK; |
+ if( p ){ |
+ if( nMaxDepth<0 ){ |
+ rc = SQLITE_TOOBIG; |
+ }else{ |
+ rc = fts3ExprCheckDepth(p->pLeft, nMaxDepth-1); |
+ if( rc==SQLITE_OK ){ |
+ rc = fts3ExprCheckDepth(p->pRight, nMaxDepth-1); |
+ } |
+ } |
+ } |
+ return rc; |
+} |
+ |
+/* |
+** This function attempts to transform the expression tree at (*pp) to |
+** an equivalent but more balanced form. The tree is modified in place. |
+** If successful, SQLITE_OK is returned and (*pp) set to point to the |
+** new root expression node. |
+** |
+** nMaxDepth is the maximum allowable depth of the balanced sub-tree. |
+** |
+** Otherwise, if an error occurs, an SQLite error code is returned and |
+** expression (*pp) freed. |
+*/ |
+static int fts3ExprBalance(Fts3Expr **pp, int nMaxDepth){ |
+ int rc = SQLITE_OK; /* Return code */ |
+ Fts3Expr *pRoot = *pp; /* Initial root node */ |
+ Fts3Expr *pFree = 0; /* List of free nodes. Linked by pParent. */ |
+ int eType = pRoot->eType; /* Type of node in this tree */ |
+ |
+ if( nMaxDepth==0 ){ |
+ rc = SQLITE_ERROR; |
+ } |
+ |
+ if( rc==SQLITE_OK && (eType==FTSQUERY_AND || eType==FTSQUERY_OR) ){ |
+ Fts3Expr **apLeaf; |
+ apLeaf = (Fts3Expr **)sqlite3_malloc(sizeof(Fts3Expr *) * nMaxDepth); |
+ if( 0==apLeaf ){ |
+ rc = SQLITE_NOMEM; |
+ }else{ |
+ memset(apLeaf, 0, sizeof(Fts3Expr *) * nMaxDepth); |
+ } |
+ |
+ if( rc==SQLITE_OK ){ |
+ int i; |
+ Fts3Expr *p; |
+ |
+ /* Set $p to point to the left-most leaf in the tree of eType nodes. */ |
+ for(p=pRoot; p->eType==eType; p=p->pLeft){ |
+ assert( p->pParent==0 || p->pParent->pLeft==p ); |
+ assert( p->pLeft && p->pRight ); |
+ } |
+ |
+ /* This loop runs once for each leaf in the tree of eType nodes. */ |
+ while( 1 ){ |
+ int iLvl; |
+ Fts3Expr *pParent = p->pParent; /* Current parent of p */ |
+ |
+ assert( pParent==0 || pParent->pLeft==p ); |
+ p->pParent = 0; |
+ if( pParent ){ |
+ pParent->pLeft = 0; |
+ }else{ |
+ pRoot = 0; |
+ } |
+ rc = fts3ExprBalance(&p, nMaxDepth-1); |
+ if( rc!=SQLITE_OK ) break; |
+ |
+ for(iLvl=0; p && iLvl<nMaxDepth; iLvl++){ |
+ if( apLeaf[iLvl]==0 ){ |
+ apLeaf[iLvl] = p; |
+ p = 0; |
+ }else{ |
+ assert( pFree ); |
+ pFree->pLeft = apLeaf[iLvl]; |
+ pFree->pRight = p; |
+ pFree->pLeft->pParent = pFree; |
+ pFree->pRight->pParent = pFree; |
+ |
+ p = pFree; |
+ pFree = pFree->pParent; |
+ p->pParent = 0; |
+ apLeaf[iLvl] = 0; |
+ } |
+ } |
+ if( p ){ |
+ sqlite3Fts3ExprFree(p); |
+ rc = SQLITE_TOOBIG; |
+ break; |
+ } |
+ |
+ /* If that was the last leaf node, break out of the loop */ |
+ if( pParent==0 ) break; |
+ |
+ /* Set $p to point to the next leaf in the tree of eType nodes */ |
+ for(p=pParent->pRight; p->eType==eType; p=p->pLeft); |
+ |
+ /* Remove pParent from the original tree. */ |
+ assert( pParent->pParent==0 || pParent->pParent->pLeft==pParent ); |
+ pParent->pRight->pParent = pParent->pParent; |
+ if( pParent->pParent ){ |
+ pParent->pParent->pLeft = pParent->pRight; |
+ }else{ |
+ assert( pParent==pRoot ); |
+ pRoot = pParent->pRight; |
+ } |
+ |
+ /* Link pParent into the free node list. It will be used as an |
+ ** internal node of the new tree. */ |
+ pParent->pParent = pFree; |
+ pFree = pParent; |
+ } |
+ |
+ if( rc==SQLITE_OK ){ |
+ p = 0; |
+ for(i=0; i<nMaxDepth; i++){ |
+ if( apLeaf[i] ){ |
+ if( p==0 ){ |
+ p = apLeaf[i]; |
+ p->pParent = 0; |
+ }else{ |
+ assert( pFree!=0 ); |
+ pFree->pRight = p; |
+ pFree->pLeft = apLeaf[i]; |
+ pFree->pLeft->pParent = pFree; |
+ pFree->pRight->pParent = pFree; |
+ |
+ p = pFree; |
+ pFree = pFree->pParent; |
+ p->pParent = 0; |
+ } |
+ } |
+ } |
+ pRoot = p; |
+ }else{ |
+ /* An error occurred. Delete the contents of the apLeaf[] array |
+ ** and pFree list. Everything else is cleaned up by the call to |
+ ** sqlite3Fts3ExprFree(pRoot) below. */ |
+ Fts3Expr *pDel; |
+ for(i=0; i<nMaxDepth; i++){ |
+ sqlite3Fts3ExprFree(apLeaf[i]); |
+ } |
+ while( (pDel=pFree)!=0 ){ |
+ pFree = pDel->pParent; |
+ sqlite3_free(pDel); |
+ } |
+ } |
+ |
+ assert( pFree==0 ); |
+ sqlite3_free( apLeaf ); |
+ } |
+ } |
+ |
+ if( rc!=SQLITE_OK ){ |
+ sqlite3Fts3ExprFree(pRoot); |
+ pRoot = 0; |
+ } |
+ *pp = pRoot; |
+ return rc; |
+} |
+ |
+/* |
+** This function is similar to sqlite3Fts3ExprParse(), with the following |
+** differences: |
+** |
+** 1. It does not do expression rebalancing. |
+** 2. It does not check that the expression does not exceed the |
+** maximum allowable depth. |
+** 3. Even if it fails, *ppExpr may still be set to point to an |
+** expression tree. It should be deleted using sqlite3Fts3ExprFree() |
+** in this case. |
+*/ |
+static int fts3ExprParseUnbalanced( |
+ sqlite3_tokenizer *pTokenizer, /* Tokenizer module */ |
+ int iLangid, /* Language id for tokenizer */ |
+ char **azCol, /* Array of column names for fts3 table */ |
+ int bFts4, /* True to allow FTS4-only syntax */ |
+ int nCol, /* Number of entries in azCol[] */ |
+ int iDefaultCol, /* Default column to query */ |
+ const char *z, int n, /* Text of MATCH query */ |
+ Fts3Expr **ppExpr /* OUT: Parsed query structure */ |
+){ |
+ int nParsed; |
+ int rc; |
+ ParseContext sParse; |
+ |
+ memset(&sParse, 0, sizeof(ParseContext)); |
+ sParse.pTokenizer = pTokenizer; |
+ sParse.iLangid = iLangid; |
+ sParse.azCol = (const char **)azCol; |
+ sParse.nCol = nCol; |
+ sParse.iDefaultCol = iDefaultCol; |
+ sParse.bFts4 = bFts4; |
+ if( z==0 ){ |
+ *ppExpr = 0; |
+ return SQLITE_OK; |
+ } |
+ if( n<0 ){ |
+ n = (int)strlen(z); |
+ } |
+ rc = fts3ExprParse(&sParse, z, n, ppExpr, &nParsed); |
+ assert( rc==SQLITE_OK || *ppExpr==0 ); |
+ |
+ /* Check for mismatched parenthesis */ |
+ if( rc==SQLITE_OK && sParse.nNest ){ |
+ rc = SQLITE_ERROR; |
+ } |
+ |
+ return rc; |
+} |
+ |
+/* |
** Parameters z and n contain a pointer to and length of a buffer containing |
** an fts3 query expression, respectively. This function attempts to parse the |
** query expression and create a tree of Fts3Expr structures representing the |
@@ -702,48 +996,80 @@ exprparse_out: |
*/ |
int sqlite3Fts3ExprParse( |
sqlite3_tokenizer *pTokenizer, /* Tokenizer module */ |
+ int iLangid, /* Language id for tokenizer */ |
char **azCol, /* Array of column names for fts3 table */ |
+ int bFts4, /* True to allow FTS4-only syntax */ |
int nCol, /* Number of entries in azCol[] */ |
int iDefaultCol, /* Default column to query */ |
const char *z, int n, /* Text of MATCH query */ |
- Fts3Expr **ppExpr /* OUT: Parsed query structure */ |
+ Fts3Expr **ppExpr, /* OUT: Parsed query structure */ |
+ char **pzErr /* OUT: Error message (sqlite3_malloc) */ |
){ |
- int nParsed; |
- int rc; |
- ParseContext sParse; |
- sParse.pTokenizer = pTokenizer; |
- sParse.azCol = (const char **)azCol; |
- sParse.nCol = nCol; |
- sParse.iDefaultCol = iDefaultCol; |
- sParse.nNest = 0; |
- if( z==0 ){ |
- *ppExpr = 0; |
- return SQLITE_OK; |
- } |
- if( n<0 ){ |
- n = (int)strlen(z); |
+ int rc = fts3ExprParseUnbalanced( |
+ pTokenizer, iLangid, azCol, bFts4, nCol, iDefaultCol, z, n, ppExpr |
+ ); |
+ |
+ /* Rebalance the expression. And check that its depth does not exceed |
+ ** SQLITE_FTS3_MAX_EXPR_DEPTH. */ |
+ if( rc==SQLITE_OK && *ppExpr ){ |
+ rc = fts3ExprBalance(ppExpr, SQLITE_FTS3_MAX_EXPR_DEPTH); |
+ if( rc==SQLITE_OK ){ |
+ rc = fts3ExprCheckDepth(*ppExpr, SQLITE_FTS3_MAX_EXPR_DEPTH); |
+ } |
} |
- rc = fts3ExprParse(&sParse, z, n, ppExpr, &nParsed); |
- /* Check for mismatched parenthesis */ |
- if( rc==SQLITE_OK && sParse.nNest ){ |
- rc = SQLITE_ERROR; |
+ if( rc!=SQLITE_OK ){ |
sqlite3Fts3ExprFree(*ppExpr); |
*ppExpr = 0; |
+ if( rc==SQLITE_TOOBIG ){ |
+ *pzErr = sqlite3_mprintf( |
+ "FTS expression tree is too large (maximum depth %d)", |
+ SQLITE_FTS3_MAX_EXPR_DEPTH |
+ ); |
+ rc = SQLITE_ERROR; |
+ }else if( rc==SQLITE_ERROR ){ |
+ *pzErr = sqlite3_mprintf("malformed MATCH expression: [%s]", z); |
+ } |
} |
return rc; |
} |
/* |
+** Free a single node of an expression tree. |
+*/ |
+static void fts3FreeExprNode(Fts3Expr *p){ |
+ assert( p->eType==FTSQUERY_PHRASE || p->pPhrase==0 ); |
+ sqlite3Fts3EvalPhraseCleanup(p->pPhrase); |
+ sqlite3_free(p->aMI); |
+ sqlite3_free(p); |
+} |
+ |
+/* |
** Free a parsed fts3 query expression allocated by sqlite3Fts3ExprParse(). |
+** |
+** This function would be simpler if it recursively called itself. But |
+** that would mean passing a sufficiently large expression to ExprParse() |
+** could cause a stack overflow. |
*/ |
-void sqlite3Fts3ExprFree(Fts3Expr *p){ |
- if( p ){ |
- sqlite3Fts3ExprFree(p->pLeft); |
- sqlite3Fts3ExprFree(p->pRight); |
- sqlite3_free(p->aDoclist); |
- sqlite3_free(p); |
+void sqlite3Fts3ExprFree(Fts3Expr *pDel){ |
+ Fts3Expr *p; |
+ assert( pDel==0 || pDel->pParent==0 ); |
+ for(p=pDel; p && (p->pLeft||p->pRight); p=(p->pLeft ? p->pLeft : p->pRight)){ |
+ assert( p->pParent==0 || p==p->pParent->pRight || p==p->pParent->pLeft ); |
+ } |
+ while( p ){ |
+ Fts3Expr *pParent = p->pParent; |
+ fts3FreeExprNode(p); |
+ if( pParent && p==pParent->pLeft && pParent->pRight ){ |
+ p = pParent->pRight; |
+ while( p && (p->pLeft || p->pRight) ){ |
+ assert( p==p->pParent->pRight || p==p->pParent->pLeft ); |
+ p = (p->pLeft ? p->pLeft : p->pRight); |
+ } |
+ }else{ |
+ p = pParent; |
+ } |
} |
} |
@@ -795,12 +1121,15 @@ static int queryTestTokenizer( |
** the returned expression text and then freed using sqlite3_free(). |
*/ |
static char *exprToString(Fts3Expr *pExpr, char *zBuf){ |
+ if( pExpr==0 ){ |
+ return sqlite3_mprintf(""); |
+ } |
switch( pExpr->eType ){ |
case FTSQUERY_PHRASE: { |
Fts3Phrase *pPhrase = pExpr->pPhrase; |
int i; |
zBuf = sqlite3_mprintf( |
- "%zPHRASE %d %d", zBuf, pPhrase->iColumn, pPhrase->isNot); |
+ "%zPHRASE %d 0", zBuf, pPhrase->iColumn); |
for(i=0; zBuf && i<pPhrase->nToken; i++){ |
zBuf = sqlite3_mprintf("%z %.*s%s", zBuf, |
pPhrase->aToken[i].n, pPhrase->aToken[i].z, |
@@ -902,10 +1231,21 @@ static void fts3ExprTest( |
azCol[ii] = (char *)sqlite3_value_text(argv[ii+2]); |
} |
- rc = sqlite3Fts3ExprParse( |
- pTokenizer, azCol, nCol, nCol, zExpr, nExpr, &pExpr |
- ); |
+ if( sqlite3_user_data(context) ){ |
+ char *zDummy = 0; |
+ rc = sqlite3Fts3ExprParse( |
+ pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr, &zDummy |
+ ); |
+ assert( rc==SQLITE_OK || pExpr==0 ); |
+ sqlite3_free(zDummy); |
+ }else{ |
+ rc = fts3ExprParseUnbalanced( |
+ pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr |
+ ); |
+ } |
+ |
if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM ){ |
+ sqlite3Fts3ExprFree(pExpr); |
sqlite3_result_error(context, "Error parsing expression", -1); |
}else if( rc==SQLITE_NOMEM || !(zBuf = exprToString(pExpr, 0)) ){ |
sqlite3_result_error_nomem(context); |
@@ -928,9 +1268,15 @@ exprtest_out: |
** with database connection db. |
*/ |
int sqlite3Fts3ExprInitTestInterface(sqlite3* db){ |
- return sqlite3_create_function( |
+ int rc = sqlite3_create_function( |
db, "fts3_exprtest", -1, SQLITE_UTF8, 0, fts3ExprTest, 0, 0 |
); |
+ if( rc==SQLITE_OK ){ |
+ rc = sqlite3_create_function(db, "fts3_exprtest_rebalance", |
+ -1, SQLITE_UTF8, (void *)1, fts3ExprTest, 0, 0 |
+ ); |
+ } |
+ return rc; |
} |
#endif |