OLD | NEW |
1 /* | 1 /* |
2 ** 2008 Nov 28 | 2 ** 2008 Nov 28 |
3 ** | 3 ** |
4 ** The author disclaims copyright to this source code. In place of | 4 ** The author disclaims copyright to this source code. In place of |
5 ** a legal notice, here is a blessing: | 5 ** a legal notice, here is a blessing: |
6 ** | 6 ** |
7 ** May you do good and not evil. | 7 ** May you do good and not evil. |
8 ** May you find forgiveness for yourself and forgive others. | 8 ** May you find forgiveness for yourself and forgive others. |
9 ** May you share freely, never taking more than you give. | 9 ** May you share freely, never taking more than you give. |
10 ** | 10 ** |
11 ****************************************************************************** | 11 ****************************************************************************** |
12 ** | 12 ** |
13 ** This module contains code that implements a parser for fts3 query strings | 13 ** This module contains code that implements a parser for fts3 query strings |
14 ** (the right-hand argument to the MATCH operator). Because the supported | 14 ** (the right-hand argument to the MATCH operator). Because the supported |
15 ** syntax is relatively simple, the whole tokenizer/parser system is | 15 ** syntax is relatively simple, the whole tokenizer/parser system is |
16 ** hand-coded. | 16 ** hand-coded. |
17 */ | 17 */ |
| 18 #include "fts3Int.h" |
18 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) | 19 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) |
19 | 20 |
20 /* | 21 /* |
21 ** By default, this module parses the legacy syntax that has been | 22 ** By default, this module parses the legacy syntax that has been |
22 ** traditionally used by fts3. Or, if SQLITE_ENABLE_FTS3_PARENTHESIS | 23 ** traditionally used by fts3. Or, if SQLITE_ENABLE_FTS3_PARENTHESIS |
23 ** is defined, then it uses the new syntax. The differences between | 24 ** is defined, then it uses the new syntax. The differences between |
24 ** the new and the old syntaxes are: | 25 ** the new and the old syntaxes are: |
25 ** | 26 ** |
26 ** a) The new syntax supports parenthesis. The old does not. | 27 ** a) The new syntax supports parenthesis. The old does not. |
27 ** | 28 ** |
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
70 # else | 71 # else |
71 # define sqlite3_fts3_enable_parentheses 0 | 72 # define sqlite3_fts3_enable_parentheses 0 |
72 # endif | 73 # endif |
73 #endif | 74 #endif |
74 | 75 |
75 /* | 76 /* |
76 ** Default span for NEAR operators. | 77 ** Default span for NEAR operators. |
77 */ | 78 */ |
78 #define SQLITE_FTS3_DEFAULT_NEAR_PARAM 10 | 79 #define SQLITE_FTS3_DEFAULT_NEAR_PARAM 10 |
79 | 80 |
80 #include "fts3Int.h" | |
81 #include <string.h> | 81 #include <string.h> |
82 #include <assert.h> | 82 #include <assert.h> |
83 | 83 |
| 84 /* |
| 85 ** isNot: |
| 86 ** This variable is used by function getNextNode(). When getNextNode() is |
| 87 ** called, it sets ParseContext.isNot to true if the 'next node' is a |
| 88 ** FTSQUERY_PHRASE with a unary "-" attached to it. i.e. "mysql" in the |
| 89 ** FTS3 query "sqlite -mysql". Otherwise, ParseContext.isNot is set to |
| 90 ** zero. |
| 91 */ |
84 typedef struct ParseContext ParseContext; | 92 typedef struct ParseContext ParseContext; |
85 struct ParseContext { | 93 struct ParseContext { |
86 sqlite3_tokenizer *pTokenizer; /* Tokenizer module */ | 94 sqlite3_tokenizer *pTokenizer; /* Tokenizer module */ |
| 95 int iLangid; /* Language id used with tokenizer */ |
87 const char **azCol; /* Array of column names for fts3 table */ | 96 const char **azCol; /* Array of column names for fts3 table */ |
| 97 int bFts4; /* True to allow FTS4-only syntax */ |
88 int nCol; /* Number of entries in azCol[] */ | 98 int nCol; /* Number of entries in azCol[] */ |
89 int iDefaultCol; /* Default column to query */ | 99 int iDefaultCol; /* Default column to query */ |
| 100 int isNot; /* True if getNextNode() sees a unary - */ |
90 sqlite3_context *pCtx; /* Write error message here */ | 101 sqlite3_context *pCtx; /* Write error message here */ |
91 int nNest; /* Number of nested brackets */ | 102 int nNest; /* Number of nested brackets */ |
92 }; | 103 }; |
93 | 104 |
/*
** A safe stand-in for the standard isspace() function.
**
** isspace() takes an int, but its behavior is undefined for any value
** that is not representable as an unsigned char (and in practice
** "undefined" can mean a crash). This version takes a plain char instead.
** It returns 1 for space, tab, newline, carriage return, vertical tab and
** form feed, and 0 for every other value, including all negative chars.
*/
static int fts3isspace(char c){
  switch( c ){
    case ' ':
    case '\t':
    case '\n':
    case '\r':
    case '\v':
    case '\f':
      return 1;
    default:
      return 0;
  }
}
108 | 119 |
/*
** Request nByte bytes from sqlite3_malloc(). On success every byte of the
** new buffer is cleared to zero and a pointer to it is returned. If the
** allocation fails, NULL is returned.
*/
static void *fts3MallocZero(int nByte){
  void *pNew = sqlite3_malloc(nByte);
  if( pNew==0 ) return 0;
  memset(pNew, 0, nByte);
  return pNew;
}
119 | 130 |
| 131 int sqlite3Fts3OpenTokenizer( |
| 132 sqlite3_tokenizer *pTokenizer, |
| 133 int iLangid, |
| 134 const char *z, |
| 135 int n, |
| 136 sqlite3_tokenizer_cursor **ppCsr |
| 137 ){ |
| 138 sqlite3_tokenizer_module const *pModule = pTokenizer->pModule; |
| 139 sqlite3_tokenizer_cursor *pCsr = 0; |
| 140 int rc; |
| 141 |
| 142 rc = pModule->xOpen(pTokenizer, z, n, &pCsr); |
| 143 assert( rc==SQLITE_OK || pCsr==0 ); |
| 144 if( rc==SQLITE_OK ){ |
| 145 pCsr->pTokenizer = pTokenizer; |
| 146 if( pModule->iVersion>=1 ){ |
| 147 rc = pModule->xLanguageid(pCsr, iLangid); |
| 148 if( rc!=SQLITE_OK ){ |
| 149 pModule->xClose(pCsr); |
| 150 pCsr = 0; |
| 151 } |
| 152 } |
| 153 } |
| 154 *ppCsr = pCsr; |
| 155 return rc; |
| 156 } |
| 157 |
| 158 /* |
| 159 ** Function getNextNode(), which is called by fts3ExprParse(), may itself |
| 160 ** call fts3ExprParse(). So this forward declaration is required. |
| 161 */ |
| 162 static int fts3ExprParse(ParseContext *, const char *, int, Fts3Expr **, int *); |
120 | 163 |
121 /* | 164 /* |
122 ** Extract the next token from buffer z (length n) using the tokenizer | 165 ** Extract the next token from buffer z (length n) using the tokenizer |
123 ** and other information (column names etc.) in pParse. Create an Fts3Expr | 166 ** and other information (column names etc.) in pParse. Create an Fts3Expr |
124 ** structure of type FTSQUERY_PHRASE containing a phrase consisting of this | 167 ** structure of type FTSQUERY_PHRASE containing a phrase consisting of this |
125 ** single token and set *ppExpr to point to it. If the end of the buffer is | 168 ** single token and set *ppExpr to point to it. If the end of the buffer is |
126 ** reached before a token is found, set *ppExpr to zero. It is the | 169 ** reached before a token is found, set *ppExpr to zero. It is the |
127 ** responsibility of the caller to eventually deallocate the allocated | 170 ** responsibility of the caller to eventually deallocate the allocated |
128 ** Fts3Expr structure (if any) by passing it to sqlite3_free(). | 171 ** Fts3Expr structure (if any) by passing it to sqlite3_free(). |
129 ** | 172 ** |
130 ** Return SQLITE_OK if successful, or SQLITE_NOMEM if a memory allocation | 173 ** Return SQLITE_OK if successful, or SQLITE_NOMEM if a memory allocation |
131 ** fails. | 174 ** fails. |
132 */ | 175 */ |
133 static int getNextToken( | 176 static int getNextToken( |
134 ParseContext *pParse, /* fts3 query parse context */ | 177 ParseContext *pParse, /* fts3 query parse context */ |
135 int iCol, /* Value for Fts3Phrase.iColumn */ | 178 int iCol, /* Value for Fts3Phrase.iColumn */ |
136 const char *z, int n, /* Input string */ | 179 const char *z, int n, /* Input string */ |
137 Fts3Expr **ppExpr, /* OUT: expression */ | 180 Fts3Expr **ppExpr, /* OUT: expression */ |
138 int *pnConsumed /* OUT: Number of bytes consumed */ | 181 int *pnConsumed /* OUT: Number of bytes consumed */ |
139 ){ | 182 ){ |
140 sqlite3_tokenizer *pTokenizer = pParse->pTokenizer; | 183 sqlite3_tokenizer *pTokenizer = pParse->pTokenizer; |
141 sqlite3_tokenizer_module const *pModule = pTokenizer->pModule; | 184 sqlite3_tokenizer_module const *pModule = pTokenizer->pModule; |
142 int rc; | 185 int rc; |
143 sqlite3_tokenizer_cursor *pCursor; | 186 sqlite3_tokenizer_cursor *pCursor; |
144 Fts3Expr *pRet = 0; | 187 Fts3Expr *pRet = 0; |
145 int nConsumed = 0; | 188 int i = 0; |
146 | 189 |
147 rc = pModule->xOpen(pTokenizer, z, n, &pCursor); | 190 /* Set variable i to the maximum number of bytes of input to tokenize. */ |
| 191 for(i=0; i<n; i++){ |
| 192 if( sqlite3_fts3_enable_parentheses && (z[i]=='(' || z[i]==')') ) break; |
| 193 if( z[i]=='"' ) break; |
| 194 } |
| 195 |
| 196 *pnConsumed = i; |
| 197 rc = sqlite3Fts3OpenTokenizer(pTokenizer, pParse->iLangid, z, i, &pCursor); |
148 if( rc==SQLITE_OK ){ | 198 if( rc==SQLITE_OK ){ |
149 const char *zToken; | 199 const char *zToken; |
150 int nToken, iStart, iEnd, iPosition; | 200 int nToken = 0, iStart = 0, iEnd = 0, iPosition = 0; |
151 int nByte; /* total space to allocate */ | 201 int nByte; /* total space to allocate */ |
152 | 202 |
153 pCursor->pTokenizer = pTokenizer; | |
154 rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition); | 203 rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition); |
155 | |
156 if( rc==SQLITE_OK ){ | 204 if( rc==SQLITE_OK ){ |
157 nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken; | 205 nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken; |
158 pRet = (Fts3Expr *)fts3MallocZero(nByte); | 206 pRet = (Fts3Expr *)fts3MallocZero(nByte); |
159 if( !pRet ){ | 207 if( !pRet ){ |
160 rc = SQLITE_NOMEM; | 208 rc = SQLITE_NOMEM; |
161 }else{ | 209 }else{ |
162 pRet->eType = FTSQUERY_PHRASE; | 210 pRet->eType = FTSQUERY_PHRASE; |
163 pRet->pPhrase = (Fts3Phrase *)&pRet[1]; | 211 pRet->pPhrase = (Fts3Phrase *)&pRet[1]; |
164 pRet->pPhrase->nToken = 1; | 212 pRet->pPhrase->nToken = 1; |
165 pRet->pPhrase->iColumn = iCol; | 213 pRet->pPhrase->iColumn = iCol; |
166 pRet->pPhrase->aToken[0].n = nToken; | 214 pRet->pPhrase->aToken[0].n = nToken; |
167 pRet->pPhrase->aToken[0].z = (char *)&pRet->pPhrase[1]; | 215 pRet->pPhrase->aToken[0].z = (char *)&pRet->pPhrase[1]; |
168 memcpy(pRet->pPhrase->aToken[0].z, zToken, nToken); | 216 memcpy(pRet->pPhrase->aToken[0].z, zToken, nToken); |
169 | 217 |
170 if( iEnd<n && z[iEnd]=='*' ){ | 218 if( iEnd<n && z[iEnd]=='*' ){ |
171 pRet->pPhrase->aToken[0].isPrefix = 1; | 219 pRet->pPhrase->aToken[0].isPrefix = 1; |
172 iEnd++; | 220 iEnd++; |
173 } | 221 } |
174 if( !sqlite3_fts3_enable_parentheses && iStart>0 && z[iStart-1]=='-' ){ | 222 |
175 pRet->pPhrase->isNot = 1; | 223 while( 1 ){ |
| 224 if( !sqlite3_fts3_enable_parentheses |
| 225 && iStart>0 && z[iStart-1]=='-' |
| 226 ){ |
| 227 pParse->isNot = 1; |
| 228 iStart--; |
| 229 }else if( pParse->bFts4 && iStart>0 && z[iStart-1]=='^' ){ |
| 230 pRet->pPhrase->aToken[0].bFirst = 1; |
| 231 iStart--; |
| 232 }else{ |
| 233 break; |
| 234 } |
176 } | 235 } |
| 236 |
177 } | 237 } |
178 nConsumed = iEnd; | 238 *pnConsumed = iEnd; |
| 239 }else if( i && rc==SQLITE_DONE ){ |
| 240 rc = SQLITE_OK; |
179 } | 241 } |
180 | 242 |
181 pModule->xClose(pCursor); | 243 pModule->xClose(pCursor); |
182 } | 244 } |
183 | 245 |
184 *pnConsumed = nConsumed; | |
185 *ppExpr = pRet; | 246 *ppExpr = pRet; |
186 return rc; | 247 return rc; |
187 } | 248 } |
188 | 249 |
189 | 250 |
190 /* | 251 /* |
191 ** Enlarge a memory allocation. If an out-of-memory allocation occurs, | 252 ** Enlarge a memory allocation. If an out-of-memory allocation occurs, |
192 ** then free the old allocation. | 253 ** then free the old allocation. |
193 */ | 254 */ |
194 static void *fts3ReallocOrFree(void *pOrig, int nNew){ | 255 static void *fts3ReallocOrFree(void *pOrig, int nNew){ |
(...skipping 22 matching lines...) Expand all Loading... |
217 Fts3Expr **ppExpr /* OUT: expression */ | 278 Fts3Expr **ppExpr /* OUT: expression */ |
218 ){ | 279 ){ |
219 sqlite3_tokenizer *pTokenizer = pParse->pTokenizer; | 280 sqlite3_tokenizer *pTokenizer = pParse->pTokenizer; |
220 sqlite3_tokenizer_module const *pModule = pTokenizer->pModule; | 281 sqlite3_tokenizer_module const *pModule = pTokenizer->pModule; |
221 int rc; | 282 int rc; |
222 Fts3Expr *p = 0; | 283 Fts3Expr *p = 0; |
223 sqlite3_tokenizer_cursor *pCursor = 0; | 284 sqlite3_tokenizer_cursor *pCursor = 0; |
224 char *zTemp = 0; | 285 char *zTemp = 0; |
225 int nTemp = 0; | 286 int nTemp = 0; |
226 | 287 |
227 rc = pModule->xOpen(pTokenizer, zInput, nInput, &pCursor); | 288 const int nSpace = sizeof(Fts3Expr) + sizeof(Fts3Phrase); |
| 289 int nToken = 0; |
| 290 |
| 291 /* The final Fts3Expr data structure, including the Fts3Phrase, |
| 292 ** Fts3PhraseToken structures token buffers are all stored as a single |
| 293 ** allocation so that the expression can be freed with a single call to |
| 294 ** sqlite3_free(). Setting this up requires a two pass approach. |
| 295 ** |
| 296 ** The first pass, in the block below, uses a tokenizer cursor to iterate |
| 297 ** through the tokens in the expression. This pass uses fts3ReallocOrFree() |
| 298 ** to assemble data in two dynamic buffers: |
| 299 ** |
| 300 ** Buffer p: Points to the Fts3Expr structure, followed by the Fts3Phrase |
| 301 ** structure, followed by the array of Fts3PhraseToken |
| 302 ** structures. This pass only populates the Fts3PhraseToken array. |
| 303 ** |
| 304 ** Buffer zTemp: Contains copies of all tokens. |
| 305 ** |
| 306 ** The second pass, in the block that begins "if( rc==SQLITE_DONE )" below, |
| 307 ** appends buffer zTemp to buffer p, and fills in the Fts3Expr and Fts3Phrase |
| 308 ** structures. |
| 309 */ |
| 310 rc = sqlite3Fts3OpenTokenizer( |
| 311 pTokenizer, pParse->iLangid, zInput, nInput, &pCursor); |
228 if( rc==SQLITE_OK ){ | 312 if( rc==SQLITE_OK ){ |
229 int ii; | 313 int ii; |
230 pCursor->pTokenizer = pTokenizer; | |
231 for(ii=0; rc==SQLITE_OK; ii++){ | 314 for(ii=0; rc==SQLITE_OK; ii++){ |
232 const char *zToken; | 315 const char *zByte; |
233 int nToken, iBegin, iEnd, iPos; | 316 int nByte = 0, iBegin = 0, iEnd = 0, iPos = 0; |
234 rc = pModule->xNext(pCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos); | 317 rc = pModule->xNext(pCursor, &zByte, &nByte, &iBegin, &iEnd, &iPos); |
235 if( rc==SQLITE_OK ){ | 318 if( rc==SQLITE_OK ){ |
236 int nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase); | 319 Fts3PhraseToken *pToken; |
237 p = fts3ReallocOrFree(p, nByte+ii*sizeof(Fts3PhraseToken)); | 320 |
238 zTemp = fts3ReallocOrFree(zTemp, nTemp + nToken); | 321 p = fts3ReallocOrFree(p, nSpace + ii*sizeof(Fts3PhraseToken)); |
239 if( !p || !zTemp ){ | 322 if( !p ) goto no_mem; |
240 goto no_mem; | 323 |
241 } | 324 zTemp = fts3ReallocOrFree(zTemp, nTemp + nByte); |
242 if( ii==0 ){ | 325 if( !zTemp ) goto no_mem; |
243 memset(p, 0, nByte); | 326 |
244 p->pPhrase = (Fts3Phrase *)&p[1]; | 327 assert( nToken==ii ); |
245 } | 328 pToken = &((Fts3Phrase *)(&p[1]))->aToken[ii]; |
246 p->pPhrase = (Fts3Phrase *)&p[1]; | 329 memset(pToken, 0, sizeof(Fts3PhraseToken)); |
247 memset(&p->pPhrase->aToken[ii], 0, sizeof(Fts3PhraseToken)); | 330 |
248 p->pPhrase->nToken = ii+1; | 331 memcpy(&zTemp[nTemp], zByte, nByte); |
249 p->pPhrase->aToken[ii].n = nToken; | 332 nTemp += nByte; |
250 memcpy(&zTemp[nTemp], zToken, nToken); | 333 |
251 nTemp += nToken; | 334 pToken->n = nByte; |
252 if( iEnd<nInput && zInput[iEnd]=='*' ){ | 335 pToken->isPrefix = (iEnd<nInput && zInput[iEnd]=='*'); |
253 p->pPhrase->aToken[ii].isPrefix = 1; | 336 pToken->bFirst = (iBegin>0 && zInput[iBegin-1]=='^'); |
254 }else{ | 337 nToken = ii+1; |
255 p->pPhrase->aToken[ii].isPrefix = 0; | |
256 } | |
257 } | 338 } |
258 } | 339 } |
259 | 340 |
260 pModule->xClose(pCursor); | 341 pModule->xClose(pCursor); |
261 pCursor = 0; | 342 pCursor = 0; |
262 } | 343 } |
263 | 344 |
264 if( rc==SQLITE_DONE ){ | 345 if( rc==SQLITE_DONE ){ |
265 int jj; | 346 int jj; |
266 char *zNew = NULL; | 347 char *zBuf = 0; |
267 int nNew = 0; | 348 |
268 int nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase); | 349 p = fts3ReallocOrFree(p, nSpace + nToken*sizeof(Fts3PhraseToken) + nTemp); |
269 nByte += (p?(p->pPhrase->nToken-1):0) * sizeof(Fts3PhraseToken); | 350 if( !p ) goto no_mem; |
270 p = fts3ReallocOrFree(p, nByte + nTemp); | 351 memset(p, 0, (char *)&(((Fts3Phrase *)&p[1])->aToken[0])-(char *)p); |
271 if( !p ){ | 352 p->eType = FTSQUERY_PHRASE; |
272 goto no_mem; | 353 p->pPhrase = (Fts3Phrase *)&p[1]; |
| 354 p->pPhrase->iColumn = pParse->iDefaultCol; |
| 355 p->pPhrase->nToken = nToken; |
| 356 |
| 357 zBuf = (char *)&p->pPhrase->aToken[nToken]; |
| 358 if( zTemp ){ |
| 359 memcpy(zBuf, zTemp, nTemp); |
| 360 sqlite3_free(zTemp); |
| 361 }else{ |
| 362 assert( nTemp==0 ); |
273 } | 363 } |
274 if( zTemp ){ | 364 |
275 zNew = &(((char *)p)[nByte]); | 365 for(jj=0; jj<p->pPhrase->nToken; jj++){ |
276 memcpy(zNew, zTemp, nTemp); | 366 p->pPhrase->aToken[jj].z = zBuf; |
277 }else{ | 367 zBuf += p->pPhrase->aToken[jj].n; |
278 memset(p, 0, nByte+nTemp); | |
279 } | 368 } |
280 p->pPhrase = (Fts3Phrase *)&p[1]; | |
281 for(jj=0; jj<p->pPhrase->nToken; jj++){ | |
282 p->pPhrase->aToken[jj].z = &zNew[nNew]; | |
283 nNew += p->pPhrase->aToken[jj].n; | |
284 } | |
285 sqlite3_free(zTemp); | |
286 p->eType = FTSQUERY_PHRASE; | |
287 p->pPhrase->iColumn = pParse->iDefaultCol; | |
288 rc = SQLITE_OK; | 369 rc = SQLITE_OK; |
289 } | 370 } |
290 | 371 |
291 *ppExpr = p; | 372 *ppExpr = p; |
292 return rc; | 373 return rc; |
293 no_mem: | 374 no_mem: |
294 | 375 |
295 if( pCursor ){ | 376 if( pCursor ){ |
296 pModule->xClose(pCursor); | 377 pModule->xClose(pCursor); |
297 } | 378 } |
298 sqlite3_free(zTemp); | 379 sqlite3_free(zTemp); |
299 sqlite3_free(p); | 380 sqlite3_free(p); |
300 *ppExpr = 0; | 381 *ppExpr = 0; |
301 return SQLITE_NOMEM; | 382 return SQLITE_NOMEM; |
302 } | 383 } |
303 | 384 |
304 /* | 385 /* |
305 ** Function getNextNode(), which is called by fts3ExprParse(), may itself | |
306 ** call fts3ExprParse(). So this forward declaration is required. | |
307 */ | |
308 static int fts3ExprParse(ParseContext *, const char *, int, Fts3Expr **, int *); | |
309 | |
310 /* | |
311 ** The output variable *ppExpr is populated with an allocated Fts3Expr | 386 ** The output variable *ppExpr is populated with an allocated Fts3Expr |
312 ** structure, or set to 0 if the end of the input buffer is reached. | 387 ** structure, or set to 0 if the end of the input buffer is reached. |
313 ** | 388 ** |
314 ** Returns an SQLite error code. SQLITE_OK if everything works, SQLITE_NOMEM | 389 ** Returns an SQLite error code. SQLITE_OK if everything works, SQLITE_NOMEM |
315 ** if a malloc failure occurs, or SQLITE_ERROR if a parse error is encountered. | 390 ** if a malloc failure occurs, or SQLITE_ERROR if a parse error is encountered. |
316 ** If SQLITE_ERROR is returned, pContext is populated with an error message. | 391 ** If SQLITE_ERROR is returned, pContext is populated with an error message. |
317 */ | 392 */ |
318 static int getNextNode( | 393 static int getNextNode( |
319 ParseContext *pParse, /* fts3 query parse context */ | 394 ParseContext *pParse, /* fts3 query parse context */ |
320 const char *z, int n, /* Input string */ | 395 const char *z, int n, /* Input string */ |
(...skipping 13 matching lines...) Expand all Loading... |
334 }; | 409 }; |
335 int ii; | 410 int ii; |
336 int iCol; | 411 int iCol; |
337 int iColLen; | 412 int iColLen; |
338 int rc; | 413 int rc; |
339 Fts3Expr *pRet = 0; | 414 Fts3Expr *pRet = 0; |
340 | 415 |
341 const char *zInput = z; | 416 const char *zInput = z; |
342 int nInput = n; | 417 int nInput = n; |
343 | 418 |
| 419 pParse->isNot = 0; |
| 420 |
344 /* Skip over any whitespace before checking for a keyword, an open or | 421 /* Skip over any whitespace before checking for a keyword, an open or |
345 ** close bracket, or a quoted string. | 422 ** close bracket, or a quoted string. |
346 */ | 423 */ |
347 while( nInput>0 && fts3isspace(*zInput) ){ | 424 while( nInput>0 && fts3isspace(*zInput) ){ |
348 nInput--; | 425 nInput--; |
349 zInput++; | 426 zInput++; |
350 } | 427 } |
351 if( nInput==0 ){ | 428 if( nInput==0 ){ |
352 return SQLITE_DONE; | 429 return SQLITE_DONE; |
353 } | 430 } |
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
394 *pnConsumed = (int)((zInput - z) + nKey); | 471 *pnConsumed = (int)((zInput - z) + nKey); |
395 return SQLITE_OK; | 472 return SQLITE_OK; |
396 } | 473 } |
397 | 474 |
398 /* Turns out that wasn't a keyword after all. This happens if the | 475 /* Turns out that wasn't a keyword after all. This happens if the |
399 ** user has supplied a token such as "ORacle". Continue. | 476 ** user has supplied a token such as "ORacle". Continue. |
400 */ | 477 */ |
401 } | 478 } |
402 } | 479 } |
403 | 480 |
404 /* Check for an open bracket. */ | |
405 if( sqlite3_fts3_enable_parentheses ){ | |
406 if( *zInput=='(' ){ | |
407 int nConsumed; | |
408 pParse->nNest++; | |
409 rc = fts3ExprParse(pParse, &zInput[1], nInput-1, ppExpr, &nConsumed); | |
410 if( rc==SQLITE_OK && !*ppExpr ){ | |
411 rc = SQLITE_DONE; | |
412 } | |
413 *pnConsumed = (int)((zInput - z) + 1 + nConsumed); | |
414 return rc; | |
415 } | |
416 | |
417 /* Check for a close bracket. */ | |
418 if( *zInput==')' ){ | |
419 pParse->nNest--; | |
420 *pnConsumed = (int)((zInput - z) + 1); | |
421 return SQLITE_DONE; | |
422 } | |
423 } | |
424 | |
425 /* See if we are dealing with a quoted phrase. If this is the case, then | 481 /* See if we are dealing with a quoted phrase. If this is the case, then |
426 ** search for the closing quote and pass the whole string to getNextString() | 482 ** search for the closing quote and pass the whole string to getNextString() |
427 ** for processing. This is easy to do, as fts3 has no syntax for escaping | 483 ** for processing. This is easy to do, as fts3 has no syntax for escaping |
428 ** a quote character embedded in a string. | 484 ** a quote character embedded in a string. |
429 */ | 485 */ |
430 if( *zInput=='"' ){ | 486 if( *zInput=='"' ){ |
431 for(ii=1; ii<nInput && zInput[ii]!='"'; ii++); | 487 for(ii=1; ii<nInput && zInput[ii]!='"'; ii++); |
432 *pnConsumed = (int)((zInput - z) + ii + 1); | 488 *pnConsumed = (int)((zInput - z) + ii + 1); |
433 if( ii==nInput ){ | 489 if( ii==nInput ){ |
434 return SQLITE_ERROR; | 490 return SQLITE_ERROR; |
435 } | 491 } |
436 return getNextString(pParse, &zInput[1], ii-1, ppExpr); | 492 return getNextString(pParse, &zInput[1], ii-1, ppExpr); |
437 } | 493 } |
438 | 494 |
| 495 if( sqlite3_fts3_enable_parentheses ){ |
| 496 if( *zInput=='(' ){ |
| 497 int nConsumed = 0; |
| 498 pParse->nNest++; |
| 499 rc = fts3ExprParse(pParse, zInput+1, nInput-1, ppExpr, &nConsumed); |
| 500 if( rc==SQLITE_OK && !*ppExpr ){ rc = SQLITE_DONE; } |
| 501 *pnConsumed = (int)(zInput - z) + 1 + nConsumed; |
| 502 return rc; |
| 503 }else if( *zInput==')' ){ |
| 504 pParse->nNest--; |
| 505 *pnConsumed = (int)((zInput - z) + 1); |
| 506 *ppExpr = 0; |
| 507 return SQLITE_DONE; |
| 508 } |
| 509 } |
439 | 510 |
440 /* If control flows to this point, this must be a regular token, or | 511 /* If control flows to this point, this must be a regular token, or |
441 ** the end of the input. Read a regular token using the sqlite3_tokenizer | 512 ** the end of the input. Read a regular token using the sqlite3_tokenizer |
442 ** interface. Before doing so, figure out if there is an explicit | 513 ** interface. Before doing so, figure out if there is an explicit |
443 ** column specifier for the token. | 514 ** column specifier for the token. |
444 ** | 515 ** |
445 ** TODO: Strangely, it is not possible to associate a column specifier | 516 ** TODO: Strangely, it is not possible to associate a column specifier |
446 ** with a quoted phrase, only with a single token. Not sure if this was | 517 ** with a quoted phrase, only with a single token. Not sure if this was |
447 ** an implementation artifact or an intentional decision when fts3 was | 518 ** an implementation artifact or an intentional decision when fts3 was |
448 ** first implemented. Whichever it was, this module duplicates the | 519 ** first implemented. Whichever it was, this module duplicates the |
(...skipping 98 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
547 Fts3Expr *pPrev = 0; | 618 Fts3Expr *pPrev = 0; |
548 Fts3Expr *pNotBranch = 0; /* Only used in legacy parse mode */ | 619 Fts3Expr *pNotBranch = 0; /* Only used in legacy parse mode */ |
549 int nIn = n; | 620 int nIn = n; |
550 const char *zIn = z; | 621 const char *zIn = z; |
551 int rc = SQLITE_OK; | 622 int rc = SQLITE_OK; |
552 int isRequirePhrase = 1; | 623 int isRequirePhrase = 1; |
553 | 624 |
554 while( rc==SQLITE_OK ){ | 625 while( rc==SQLITE_OK ){ |
555 Fts3Expr *p = 0; | 626 Fts3Expr *p = 0; |
556 int nByte = 0; | 627 int nByte = 0; |
| 628 |
557 rc = getNextNode(pParse, zIn, nIn, &p, &nByte); | 629 rc = getNextNode(pParse, zIn, nIn, &p, &nByte); |
| 630 assert( nByte>0 || (rc!=SQLITE_OK && p==0) ); |
558 if( rc==SQLITE_OK ){ | 631 if( rc==SQLITE_OK ){ |
559 int isPhrase; | 632 if( p ){ |
| 633 int isPhrase; |
560 | 634 |
561 if( !sqlite3_fts3_enable_parentheses | 635 if( !sqlite3_fts3_enable_parentheses |
562 && p->eType==FTSQUERY_PHRASE && p->pPhrase->isNot | 636 && p->eType==FTSQUERY_PHRASE && pParse->isNot |
563 ){ | 637 ){ |
564 /* Create an implicit NOT operator. */ | 638 /* Create an implicit NOT operator. */ |
565 Fts3Expr *pNot = fts3MallocZero(sizeof(Fts3Expr)); | 639 Fts3Expr *pNot = fts3MallocZero(sizeof(Fts3Expr)); |
566 if( !pNot ){ | 640 if( !pNot ){ |
567 sqlite3Fts3ExprFree(p); | |
568 rc = SQLITE_NOMEM; | |
569 goto exprparse_out; | |
570 } | |
571 pNot->eType = FTSQUERY_NOT; | |
572 pNot->pRight = p; | |
573 if( pNotBranch ){ | |
574 pNot->pLeft = pNotBranch; | |
575 } | |
576 pNotBranch = pNot; | |
577 p = pPrev; | |
578 }else{ | |
579 int eType = p->eType; | |
580 assert( eType!=FTSQUERY_PHRASE || !p->pPhrase->isNot ); | |
581 isPhrase = (eType==FTSQUERY_PHRASE || p->pLeft); | |
582 | |
583 /* The isRequirePhrase variable is set to true if a phrase or | |
584 ** an expression contained in parenthesis is required. If a | |
585 ** binary operator (AND, OR, NOT or NEAR) is encounted when | |
586 ** isRequirePhrase is set, this is a syntax error. | |
587 */ | |
588 if( !isPhrase && isRequirePhrase ){ | |
589 sqlite3Fts3ExprFree(p); | |
590 rc = SQLITE_ERROR; | |
591 goto exprparse_out; | |
592 } | |
593 | |
594 if( isPhrase && !isRequirePhrase ){ | |
595 /* Insert an implicit AND operator. */ | |
596 Fts3Expr *pAnd; | |
597 assert( pRet && pPrev ); | |
598 pAnd = fts3MallocZero(sizeof(Fts3Expr)); | |
599 if( !pAnd ){ | |
600 sqlite3Fts3ExprFree(p); | 641 sqlite3Fts3ExprFree(p); |
601 rc = SQLITE_NOMEM; | 642 rc = SQLITE_NOMEM; |
602 goto exprparse_out; | 643 goto exprparse_out; |
603 } | 644 } |
604 pAnd->eType = FTSQUERY_AND; | 645 pNot->eType = FTSQUERY_NOT; |
605 insertBinaryOperator(&pRet, pPrev, pAnd); | 646 pNot->pRight = p; |
606 pPrev = pAnd; | 647 p->pParent = pNot; |
607 } | 648 if( pNotBranch ){ |
| 649 pNot->pLeft = pNotBranch; |
| 650 pNotBranch->pParent = pNot; |
| 651 } |
| 652 pNotBranch = pNot; |
| 653 p = pPrev; |
| 654 }else{ |
| 655 int eType = p->eType; |
| 656 isPhrase = (eType==FTSQUERY_PHRASE || p->pLeft); |
608 | 657 |
609 /* This test catches attempts to make either operand of a NEAR | 658 /* The isRequirePhrase variable is set to true if a phrase or |
610 ** operator something other than a phrase. For example, either of | 659 ** an expression contained in parenthesis is required. If a |
611 ** the following: | 660 ** binary operator (AND, OR, NOT or NEAR) is encounted when |
612 ** | 661 ** isRequirePhrase is set, this is a syntax error. |
613 ** (bracketed expression) NEAR phrase | 662 */ |
614 ** phrase NEAR (bracketed expression) | 663 if( !isPhrase && isRequirePhrase ){ |
615 ** | 664 sqlite3Fts3ExprFree(p); |
616 ** Return an error in either case. | 665 rc = SQLITE_ERROR; |
617 */ | 666 goto exprparse_out; |
618 if( pPrev && ( | 667 } |
| 668 |
| 669 if( isPhrase && !isRequirePhrase ){ |
| 670 /* Insert an implicit AND operator. */ |
| 671 Fts3Expr *pAnd; |
| 672 assert( pRet && pPrev ); |
| 673 pAnd = fts3MallocZero(sizeof(Fts3Expr)); |
| 674 if( !pAnd ){ |
| 675 sqlite3Fts3ExprFree(p); |
| 676 rc = SQLITE_NOMEM; |
| 677 goto exprparse_out; |
| 678 } |
| 679 pAnd->eType = FTSQUERY_AND; |
| 680 insertBinaryOperator(&pRet, pPrev, pAnd); |
| 681 pPrev = pAnd; |
| 682 } |
| 683 |
| 684 /* This test catches attempts to make either operand of a NEAR |
| 685 ** operator something other than a phrase. For example, either of |
| 686 ** the following: |
| 687 ** |
| 688 ** (bracketed expression) NEAR phrase |
| 689 ** phrase NEAR (bracketed expression) |
| 690 ** |
| 691 ** Return an error in either case. |
| 692 */ |
| 693 if( pPrev && ( |
619 (eType==FTSQUERY_NEAR && !isPhrase && pPrev->eType!=FTSQUERY_PHRASE) | 694 (eType==FTSQUERY_NEAR && !isPhrase && pPrev->eType!=FTSQUERY_PHRASE) |
620 || (eType!=FTSQUERY_PHRASE && isPhrase && pPrev->eType==FTSQUERY_NEAR) | 695 || (eType!=FTSQUERY_PHRASE && isPhrase && pPrev->eType==FTSQUERY_NEAR) |
621 )){ | 696 )){ |
622 sqlite3Fts3ExprFree(p); | 697 sqlite3Fts3ExprFree(p); |
623 rc = SQLITE_ERROR; | 698 rc = SQLITE_ERROR; |
624 goto exprparse_out; | 699 goto exprparse_out; |
| 700 } |
| 701 |
| 702 if( isPhrase ){ |
| 703 if( pRet ){ |
| 704 assert( pPrev && pPrev->pLeft && pPrev->pRight==0 ); |
| 705 pPrev->pRight = p; |
| 706 p->pParent = pPrev; |
| 707 }else{ |
| 708 pRet = p; |
| 709 } |
| 710 }else{ |
| 711 insertBinaryOperator(&pRet, pPrev, p); |
| 712 } |
| 713 isRequirePhrase = !isPhrase; |
625 } | 714 } |
626 | 715 pPrev = p; |
627 if( isPhrase ){ | |
628 if( pRet ){ | |
629 assert( pPrev && pPrev->pLeft && pPrev->pRight==0 ); | |
630 pPrev->pRight = p; | |
631 p->pParent = pPrev; | |
632 }else{ | |
633 pRet = p; | |
634 } | |
635 }else{ | |
636 insertBinaryOperator(&pRet, pPrev, p); | |
637 } | |
638 isRequirePhrase = !isPhrase; | |
639 } | 716 } |
640 assert( nByte>0 ); | 717 assert( nByte>0 ); |
641 } | 718 } |
642 assert( rc!=SQLITE_OK || (nByte>0 && nByte<=nIn) ); | 719 assert( rc!=SQLITE_OK || (nByte>0 && nByte<=nIn) ); |
643 nIn -= nByte; | 720 nIn -= nByte; |
644 zIn += nByte; | 721 zIn += nByte; |
645 pPrev = p; | |
646 } | 722 } |
647 | 723 |
648 if( rc==SQLITE_DONE && pRet && isRequirePhrase ){ | 724 if( rc==SQLITE_DONE && pRet && isRequirePhrase ){ |
649 rc = SQLITE_ERROR; | 725 rc = SQLITE_ERROR; |
650 } | 726 } |
651 | 727 |
652 if( rc==SQLITE_DONE ){ | 728 if( rc==SQLITE_DONE ){ |
653 rc = SQLITE_OK; | 729 rc = SQLITE_OK; |
654 if( !sqlite3_fts3_enable_parentheses && pNotBranch ){ | 730 if( !sqlite3_fts3_enable_parentheses && pNotBranch ){ |
655 if( !pRet ){ | 731 if( !pRet ){ |
656 rc = SQLITE_ERROR; | 732 rc = SQLITE_ERROR; |
657 }else{ | 733 }else{ |
658 Fts3Expr *pIter = pNotBranch; | 734 Fts3Expr *pIter = pNotBranch; |
659 while( pIter->pLeft ){ | 735 while( pIter->pLeft ){ |
660 pIter = pIter->pLeft; | 736 pIter = pIter->pLeft; |
661 } | 737 } |
662 pIter->pLeft = pRet; | 738 pIter->pLeft = pRet; |
| 739 pRet->pParent = pIter; |
663 pRet = pNotBranch; | 740 pRet = pNotBranch; |
664 } | 741 } |
665 } | 742 } |
666 } | 743 } |
667 *pnConsumed = n - nIn; | 744 *pnConsumed = n - nIn; |
668 | 745 |
669 exprparse_out: | 746 exprparse_out: |
670 if( rc!=SQLITE_OK ){ | 747 if( rc!=SQLITE_OK ){ |
671 sqlite3Fts3ExprFree(pRet); | 748 sqlite3Fts3ExprFree(pRet); |
672 sqlite3Fts3ExprFree(pNotBranch); | 749 sqlite3Fts3ExprFree(pNotBranch); |
673 pRet = 0; | 750 pRet = 0; |
674 } | 751 } |
675 *ppExpr = pRet; | 752 *ppExpr = pRet; |
676 return rc; | 753 return rc; |
677 } | 754 } |
678 | 755 |
679 /* | 756 /* |
| 757 ** Return SQLITE_ERROR if the maximum depth of the expression tree passed |
| 758 ** as the only argument is more than nMaxDepth. |
| 759 */ |
| 760 static int fts3ExprCheckDepth(Fts3Expr *p, int nMaxDepth){ |
| 761 int rc = SQLITE_OK; |
| 762 if( p ){ |
| 763 if( nMaxDepth<0 ){ |
| 764 rc = SQLITE_TOOBIG; |
| 765 }else{ |
| 766 rc = fts3ExprCheckDepth(p->pLeft, nMaxDepth-1); |
| 767 if( rc==SQLITE_OK ){ |
| 768 rc = fts3ExprCheckDepth(p->pRight, nMaxDepth-1); |
| 769 } |
| 770 } |
| 771 } |
| 772 return rc; |
| 773 } |
| 774 |
| 775 /* |
| 776 ** This function attempts to transform the expression tree at (*pp) to |
| 777 ** an equivalent but more balanced form. The tree is modified in place. |
| 778 ** If successful, SQLITE_OK is returned and (*pp) set to point to the |
| 779 ** new root expression node. |
| 780 ** |
| 781 ** nMaxDepth is the maximum allowable depth of the balanced sub-tree. |
| 782 ** |
| 783 ** Otherwise, if an error occurs, an SQLite error code is returned and |
| 784 ** expression (*pp) freed. |
| 785 */ |
| 786 static int fts3ExprBalance(Fts3Expr **pp, int nMaxDepth){ |
| 787 int rc = SQLITE_OK; /* Return code */ |
| 788 Fts3Expr *pRoot = *pp; /* Initial root node */ |
| 789 Fts3Expr *pFree = 0; /* List of free nodes. Linked by pParent. */ |
| 790 int eType = pRoot->eType; /* Type of node in this tree */ |
| 791 |
| 792 if( nMaxDepth==0 ){ |
| 793 rc = SQLITE_ERROR; |
| 794 } |
| 795 |
| 796 if( rc==SQLITE_OK && (eType==FTSQUERY_AND || eType==FTSQUERY_OR) ){ |
| 797 Fts3Expr **apLeaf; |
| 798 apLeaf = (Fts3Expr **)sqlite3_malloc(sizeof(Fts3Expr *) * nMaxDepth); |
| 799 if( 0==apLeaf ){ |
| 800 rc = SQLITE_NOMEM; |
| 801 }else{ |
| 802 memset(apLeaf, 0, sizeof(Fts3Expr *) * nMaxDepth); |
| 803 } |
| 804 |
| 805 if( rc==SQLITE_OK ){ |
| 806 int i; |
| 807 Fts3Expr *p; |
| 808 |
| 809 /* Set $p to point to the left-most leaf in the tree of eType nodes. */ |
| 810 for(p=pRoot; p->eType==eType; p=p->pLeft){ |
| 811 assert( p->pParent==0 || p->pParent->pLeft==p ); |
| 812 assert( p->pLeft && p->pRight ); |
| 813 } |
| 814 |
| 815 /* This loop runs once for each leaf in the tree of eType nodes. */ |
| 816 while( 1 ){ |
| 817 int iLvl; |
| 818 Fts3Expr *pParent = p->pParent; /* Current parent of p */ |
| 819 |
| 820 assert( pParent==0 || pParent->pLeft==p ); |
| 821 p->pParent = 0; |
| 822 if( pParent ){ |
| 823 pParent->pLeft = 0; |
| 824 }else{ |
| 825 pRoot = 0; |
| 826 } |
| 827 rc = fts3ExprBalance(&p, nMaxDepth-1); |
| 828 if( rc!=SQLITE_OK ) break; |
| 829 |
| 830 for(iLvl=0; p && iLvl<nMaxDepth; iLvl++){ |
| 831 if( apLeaf[iLvl]==0 ){ |
| 832 apLeaf[iLvl] = p; |
| 833 p = 0; |
| 834 }else{ |
| 835 assert( pFree ); |
| 836 pFree->pLeft = apLeaf[iLvl]; |
| 837 pFree->pRight = p; |
| 838 pFree->pLeft->pParent = pFree; |
| 839 pFree->pRight->pParent = pFree; |
| 840 |
| 841 p = pFree; |
| 842 pFree = pFree->pParent; |
| 843 p->pParent = 0; |
| 844 apLeaf[iLvl] = 0; |
| 845 } |
| 846 } |
| 847 if( p ){ |
| 848 sqlite3Fts3ExprFree(p); |
| 849 rc = SQLITE_TOOBIG; |
| 850 break; |
| 851 } |
| 852 |
| 853 /* If that was the last leaf node, break out of the loop */ |
| 854 if( pParent==0 ) break; |
| 855 |
| 856 /* Set $p to point to the next leaf in the tree of eType nodes */ |
| 857 for(p=pParent->pRight; p->eType==eType; p=p->pLeft); |
| 858 |
| 859 /* Remove pParent from the original tree. */ |
| 860 assert( pParent->pParent==0 || pParent->pParent->pLeft==pParent ); |
| 861 pParent->pRight->pParent = pParent->pParent; |
| 862 if( pParent->pParent ){ |
| 863 pParent->pParent->pLeft = pParent->pRight; |
| 864 }else{ |
| 865 assert( pParent==pRoot ); |
| 866 pRoot = pParent->pRight; |
| 867 } |
| 868 |
| 869 /* Link pParent into the free node list. It will be used as an |
| 870 ** internal node of the new tree. */ |
| 871 pParent->pParent = pFree; |
| 872 pFree = pParent; |
| 873 } |
| 874 |
| 875 if( rc==SQLITE_OK ){ |
| 876 p = 0; |
| 877 for(i=0; i<nMaxDepth; i++){ |
| 878 if( apLeaf[i] ){ |
| 879 if( p==0 ){ |
| 880 p = apLeaf[i]; |
| 881 p->pParent = 0; |
| 882 }else{ |
| 883 assert( pFree!=0 ); |
| 884 pFree->pRight = p; |
| 885 pFree->pLeft = apLeaf[i]; |
| 886 pFree->pLeft->pParent = pFree; |
| 887 pFree->pRight->pParent = pFree; |
| 888 |
| 889 p = pFree; |
| 890 pFree = pFree->pParent; |
| 891 p->pParent = 0; |
| 892 } |
| 893 } |
| 894 } |
| 895 pRoot = p; |
| 896 }else{ |
| 897 /* An error occurred. Delete the contents of the apLeaf[] array |
| 898 ** and pFree list. Everything else is cleaned up by the call to |
| 899 ** sqlite3Fts3ExprFree(pRoot) below. */ |
| 900 Fts3Expr *pDel; |
| 901 for(i=0; i<nMaxDepth; i++){ |
| 902 sqlite3Fts3ExprFree(apLeaf[i]); |
| 903 } |
| 904 while( (pDel=pFree)!=0 ){ |
| 905 pFree = pDel->pParent; |
| 906 sqlite3_free(pDel); |
| 907 } |
| 908 } |
| 909 |
| 910 assert( pFree==0 ); |
| 911 sqlite3_free( apLeaf ); |
| 912 } |
| 913 } |
| 914 |
| 915 if( rc!=SQLITE_OK ){ |
| 916 sqlite3Fts3ExprFree(pRoot); |
| 917 pRoot = 0; |
| 918 } |
| 919 *pp = pRoot; |
| 920 return rc; |
| 921 } |
| 922 |
| 923 /* |
| 924 ** This function is similar to sqlite3Fts3ExprParse(), with the following |
| 925 ** differences: |
| 926 ** |
| 927 ** 1. It does not do expression rebalancing. |
| 928 ** 2. It does not check that the expression does not exceed the |
| 929 ** maximum allowable depth. |
| 930 ** 3. Even if it fails, *ppExpr may still be set to point to an |
| 931 ** expression tree. It should be deleted using sqlite3Fts3ExprFree() |
| 932 ** in this case. |
| 933 */ |
| 934 static int fts3ExprParseUnbalanced( |
| 935 sqlite3_tokenizer *pTokenizer, /* Tokenizer module */ |
| 936 int iLangid, /* Language id for tokenizer */ |
| 937 char **azCol, /* Array of column names for fts3 table */ |
| 938 int bFts4, /* True to allow FTS4-only syntax */ |
| 939 int nCol, /* Number of entries in azCol[] */ |
| 940 int iDefaultCol, /* Default column to query */ |
| 941 const char *z, int n, /* Text of MATCH query */ |
| 942 Fts3Expr **ppExpr /* OUT: Parsed query structure */ |
| 943 ){ |
| 944 int nParsed; |
| 945 int rc; |
| 946 ParseContext sParse; |
| 947 |
| 948 memset(&sParse, 0, sizeof(ParseContext)); |
| 949 sParse.pTokenizer = pTokenizer; |
| 950 sParse.iLangid = iLangid; |
| 951 sParse.azCol = (const char **)azCol; |
| 952 sParse.nCol = nCol; |
| 953 sParse.iDefaultCol = iDefaultCol; |
| 954 sParse.bFts4 = bFts4; |
| 955 if( z==0 ){ |
| 956 *ppExpr = 0; |
| 957 return SQLITE_OK; |
| 958 } |
| 959 if( n<0 ){ |
| 960 n = (int)strlen(z); |
| 961 } |
| 962 rc = fts3ExprParse(&sParse, z, n, ppExpr, &nParsed); |
| 963 assert( rc==SQLITE_OK || *ppExpr==0 ); |
| 964 |
| 965 /* Check for mismatched parenthesis */ |
| 966 if( rc==SQLITE_OK && sParse.nNest ){ |
| 967 rc = SQLITE_ERROR; |
| 968 } |
| 969 |
| 970 return rc; |
| 971 } |
| 972 |
| 973 /* |
680 ** Parameters z and n contain a pointer to and length of a buffer containing | 974 ** Parameters z and n contain a pointer to and length of a buffer containing |
681 ** an fts3 query expression, respectively. This function attempts to parse the | 975 ** an fts3 query expression, respectively. This function attempts to parse the |
682 ** query expression and create a tree of Fts3Expr structures representing the | 976 ** query expression and create a tree of Fts3Expr structures representing the |
683 ** parsed expression. If successful, *ppExpr is set to point to the head | 977 ** parsed expression. If successful, *ppExpr is set to point to the head |
684 ** of the parsed expression tree and SQLITE_OK is returned. If an error | 978 ** of the parsed expression tree and SQLITE_OK is returned. If an error |
685 ** occurs, either SQLITE_NOMEM (out-of-memory error) or SQLITE_ERROR (parse | 979 ** occurs, either SQLITE_NOMEM (out-of-memory error) or SQLITE_ERROR (parse |
686 ** error) is returned and *ppExpr is set to 0. | 980 ** error) is returned and *ppExpr is set to 0. |
687 ** | 981 ** |
688 ** If parameter n is a negative number, then z is assumed to point to a | 982 ** If parameter n is a negative number, then z is assumed to point to a |
689 ** nul-terminated string and the length is determined using strlen(). | 983 ** nul-terminated string and the length is determined using strlen(). |
690 ** | 984 ** |
691 ** The first parameter, pTokenizer, is passed the fts3 tokenizer module to | 985 ** The first parameter, pTokenizer, is passed the fts3 tokenizer module to |
692 ** use to normalize query tokens while parsing the expression. The azCol[] | 986 ** use to normalize query tokens while parsing the expression. The azCol[] |
693 ** array, which is assumed to contain nCol entries, should contain the names | 987 ** array, which is assumed to contain nCol entries, should contain the names |
694 ** of each column in the target fts3 table, in order from left to right. | 988 ** of each column in the target fts3 table, in order from left to right. |
695 ** Column names must be nul-terminated strings. | 989 ** Column names must be nul-terminated strings. |
696 ** | 990 ** |
697 ** The iDefaultCol parameter should be passed the index of the table column | 991 ** The iDefaultCol parameter should be passed the index of the table column |
698 ** that appears on the left-hand-side of the MATCH operator (the default | 992 ** that appears on the left-hand-side of the MATCH operator (the default |
699 ** column to match against for tokens for which a column name is not explicitly | 993 ** column to match against for tokens for which a column name is not explicitly |
700 ** specified as part of the query string), or -1 if tokens may by default | 994 ** specified as part of the query string), or -1 if tokens may by default |
701 ** match any table column. | 995 ** match any table column. |
702 */ | 996 */ |
703 int sqlite3Fts3ExprParse( | 997 int sqlite3Fts3ExprParse( |
704 sqlite3_tokenizer *pTokenizer, /* Tokenizer module */ | 998 sqlite3_tokenizer *pTokenizer, /* Tokenizer module */ |
| 999 int iLangid, /* Language id for tokenizer */ |
705 char **azCol, /* Array of column names for fts3 table */ | 1000 char **azCol, /* Array of column names for fts3 table */ |
| 1001 int bFts4, /* True to allow FTS4-only syntax */ |
706 int nCol, /* Number of entries in azCol[] */ | 1002 int nCol, /* Number of entries in azCol[] */ |
707 int iDefaultCol, /* Default column to query */ | 1003 int iDefaultCol, /* Default column to query */ |
708 const char *z, int n, /* Text of MATCH query */ | 1004 const char *z, int n, /* Text of MATCH query */ |
709 Fts3Expr **ppExpr /* OUT: Parsed query structure */ | 1005 Fts3Expr **ppExpr, /* OUT: Parsed query structure */ |
| 1006 char **pzErr /* OUT: Error message (sqlite3_malloc) */ |
710 ){ | 1007 ){ |
711 int nParsed; | 1008 int rc = fts3ExprParseUnbalanced( |
712 int rc; | 1009 pTokenizer, iLangid, azCol, bFts4, nCol, iDefaultCol, z, n, ppExpr |
713 ParseContext sParse; | 1010 ); |
714 sParse.pTokenizer = pTokenizer; | 1011 |
715 sParse.azCol = (const char **)azCol; | 1012 /* Rebalance the expression. And check that its depth does not exceed |
716 sParse.nCol = nCol; | 1013 ** SQLITE_FTS3_MAX_EXPR_DEPTH. */ |
717 sParse.iDefaultCol = iDefaultCol; | 1014 if( rc==SQLITE_OK && *ppExpr ){ |
718 sParse.nNest = 0; | 1015 rc = fts3ExprBalance(ppExpr, SQLITE_FTS3_MAX_EXPR_DEPTH); |
719 if( z==0 ){ | 1016 if( rc==SQLITE_OK ){ |
720 *ppExpr = 0; | 1017 rc = fts3ExprCheckDepth(*ppExpr, SQLITE_FTS3_MAX_EXPR_DEPTH); |
721 return SQLITE_OK; | 1018 } |
722 } | 1019 } |
723 if( n<0 ){ | |
724 n = (int)strlen(z); | |
725 } | |
726 rc = fts3ExprParse(&sParse, z, n, ppExpr, &nParsed); | |
727 | 1020 |
728 /* Check for mismatched parenthesis */ | 1021 if( rc!=SQLITE_OK ){ |
729 if( rc==SQLITE_OK && sParse.nNest ){ | |
730 rc = SQLITE_ERROR; | |
731 sqlite3Fts3ExprFree(*ppExpr); | 1022 sqlite3Fts3ExprFree(*ppExpr); |
732 *ppExpr = 0; | 1023 *ppExpr = 0; |
| 1024 if( rc==SQLITE_TOOBIG ){ |
| 1025 *pzErr = sqlite3_mprintf( |
| 1026 "FTS expression tree is too large (maximum depth %d)", |
| 1027 SQLITE_FTS3_MAX_EXPR_DEPTH |
| 1028 ); |
| 1029 rc = SQLITE_ERROR; |
| 1030 }else if( rc==SQLITE_ERROR ){ |
| 1031 *pzErr = sqlite3_mprintf("malformed MATCH expression: [%s]", z); |
| 1032 } |
733 } | 1033 } |
734 | 1034 |
735 return rc; | 1035 return rc; |
736 } | 1036 } |
737 | 1037 |
738 /* | 1038 /* |
| 1039 ** Free a single node of an expression tree. |
| 1040 */ |
| 1041 static void fts3FreeExprNode(Fts3Expr *p){ |
| 1042 assert( p->eType==FTSQUERY_PHRASE || p->pPhrase==0 ); |
| 1043 sqlite3Fts3EvalPhraseCleanup(p->pPhrase); |
| 1044 sqlite3_free(p->aMI); |
| 1045 sqlite3_free(p); |
| 1046 } |
| 1047 |
| 1048 /* |
739 ** Free a parsed fts3 query expression allocated by sqlite3Fts3ExprParse(). | 1049 ** Free a parsed fts3 query expression allocated by sqlite3Fts3ExprParse(). |
| 1050 ** |
| 1051 ** This function would be simpler if it recursively called itself. But |
| 1052 ** that would mean passing a sufficiently large expression to ExprParse() |
| 1053 ** could cause a stack overflow. |
740 */ | 1054 */ |
741 void sqlite3Fts3ExprFree(Fts3Expr *p){ | 1055 void sqlite3Fts3ExprFree(Fts3Expr *pDel){ |
742 if( p ){ | 1056 Fts3Expr *p; |
743 sqlite3Fts3ExprFree(p->pLeft); | 1057 assert( pDel==0 || pDel->pParent==0 ); |
744 sqlite3Fts3ExprFree(p->pRight); | 1058 for(p=pDel; p && (p->pLeft||p->pRight); p=(p->pLeft ? p->pLeft : p->pRight)){ |
745 sqlite3_free(p->aDoclist); | 1059 assert( p->pParent==0 || p==p->pParent->pRight || p==p->pParent->pLeft ); |
746 sqlite3_free(p); | 1060 } |
| 1061 while( p ){ |
| 1062 Fts3Expr *pParent = p->pParent; |
| 1063 fts3FreeExprNode(p); |
| 1064 if( pParent && p==pParent->pLeft && pParent->pRight ){ |
| 1065 p = pParent->pRight; |
| 1066 while( p && (p->pLeft || p->pRight) ){ |
| 1067 assert( p==p->pParent->pRight || p==p->pParent->pLeft ); |
| 1068 p = (p->pLeft ? p->pLeft : p->pRight); |
| 1069 } |
| 1070 }else{ |
| 1071 p = pParent; |
| 1072 } |
747 } | 1073 } |
748 } | 1074 } |
749 | 1075 |
750 /**************************************************************************** | 1076 /**************************************************************************** |
751 ***************************************************************************** | 1077 ***************************************************************************** |
752 ** Everything after this point is just test code. | 1078 ** Everything after this point is just test code. |
753 */ | 1079 */ |
754 | 1080 |
755 #ifdef SQLITE_TEST | 1081 #ifdef SQLITE_TEST |
756 | 1082 |
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
788 ** Return a pointer to a buffer containing a text representation of the | 1114 ** Return a pointer to a buffer containing a text representation of the |
789 ** expression passed as the first argument. The buffer is obtained from | 1115 ** expression passed as the first argument. The buffer is obtained from |
790 ** sqlite3_malloc(). It is the responsibility of the caller to use | 1116 ** sqlite3_malloc(). It is the responsibility of the caller to use |
791 ** sqlite3_free() to release the memory. If an OOM condition is encountered, | 1117 ** sqlite3_free() to release the memory. If an OOM condition is encountered, |
792 ** NULL is returned. | 1118 ** NULL is returned. |
793 ** | 1119 ** |
794 ** If the second argument is not NULL, then its contents are prepended to | 1120 ** If the second argument is not NULL, then its contents are prepended to |
795 ** the returned expression text and then freed using sqlite3_free(). | 1121 ** the returned expression text and then freed using sqlite3_free(). |
796 */ | 1122 */ |
797 static char *exprToString(Fts3Expr *pExpr, char *zBuf){ | 1123 static char *exprToString(Fts3Expr *pExpr, char *zBuf){ |
| 1124 if( pExpr==0 ){ |
| 1125 return sqlite3_mprintf(""); |
| 1126 } |
798 switch( pExpr->eType ){ | 1127 switch( pExpr->eType ){ |
799 case FTSQUERY_PHRASE: { | 1128 case FTSQUERY_PHRASE: { |
800 Fts3Phrase *pPhrase = pExpr->pPhrase; | 1129 Fts3Phrase *pPhrase = pExpr->pPhrase; |
801 int i; | 1130 int i; |
802 zBuf = sqlite3_mprintf( | 1131 zBuf = sqlite3_mprintf( |
803 "%zPHRASE %d %d", zBuf, pPhrase->iColumn, pPhrase->isNot); | 1132 "%zPHRASE %d 0", zBuf, pPhrase->iColumn); |
804 for(i=0; zBuf && i<pPhrase->nToken; i++){ | 1133 for(i=0; zBuf && i<pPhrase->nToken; i++){ |
805 zBuf = sqlite3_mprintf("%z %.*s%s", zBuf, | 1134 zBuf = sqlite3_mprintf("%z %.*s%s", zBuf, |
806 pPhrase->aToken[i].n, pPhrase->aToken[i].z, | 1135 pPhrase->aToken[i].n, pPhrase->aToken[i].z, |
807 (pPhrase->aToken[i].isPrefix?"+":"") | 1136 (pPhrase->aToken[i].isPrefix?"+":"") |
808 ); | 1137 ); |
809 } | 1138 } |
810 return zBuf; | 1139 return zBuf; |
811 } | 1140 } |
812 | 1141 |
813 case FTSQUERY_NEAR: | 1142 case FTSQUERY_NEAR: |
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
895 nCol = argc-2; | 1224 nCol = argc-2; |
896 azCol = (char **)sqlite3_malloc(nCol*sizeof(char *)); | 1225 azCol = (char **)sqlite3_malloc(nCol*sizeof(char *)); |
897 if( !azCol ){ | 1226 if( !azCol ){ |
898 sqlite3_result_error_nomem(context); | 1227 sqlite3_result_error_nomem(context); |
899 goto exprtest_out; | 1228 goto exprtest_out; |
900 } | 1229 } |
901 for(ii=0; ii<nCol; ii++){ | 1230 for(ii=0; ii<nCol; ii++){ |
902 azCol[ii] = (char *)sqlite3_value_text(argv[ii+2]); | 1231 azCol[ii] = (char *)sqlite3_value_text(argv[ii+2]); |
903 } | 1232 } |
904 | 1233 |
905 rc = sqlite3Fts3ExprParse( | 1234 if( sqlite3_user_data(context) ){ |
906 pTokenizer, azCol, nCol, nCol, zExpr, nExpr, &pExpr | 1235 char *zDummy = 0; |
907 ); | 1236 rc = sqlite3Fts3ExprParse( |
| 1237 pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr, &zDummy |
| 1238 ); |
| 1239 assert( rc==SQLITE_OK || pExpr==0 ); |
| 1240 sqlite3_free(zDummy); |
| 1241 }else{ |
| 1242 rc = fts3ExprParseUnbalanced( |
| 1243 pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr |
| 1244 ); |
| 1245 } |
| 1246 |
908 if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM ){ | 1247 if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM ){ |
| 1248 sqlite3Fts3ExprFree(pExpr); |
909 sqlite3_result_error(context, "Error parsing expression", -1); | 1249 sqlite3_result_error(context, "Error parsing expression", -1); |
910 }else if( rc==SQLITE_NOMEM || !(zBuf = exprToString(pExpr, 0)) ){ | 1250 }else if( rc==SQLITE_NOMEM || !(zBuf = exprToString(pExpr, 0)) ){ |
911 sqlite3_result_error_nomem(context); | 1251 sqlite3_result_error_nomem(context); |
912 }else{ | 1252 }else{ |
913 sqlite3_result_text(context, zBuf, -1, SQLITE_TRANSIENT); | 1253 sqlite3_result_text(context, zBuf, -1, SQLITE_TRANSIENT); |
914 sqlite3_free(zBuf); | 1254 sqlite3_free(zBuf); |
915 } | 1255 } |
916 | 1256 |
917 sqlite3Fts3ExprFree(pExpr); | 1257 sqlite3Fts3ExprFree(pExpr); |
918 | 1258 |
919 exprtest_out: | 1259 exprtest_out: |
920 if( pModule && pTokenizer ){ | 1260 if( pModule && pTokenizer ){ |
921 rc = pModule->xDestroy(pTokenizer); | 1261 rc = pModule->xDestroy(pTokenizer); |
922 } | 1262 } |
923 sqlite3_free(azCol); | 1263 sqlite3_free(azCol); |
924 } | 1264 } |
925 | 1265 |
926 /* | 1266 /* |
927 ** Register the query expression parser test function fts3_exprtest() | 1267 ** Register the query expression parser test function fts3_exprtest() |
928 ** with database connection db. | 1268 ** with database connection db. |
929 */ | 1269 */ |
930 int sqlite3Fts3ExprInitTestInterface(sqlite3* db){ | 1270 int sqlite3Fts3ExprInitTestInterface(sqlite3* db){ |
931 return sqlite3_create_function( | 1271 int rc = sqlite3_create_function( |
932 db, "fts3_exprtest", -1, SQLITE_UTF8, 0, fts3ExprTest, 0, 0 | 1272 db, "fts3_exprtest", -1, SQLITE_UTF8, 0, fts3ExprTest, 0, 0 |
933 ); | 1273 ); |
| 1274 if( rc==SQLITE_OK ){ |
| 1275 rc = sqlite3_create_function(db, "fts3_exprtest_rebalance", |
| 1276 -1, SQLITE_UTF8, (void *)1, fts3ExprTest, 0, 0 |
| 1277 ); |
| 1278 } |
| 1279 return rc; |
934 } | 1280 } |
935 | 1281 |
936 #endif | 1282 #endif |
937 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ | 1283 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ |
OLD | NEW |