OLD | NEW |
1 /* | 1 /* |
2 ** 2008 Nov 28 | 2 ** 2008 Nov 28 |
3 ** | 3 ** |
4 ** The author disclaims copyright to this source code. In place of | 4 ** The author disclaims copyright to this source code. In place of |
5 ** a legal notice, here is a blessing: | 5 ** a legal notice, here is a blessing: |
6 ** | 6 ** |
7 ** May you do good and not evil. | 7 ** May you do good and not evil. |
8 ** May you find forgiveness for yourself and forgive others. | 8 ** May you find forgiveness for yourself and forgive others. |
9 ** May you share freely, never taking more than you give. | 9 ** May you share freely, never taking more than you give. |
10 ** | 10 ** |
11 ****************************************************************************** | 11 ****************************************************************************** |
12 ** | 12 ** |
13 ** This module contains code that implements a parser for fts3 query strings | 13 ** This module contains code that implements a parser for fts3 query strings |
14 ** (the right-hand argument to the MATCH operator). Because the supported | 14 ** (the right-hand argument to the MATCH operator). Because the supported |
15 ** syntax is relatively simple, the whole tokenizer/parser system is | 15 ** syntax is relatively simple, the whole tokenizer/parser system is |
16 ** hand-coded. The public interface to this module is declared in source | 16 ** hand-coded. |
17 ** code file "fts3_expr.h". | |
18 */ | 17 */ |
19 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) | 18 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) |
20 | 19 |
21 /* | 20 /* |
22 ** By default, this module parses the legacy syntax that has been | 21 ** By default, this module parses the legacy syntax that has been |
23 ** traditionally used by fts3. Or, if SQLITE_ENABLE_FTS3_PARENTHESIS | 22 ** traditionally used by fts3. Or, if SQLITE_ENABLE_FTS3_PARENTHESIS |
24 ** is defined, then it uses the new syntax. The differences between | 23 ** is defined, then it uses the new syntax. The differences between |
25 ** the new and the old syntaxes are: | 24 ** the new and the old syntaxes are: |
26 ** | 25 ** |
27 ** a) The new syntax supports parentheses. The old does not. | 26 ** a) The new syntax supports parentheses. The old does not. |
28 ** | 27 ** |
29 ** b) The new syntax supports the AND and NOT operators. The old does not. | 28 ** b) The new syntax supports the AND and NOT operators. The old does not. |
30 ** | 29 ** |
31 ** c) The old syntax supports the "-" token qualifier. This is not | 30 ** c) The old syntax supports the "-" token qualifier. This is not |
32 ** supported by the new syntax (it is replaced by the NOT operator). | 31 ** supported by the new syntax (it is replaced by the NOT operator). |
33 ** | 32 ** |
34 ** d) When using the old syntax, the OR operator has a greater precedence | 33 ** d) When using the old syntax, the OR operator has a greater precedence |
35 ** than an implicit AND. When using the new, both implicit and explicit | 34 ** than an implicit AND. When using the new, both implicit and explicit |
36 ** AND operators have a higher precedence than OR. | 35 ** AND operators have a higher precedence than OR. |
37 ** | 36 ** |
38 ** If compiled with SQLITE_TEST defined, then this module exports the | 37 ** If compiled with SQLITE_TEST defined, then this module exports the |
39 ** symbol "int sqlite3_fts3_enable_parentheses". Setting this variable | 38 ** symbol "int sqlite3_fts3_enable_parentheses". Setting this variable |
40 ** to zero causes the module to use the old syntax. If it is set to | 39 ** to zero causes the module to use the old syntax. If it is set to |
41 ** non-zero the new syntax is activated. This is so both syntaxes can | 40 ** non-zero the new syntax is activated. This is so both syntaxes can |
42 ** be tested using a single build of testfixture. | 41 ** be tested using a single build of testfixture. |
| 42 ** |
| 43 ** The following describes the syntax supported by the fts3 MATCH |
| 44 ** operator in a similar format to that used by the lemon parser |
| 45 ** generator. This module does not actually use lemon; it uses a |
| 46 ** custom parser. |
| 47 ** |
| 48 ** query ::= andexpr (OR andexpr)*. |
| 49 ** |
| 50 ** andexpr ::= notexpr (AND? notexpr)*. |
| 51 ** |
| 52 ** notexpr ::= nearexpr (NOT nearexpr|-TOKEN)*. |
| 53 ** notexpr ::= LP query RP. |
| 54 ** |
| 55 ** nearexpr ::= phrase (NEAR distance_opt nearexpr)*. |
| 56 ** |
| 57 ** distance_opt ::= . |
| 58 ** distance_opt ::= / INTEGER. |
| 59 ** |
| 60 ** phrase ::= TOKEN. |
| 61 ** phrase ::= COLUMN:TOKEN. |
| 62 ** phrase ::= "TOKEN TOKEN TOKEN...". |
43 */ | 63 */ |
| 64 |
44 #ifdef SQLITE_TEST | 65 #ifdef SQLITE_TEST |
45 int sqlite3_fts3_enable_parentheses = 0; | 66 int sqlite3_fts3_enable_parentheses = 0; |
46 #else | 67 #else |
47 # ifdef SQLITE_ENABLE_FTS3_PARENTHESIS | 68 # ifdef SQLITE_ENABLE_FTS3_PARENTHESIS |
48 # define sqlite3_fts3_enable_parentheses 1 | 69 # define sqlite3_fts3_enable_parentheses 1 |
49 # else | 70 # else |
50 # define sqlite3_fts3_enable_parentheses 0 | 71 # define sqlite3_fts3_enable_parentheses 0 |
51 # endif | 72 # endif |
52 #endif | 73 #endif |
53 | 74 |
54 /* | 75 /* |
55 ** Default span for NEAR operators. | 76 ** Default span for NEAR operators. |
56 */ | 77 */ |
57 #define SQLITE_FTS3_DEFAULT_NEAR_PARAM 10 | 78 #define SQLITE_FTS3_DEFAULT_NEAR_PARAM 10 |
58 | 79 |
59 #include "fts3_expr.h" | 80 #include "fts3Int.h" |
60 #include "sqlite3.h" | |
61 #include <string.h> | 81 #include <string.h> |
62 #include <assert.h> | 82 #include <assert.h> |
63 | 83 |
64 typedef struct ParseContext ParseContext; | 84 typedef struct ParseContext ParseContext; |
65 struct ParseContext { | 85 struct ParseContext { |
66 sqlite3_tokenizer *pTokenizer; /* Tokenizer module */ | 86 sqlite3_tokenizer *pTokenizer; /* Tokenizer module */ |
67 const char **azCol; /* Array of column names for fts3 table */ | 87 const char **azCol; /* Array of column names for fts3 table */ |
68 int nCol; /* Number of entries in azCol[] */ | 88 int nCol; /* Number of entries in azCol[] */ |
69 int iDefaultCol; /* Default column to query */ | 89 int iDefaultCol; /* Default column to query */ |
70 sqlite3_context *pCtx; /* Write error message here */ | 90 sqlite3_context *pCtx; /* Write error message here */ |
71 int nNest; /* Number of nested brackets */ | 91 int nNest; /* Number of nested brackets */ |
72 }; | 92 }; |
73 | 93 |
74 /* | 94 /* |
75 ** This function is equivalent to the standard isspace() function. | 95 ** This function is equivalent to the standard isspace() function. |
76 ** | 96 ** |
77 ** The standard isspace() can be awkward to use safely, because although it | 97 ** The standard isspace() can be awkward to use safely, because although it |
78 ** is defined to accept an argument of type int, its behaviour when passed | 98 ** is defined to accept an argument of type int, its behaviour when passed |
79 ** an integer that falls outside of the range of the unsigned char type | 99 ** an integer that falls outside of the range of the unsigned char type |
80 ** is undefined (and sometimes, "undefined" means segfault). This wrapper | 100 ** is undefined (and sometimes, "undefined" means segfault). This wrapper |
81 ** is defined to accept an argument of type char, and always returns 0 for | 101 ** is defined to accept an argument of type char, and always returns 0 for |
82 ** any values that fall outside of the range of the unsigned char type (i.e. | 102 ** any values that fall outside of the range of the unsigned char type (i.e. |
83 ** negative values). | 103 ** negative values). |
84 */ | 104 */ |
85 static int fts3isspace(char c){ | 105 static int fts3isspace(char c){ |
86 return c==' ' || c=='\t' || c=='\n' || c=='\r' || c=='\v' || c=='\f'; | 106 return c==' ' || c=='\t' || c=='\n' || c=='\r' || c=='\v' || c=='\f'; |
87 } | 107 } |
88 | 108 |
89 /* | 109 /* |
| 110 ** Allocate nByte bytes of memory using sqlite3_malloc(). If successful, |
| 111 ** zero the memory before returning a pointer to it. If unsuccessful, |
| 112 ** return NULL. |
| 113 */ |
| 114 static void *fts3MallocZero(int nByte){ |
| 115 void *pRet = sqlite3_malloc(nByte); |
| 116 if( pRet ) memset(pRet, 0, nByte); |
| 117 return pRet; |
| 118 } |
| 119 |
| 120 |
| 121 /* |
90 ** Extract the next token from buffer z (length n) using the tokenizer | 122 ** Extract the next token from buffer z (length n) using the tokenizer |
91 ** and other information (column names etc.) in pParse. Create an Fts3Expr | 123 ** and other information (column names etc.) in pParse. Create an Fts3Expr |
92 ** structure of type FTSQUERY_PHRASE containing a phrase consisting of this | 124 ** structure of type FTSQUERY_PHRASE containing a phrase consisting of this |
93 ** single token and set *ppExpr to point to it. If the end of the buffer is | 125 ** single token and set *ppExpr to point to it. If the end of the buffer is |
94 ** reached before a token is found, set *ppExpr to zero. It is the | 126 ** reached before a token is found, set *ppExpr to zero. It is the |
95 ** responsibility of the caller to eventually deallocate the allocated | 127 ** responsibility of the caller to eventually deallocate the allocated |
96 ** Fts3Expr structure (if any) by passing it to sqlite3_free(). | 128 ** Fts3Expr structure (if any) by passing it to sqlite3_free(). |
97 ** | 129 ** |
98 ** Return SQLITE_OK if successful, or SQLITE_NOMEM if a memory allocation | 130 ** Return SQLITE_OK if successful, or SQLITE_NOMEM if a memory allocation |
99 ** fails. | 131 ** fails. |
(...skipping 16 matching lines...) Expand all Loading... |
116 if( rc==SQLITE_OK ){ | 148 if( rc==SQLITE_OK ){ |
117 const char *zToken; | 149 const char *zToken; |
118 int nToken, iStart, iEnd, iPosition; | 150 int nToken, iStart, iEnd, iPosition; |
119 int nByte; /* total space to allocate */ | 151 int nByte; /* total space to allocate */ |
120 | 152 |
121 pCursor->pTokenizer = pTokenizer; | 153 pCursor->pTokenizer = pTokenizer; |
122 rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition); | 154 rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition); |
123 | 155 |
124 if( rc==SQLITE_OK ){ | 156 if( rc==SQLITE_OK ){ |
125 nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken; | 157 nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken; |
126 pRet = (Fts3Expr *)sqlite3_malloc(nByte); | 158 pRet = (Fts3Expr *)fts3MallocZero(nByte); |
127 if( !pRet ){ | 159 if( !pRet ){ |
128 rc = SQLITE_NOMEM; | 160 rc = SQLITE_NOMEM; |
129 }else{ | 161 }else{ |
130 memset(pRet, 0, nByte); | |
131 pRet->eType = FTSQUERY_PHRASE; | 162 pRet->eType = FTSQUERY_PHRASE; |
132 pRet->pPhrase = (Fts3Phrase *)&pRet[1]; | 163 pRet->pPhrase = (Fts3Phrase *)&pRet[1]; |
133 pRet->pPhrase->nToken = 1; | 164 pRet->pPhrase->nToken = 1; |
134 pRet->pPhrase->iColumn = iCol; | 165 pRet->pPhrase->iColumn = iCol; |
135 pRet->pPhrase->aToken[0].n = nToken; | 166 pRet->pPhrase->aToken[0].n = nToken; |
136 pRet->pPhrase->aToken[0].z = (char *)&pRet->pPhrase[1]; | 167 pRet->pPhrase->aToken[0].z = (char *)&pRet->pPhrase[1]; |
137 memcpy(pRet->pPhrase->aToken[0].z, zToken, nToken); | 168 memcpy(pRet->pPhrase->aToken[0].z, zToken, nToken); |
138 | 169 |
139 if( iEnd<n && z[iEnd]=='*' ){ | 170 if( iEnd<n && z[iEnd]=='*' ){ |
140 pRet->pPhrase->aToken[0].isPrefix = 1; | 171 pRet->pPhrase->aToken[0].isPrefix = 1; |
(...skipping 12 matching lines...) Expand all Loading... |
153 *pnConsumed = nConsumed; | 184 *pnConsumed = nConsumed; |
154 *ppExpr = pRet; | 185 *ppExpr = pRet; |
155 return rc; | 186 return rc; |
156 } | 187 } |
157 | 188 |
158 | 189 |
159 /* | 190 /* |
160 ** Enlarge a memory allocation. If the reallocation fails (out of memory), | 191 ** Enlarge a memory allocation. If the reallocation fails (out of memory), |
161 ** then free the old allocation and return NULL. | 192 ** then free the old allocation and return NULL. |
162 */ | 193 */ |
163 void *fts3ReallocOrFree(void *pOrig, int nNew){ | 194 static void *fts3ReallocOrFree(void *pOrig, int nNew){ |
164 void *pRet = sqlite3_realloc(pOrig, nNew); | 195 void *pRet = sqlite3_realloc(pOrig, nNew); |
165 if( !pRet ){ | 196 if( !pRet ){ |
166 sqlite3_free(pOrig); | 197 sqlite3_free(pOrig); |
167 } | 198 } |
168 return pRet; | 199 return pRet; |
169 } | 200 } |
170 | 201 |
171 /* | 202 /* |
172 ** Buffer zInput, length nInput, contains the contents of a quoted string | 203 ** Buffer zInput, length nInput, contains the contents of a quoted string |
173 ** that appeared as part of an fts3 query expression. Neither quote character | 204 ** that appeared as part of an fts3 query expression. Neither quote character |
(...skipping 22 matching lines...) Expand all Loading... |
196 rc = pModule->xOpen(pTokenizer, zInput, nInput, &pCursor); | 227 rc = pModule->xOpen(pTokenizer, zInput, nInput, &pCursor); |
197 if( rc==SQLITE_OK ){ | 228 if( rc==SQLITE_OK ){ |
198 int ii; | 229 int ii; |
199 pCursor->pTokenizer = pTokenizer; | 230 pCursor->pTokenizer = pTokenizer; |
200 for(ii=0; rc==SQLITE_OK; ii++){ | 231 for(ii=0; rc==SQLITE_OK; ii++){ |
201 const char *zToken; | 232 const char *zToken; |
202 int nToken, iBegin, iEnd, iPos; | 233 int nToken, iBegin, iEnd, iPos; |
203 rc = pModule->xNext(pCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos); | 234 rc = pModule->xNext(pCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos); |
204 if( rc==SQLITE_OK ){ | 235 if( rc==SQLITE_OK ){ |
205 int nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase); | 236 int nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase); |
206 p = fts3ReallocOrFree(p, nByte+ii*sizeof(struct PhraseToken)); | 237 p = fts3ReallocOrFree(p, nByte+ii*sizeof(Fts3PhraseToken)); |
207 zTemp = fts3ReallocOrFree(zTemp, nTemp + nToken); | 238 zTemp = fts3ReallocOrFree(zTemp, nTemp + nToken); |
208 if( !p || !zTemp ){ | 239 if( !p || !zTemp ){ |
209 goto no_mem; | 240 goto no_mem; |
210 } | 241 } |
211 if( ii==0 ){ | 242 if( ii==0 ){ |
212 memset(p, 0, nByte); | 243 memset(p, 0, nByte); |
213 p->pPhrase = (Fts3Phrase *)&p[1]; | 244 p->pPhrase = (Fts3Phrase *)&p[1]; |
214 } | 245 } |
215 p->pPhrase = (Fts3Phrase *)&p[1]; | 246 p->pPhrase = (Fts3Phrase *)&p[1]; |
| 247 memset(&p->pPhrase->aToken[ii], 0, sizeof(Fts3PhraseToken)); |
216 p->pPhrase->nToken = ii+1; | 248 p->pPhrase->nToken = ii+1; |
217 p->pPhrase->aToken[ii].n = nToken; | 249 p->pPhrase->aToken[ii].n = nToken; |
218 memcpy(&zTemp[nTemp], zToken, nToken); | 250 memcpy(&zTemp[nTemp], zToken, nToken); |
219 nTemp += nToken; | 251 nTemp += nToken; |
220 if( iEnd<nInput && zInput[iEnd]=='*' ){ | 252 if( iEnd<nInput && zInput[iEnd]=='*' ){ |
221 p->pPhrase->aToken[ii].isPrefix = 1; | 253 p->pPhrase->aToken[ii].isPrefix = 1; |
222 }else{ | 254 }else{ |
223 p->pPhrase->aToken[ii].isPrefix = 0; | 255 p->pPhrase->aToken[ii].isPrefix = 0; |
224 } | 256 } |
225 } | 257 } |
226 } | 258 } |
227 | 259 |
228 pModule->xClose(pCursor); | 260 pModule->xClose(pCursor); |
229 pCursor = 0; | 261 pCursor = 0; |
230 } | 262 } |
231 | 263 |
232 if( rc==SQLITE_DONE ){ | 264 if( rc==SQLITE_DONE ){ |
233 int jj; | 265 int jj; |
234 char *zNew; | 266 char *zNew = NULL; |
235 int nNew = 0; | 267 int nNew = 0; |
236 int nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase); | 268 int nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase); |
237 nByte += (p?(p->pPhrase->nToken-1):0) * sizeof(struct PhraseToken); | 269 nByte += (p?(p->pPhrase->nToken-1):0) * sizeof(Fts3PhraseToken); |
238 p = fts3ReallocOrFree(p, nByte + nTemp); | 270 p = fts3ReallocOrFree(p, nByte + nTemp); |
239 if( !p ){ | 271 if( !p ){ |
240 goto no_mem; | 272 goto no_mem; |
241 } | 273 } |
242 if( zTemp ){ | 274 if( zTemp ){ |
243 zNew = &(((char *)p)[nByte]); | 275 zNew = &(((char *)p)[nByte]); |
244 memcpy(zNew, zTemp, nTemp); | 276 memcpy(zNew, zTemp, nTemp); |
245 }else{ | 277 }else{ |
246 memset(p, 0, nByte+nTemp); | 278 memset(p, 0, nByte+nTemp); |
247 } | 279 } |
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
283 ** if a malloc failure occurs, or SQLITE_ERROR if a parse error is encountered. | 315 ** if a malloc failure occurs, or SQLITE_ERROR if a parse error is encountered. |
284 ** If SQLITE_ERROR is returned, pContext is populated with an error message. | 316 ** If SQLITE_ERROR is returned, pContext is populated with an error message. |
285 */ | 317 */ |
286 static int getNextNode( | 318 static int getNextNode( |
287 ParseContext *pParse, /* fts3 query parse context */ | 319 ParseContext *pParse, /* fts3 query parse context */ |
288 const char *z, int n, /* Input string */ | 320 const char *z, int n, /* Input string */ |
289 Fts3Expr **ppExpr, /* OUT: expression */ | 321 Fts3Expr **ppExpr, /* OUT: expression */ |
290 int *pnConsumed /* OUT: Number of bytes consumed */ | 322 int *pnConsumed /* OUT: Number of bytes consumed */ |
291 ){ | 323 ){ |
292 static const struct Fts3Keyword { | 324 static const struct Fts3Keyword { |
293 char z[4]; /* Keyword text */ | 325 char *z; /* Keyword text */ |
294 unsigned char n; /* Length of the keyword */ | 326 unsigned char n; /* Length of the keyword */ |
295 unsigned char parenOnly; /* Only valid in paren mode */ | 327 unsigned char parenOnly; /* Only valid in paren mode */ |
296 unsigned char eType; /* Keyword code */ | 328 unsigned char eType; /* Keyword code */ |
297 } aKeyword[] = { | 329 } aKeyword[] = { |
298 { "OR" , 2, 0, FTSQUERY_OR }, | 330 { "OR" , 2, 0, FTSQUERY_OR }, |
299 { "AND", 3, 1, FTSQUERY_AND }, | 331 { "AND", 3, 1, FTSQUERY_AND }, |
300 { "NOT", 3, 1, FTSQUERY_NOT }, | 332 { "NOT", 3, 1, FTSQUERY_NOT }, |
301 { "NEAR", 4, 0, FTSQUERY_NEAR } | 333 { "NEAR", 4, 0, FTSQUERY_NEAR } |
302 }; | 334 }; |
303 int ii; | 335 int ii; |
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
345 } | 377 } |
346 | 378 |
347 /* At this point this is probably a keyword. But for that to be true, | 379 /* At this point this is probably a keyword. But for that to be true, |
348 ** the next byte must contain either whitespace, an open or close | 380 ** the next byte must contain either whitespace, an open or close |
349 ** parenthesis, a quote character, or EOF. | 381 ** parenthesis, a quote character, or EOF. |
350 */ | 382 */ |
351 cNext = zInput[nKey]; | 383 cNext = zInput[nKey]; |
352 if( fts3isspace(cNext) | 384 if( fts3isspace(cNext) |
353 || cNext=='"' || cNext=='(' || cNext==')' || cNext==0 | 385 || cNext=='"' || cNext=='(' || cNext==')' || cNext==0 |
354 ){ | 386 ){ |
355 pRet = (Fts3Expr *)sqlite3_malloc(sizeof(Fts3Expr)); | 387 pRet = (Fts3Expr *)fts3MallocZero(sizeof(Fts3Expr)); |
356 memset(pRet, 0, sizeof(Fts3Expr)); | 388 if( !pRet ){ |
| 389 return SQLITE_NOMEM; |
| 390 } |
357 pRet->eType = pKey->eType; | 391 pRet->eType = pKey->eType; |
358 pRet->nNear = nNear; | 392 pRet->nNear = nNear; |
359 *ppExpr = pRet; | 393 *ppExpr = pRet; |
360 *pnConsumed = (zInput - z) + nKey; | 394 *pnConsumed = (int)((zInput - z) + nKey); |
361 return SQLITE_OK; | 395 return SQLITE_OK; |
362 } | 396 } |
363 | 397 |
364 /* Turns out that wasn't a keyword after all. This happens if the | 398 /* Turns out that wasn't a keyword after all. This happens if the |
365 ** user has supplied a token such as "ORacle". Continue. | 399 ** user has supplied a token such as "ORacle". Continue. |
366 */ | 400 */ |
367 } | 401 } |
368 } | 402 } |
369 | 403 |
370 /* Check for an open bracket. */ | 404 /* Check for an open bracket. */ |
371 if( sqlite3_fts3_enable_parentheses ){ | 405 if( sqlite3_fts3_enable_parentheses ){ |
372 if( *zInput=='(' ){ | 406 if( *zInput=='(' ){ |
373 int nConsumed; | 407 int nConsumed; |
374 int rc; | |
375 pParse->nNest++; | 408 pParse->nNest++; |
376 rc = fts3ExprParse(pParse, &zInput[1], nInput-1, ppExpr, &nConsumed); | 409 rc = fts3ExprParse(pParse, &zInput[1], nInput-1, ppExpr, &nConsumed); |
377 if( rc==SQLITE_OK && !*ppExpr ){ | 410 if( rc==SQLITE_OK && !*ppExpr ){ |
378 rc = SQLITE_DONE; | 411 rc = SQLITE_DONE; |
379 } | 412 } |
380 *pnConsumed = (zInput - z) + 1 + nConsumed; | 413 *pnConsumed = (int)((zInput - z) + 1 + nConsumed); |
381 return rc; | 414 return rc; |
382 } | 415 } |
383 | 416 |
384 /* Check for a close bracket. */ | 417 /* Check for a close bracket. */ |
385 if( *zInput==')' ){ | 418 if( *zInput==')' ){ |
386 pParse->nNest--; | 419 pParse->nNest--; |
387 *pnConsumed = (zInput - z) + 1; | 420 *pnConsumed = (int)((zInput - z) + 1); |
388 return SQLITE_DONE; | 421 return SQLITE_DONE; |
389 } | 422 } |
390 } | 423 } |
391 | 424 |
392 /* See if we are dealing with a quoted phrase. If this is the case, then | 425 /* See if we are dealing with a quoted phrase. If this is the case, then |
393 ** search for the closing quote and pass the whole string to getNextString() | 426 ** search for the closing quote and pass the whole string to getNextString() |
394 ** for processing. This is easy to do, as fts3 has no syntax for escaping | 427 ** for processing. This is easy to do, as fts3 has no syntax for escaping |
395 ** a quote character embedded in a string. | 428 ** a quote character embedded in a string. |
396 */ | 429 */ |
397 if( *zInput=='"' ){ | 430 if( *zInput=='"' ){ |
398 for(ii=1; ii<nInput && zInput[ii]!='"'; ii++); | 431 for(ii=1; ii<nInput && zInput[ii]!='"'; ii++); |
399 *pnConsumed = (zInput - z) + ii + 1; | 432 *pnConsumed = (int)((zInput - z) + ii + 1); |
400 if( ii==nInput ){ | 433 if( ii==nInput ){ |
401 return SQLITE_ERROR; | 434 return SQLITE_ERROR; |
402 } | 435 } |
403 return getNextString(pParse, &zInput[1], ii-1, ppExpr); | 436 return getNextString(pParse, &zInput[1], ii-1, ppExpr); |
404 } | 437 } |
405 | 438 |
406 | 439 |
407 /* If control flows to this point, this must be a regular token, or | 440 /* If control flows to this point, this must be a regular token, or |
408 ** the end of the input. Read a regular token using the sqlite3_tokenizer | 441 ** the end of the input. Read a regular token using the sqlite3_tokenizer |
409 ** interface. Before doing so, figure out if there is an explicit | 442 ** interface. Before doing so, figure out if there is an explicit |
410 ** column specifier for the token. | 443 ** column specifier for the token. |
411 ** | 444 ** |
412 ** TODO: Strangely, it is not possible to associate a column specifier | 445 ** TODO: Strangely, it is not possible to associate a column specifier |
413 ** with a quoted phrase, only with a single token. Not sure if this was | 446 ** with a quoted phrase, only with a single token. Not sure if this was |
414 ** an implementation artifact or an intentional decision when fts3 was | 447 ** an implementation artifact or an intentional decision when fts3 was |
415 ** first implemented. Whichever it was, this module duplicates the | 448 ** first implemented. Whichever it was, this module duplicates the |
416 ** limitation. | 449 ** limitation. |
417 */ | 450 */ |
418 iCol = pParse->iDefaultCol; | 451 iCol = pParse->iDefaultCol; |
419 iColLen = 0; | 452 iColLen = 0; |
420 for(ii=0; ii<pParse->nCol; ii++){ | 453 for(ii=0; ii<pParse->nCol; ii++){ |
421 const char *zStr = pParse->azCol[ii]; | 454 const char *zStr = pParse->azCol[ii]; |
422 int nStr = strlen(zStr); | 455 int nStr = (int)strlen(zStr); |
423 if( nInput>nStr && zInput[nStr]==':' | 456 if( nInput>nStr && zInput[nStr]==':' |
424 && sqlite3_strnicmp(zStr, zInput, nStr)==0 | 457 && sqlite3_strnicmp(zStr, zInput, nStr)==0 |
425 ){ | 458 ){ |
426 iCol = ii; | 459 iCol = ii; |
427 iColLen = ((zInput - z) + nStr + 1); | 460 iColLen = (int)((zInput - z) + nStr + 1); |
428 break; | 461 break; |
429 } | 462 } |
430 } | 463 } |
431 rc = getNextToken(pParse, iCol, &z[iColLen], n-iColLen, ppExpr, pnConsumed); | 464 rc = getNextToken(pParse, iCol, &z[iColLen], n-iColLen, ppExpr, pnConsumed); |
432 *pnConsumed += iColLen; | 465 *pnConsumed += iColLen; |
433 return rc; | 466 return rc; |
434 } | 467 } |
435 | 468 |
436 /* | 469 /* |
437 ** The argument is an Fts3Expr structure for a binary operator (any type | 470 ** The argument is an Fts3Expr structure for a binary operator (any type |
(...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
522 Fts3Expr *p = 0; | 555 Fts3Expr *p = 0; |
523 int nByte = 0; | 556 int nByte = 0; |
524 rc = getNextNode(pParse, zIn, nIn, &p, &nByte); | 557 rc = getNextNode(pParse, zIn, nIn, &p, &nByte); |
525 if( rc==SQLITE_OK ){ | 558 if( rc==SQLITE_OK ){ |
526 int isPhrase; | 559 int isPhrase; |
527 | 560 |
528 if( !sqlite3_fts3_enable_parentheses | 561 if( !sqlite3_fts3_enable_parentheses |
529 && p->eType==FTSQUERY_PHRASE && p->pPhrase->isNot | 562 && p->eType==FTSQUERY_PHRASE && p->pPhrase->isNot |
530 ){ | 563 ){ |
531 /* Create an implicit NOT operator. */ | 564 /* Create an implicit NOT operator. */ |
532 Fts3Expr *pNot = sqlite3_malloc(sizeof(Fts3Expr)); | 565 Fts3Expr *pNot = fts3MallocZero(sizeof(Fts3Expr)); |
533 if( !pNot ){ | 566 if( !pNot ){ |
534 sqlite3Fts3ExprFree(p); | 567 sqlite3Fts3ExprFree(p); |
535 rc = SQLITE_NOMEM; | 568 rc = SQLITE_NOMEM; |
536 goto exprparse_out; | 569 goto exprparse_out; |
537 } | 570 } |
538 memset(pNot, 0, sizeof(Fts3Expr)); | |
539 pNot->eType = FTSQUERY_NOT; | 571 pNot->eType = FTSQUERY_NOT; |
540 pNot->pRight = p; | 572 pNot->pRight = p; |
541 if( pNotBranch ){ | 573 if( pNotBranch ){ |
542 pNot->pLeft = pNotBranch; | 574 pNot->pLeft = pNotBranch; |
543 } | 575 } |
544 pNotBranch = pNot; | 576 pNotBranch = pNot; |
545 p = pPrev; | 577 p = pPrev; |
546 }else{ | 578 }else{ |
547 int eType = p->eType; | 579 int eType = p->eType; |
548 assert( eType!=FTSQUERY_PHRASE || !p->pPhrase->isNot ); | 580 assert( eType!=FTSQUERY_PHRASE || !p->pPhrase->isNot ); |
549 isPhrase = (eType==FTSQUERY_PHRASE || p->pLeft); | 581 isPhrase = (eType==FTSQUERY_PHRASE || p->pLeft); |
550 | 582 |
551 /* The isRequirePhrase variable is set to true if a phrase or | 583 /* The isRequirePhrase variable is set to true if a phrase or |
552 ** an expression contained in parentheses is required. If a | 584 ** an expression contained in parentheses is required. If a |
553 ** binary operator (AND, OR, NOT or NEAR) is encountered when | 585 ** binary operator (AND, OR, NOT or NEAR) is encountered when |
554 ** isRequirePhrase is set, this is a syntax error. | 586 ** isRequirePhrase is set, this is a syntax error. |
555 */ | 587 */ |
556 if( !isPhrase && isRequirePhrase ){ | 588 if( !isPhrase && isRequirePhrase ){ |
557 sqlite3Fts3ExprFree(p); | 589 sqlite3Fts3ExprFree(p); |
558 rc = SQLITE_ERROR; | 590 rc = SQLITE_ERROR; |
559 goto exprparse_out; | 591 goto exprparse_out; |
560 } | 592 } |
561 | 593 |
562 if( isPhrase && !isRequirePhrase ){ | 594 if( isPhrase && !isRequirePhrase ){ |
563 /* Insert an implicit AND operator. */ | 595 /* Insert an implicit AND operator. */ |
564 Fts3Expr *pAnd; | 596 Fts3Expr *pAnd; |
565 assert( pRet && pPrev ); | 597 assert( pRet && pPrev ); |
566 pAnd = sqlite3_malloc(sizeof(Fts3Expr)); | 598 pAnd = fts3MallocZero(sizeof(Fts3Expr)); |
567 if( !pAnd ){ | 599 if( !pAnd ){ |
568 sqlite3Fts3ExprFree(p); | 600 sqlite3Fts3ExprFree(p); |
569 rc = SQLITE_NOMEM; | 601 rc = SQLITE_NOMEM; |
570 goto exprparse_out; | 602 goto exprparse_out; |
571 } | 603 } |
572 memset(pAnd, 0, sizeof(Fts3Expr)); | |
573 pAnd->eType = FTSQUERY_AND; | 604 pAnd->eType = FTSQUERY_AND; |
574 insertBinaryOperator(&pRet, pPrev, pAnd); | 605 insertBinaryOperator(&pRet, pPrev, pAnd); |
575 pPrev = pAnd; | 606 pPrev = pAnd; |
576 } | 607 } |
577 | 608 |
578 /* This test catches attempts to make either operand of a NEAR | 609 /* This test catches attempts to make either operand of a NEAR |
579 ** operator something other than a phrase. For example, either of | 610 ** operator something other than a phrase. For example, either of |
580 ** the following: | 611 ** the following: |
581 ** | 612 ** |
582 ** (bracketed expression) NEAR phrase | 613 ** (bracketed expression) NEAR phrase |
(...skipping 100 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
683 sParse.pTokenizer = pTokenizer; | 714 sParse.pTokenizer = pTokenizer; |
684 sParse.azCol = (const char **)azCol; | 715 sParse.azCol = (const char **)azCol; |
685 sParse.nCol = nCol; | 716 sParse.nCol = nCol; |
686 sParse.iDefaultCol = iDefaultCol; | 717 sParse.iDefaultCol = iDefaultCol; |
687 sParse.nNest = 0; | 718 sParse.nNest = 0; |
688 if( z==0 ){ | 719 if( z==0 ){ |
689 *ppExpr = 0; | 720 *ppExpr = 0; |
690 return SQLITE_OK; | 721 return SQLITE_OK; |
691 } | 722 } |
692 if( n<0 ){ | 723 if( n<0 ){ |
693 n = strlen(z); | 724 n = (int)strlen(z); |
694 } | 725 } |
695 rc = fts3ExprParse(&sParse, z, n, ppExpr, &nParsed); | 726 rc = fts3ExprParse(&sParse, z, n, ppExpr, &nParsed); |
696 | 727 |
697 /* Check for mismatched parenthesis */ | 728 /* Check for mismatched parenthesis */ |
698 if( rc==SQLITE_OK && sParse.nNest ){ | 729 if( rc==SQLITE_OK && sParse.nNest ){ |
699 rc = SQLITE_ERROR; | 730 rc = SQLITE_ERROR; |
700 sqlite3Fts3ExprFree(*ppExpr); | 731 sqlite3Fts3ExprFree(*ppExpr); |
701 *ppExpr = 0; | 732 *ppExpr = 0; |
702 } | 733 } |
703 | 734 |
704 return rc; | 735 return rc; |
705 } | 736 } |
706 | 737 |
707 /* | 738 /* |
708 ** Free a parsed fts3 query expression allocated by sqlite3Fts3ExprParse(). | 739 ** Free a parsed fts3 query expression allocated by sqlite3Fts3ExprParse(). |
709 */ | 740 */ |
710 void sqlite3Fts3ExprFree(Fts3Expr *p){ | 741 void sqlite3Fts3ExprFree(Fts3Expr *p){ |
711 if( p ){ | 742 if( p ){ |
712 sqlite3Fts3ExprFree(p->pLeft); | 743 sqlite3Fts3ExprFree(p->pLeft); |
713 sqlite3Fts3ExprFree(p->pRight); | 744 sqlite3Fts3ExprFree(p->pRight); |
| 745 sqlite3_free(p->aDoclist); |
714 sqlite3_free(p); | 746 sqlite3_free(p); |
715 } | 747 } |
716 } | 748 } |
717 | 749 |
718 /**************************************************************************** | 750 /**************************************************************************** |
719 ***************************************************************************** | 751 ***************************************************************************** |
720 ** Everything after this point is just test code. | 752 ** Everything after this point is just test code. |
721 */ | 753 */ |
722 | 754 |
723 #ifdef SQLITE_TEST | 755 #ifdef SQLITE_TEST |
(...skipping 14 matching lines...) Expand all Loading... |
738 | 770 |
739 *pp = 0; | 771 *pp = 0; |
740 rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); | 772 rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); |
741 if( rc!=SQLITE_OK ){ | 773 if( rc!=SQLITE_OK ){ |
742 return rc; | 774 return rc; |
743 } | 775 } |
744 | 776 |
745 sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC); | 777 sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC); |
746 if( SQLITE_ROW==sqlite3_step(pStmt) ){ | 778 if( SQLITE_ROW==sqlite3_step(pStmt) ){ |
747 if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){ | 779 if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){ |
748 memcpy(pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp)); | 780 memcpy((void *)pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp)); |
749 } | 781 } |
750 } | 782 } |
751 | 783 |
752 return sqlite3_finalize(pStmt); | 784 return sqlite3_finalize(pStmt); |
753 } | 785 } |
754 | 786 |
755 /* | 787 /* |
756 ** This function is part of the test interface for the query parser. It | 788 ** Return a pointer to a buffer containing a text representation of the |
757 ** writes a text representation of the query expression pExpr into the | 789 ** expression passed as the first argument. The buffer is obtained from |
758 ** buffer pointed to by argument zBuf. It is assumed that zBuf is large | 790 ** sqlite3_malloc(). It is the responsibility of the caller to use |
759 ** enough to store the required text representation. | 791 ** sqlite3_free() to release the memory. If an OOM condition is encountered, |
| 792 ** NULL is returned. |
| 793 ** |
| 794 ** If the second argument is not NULL, then its contents are prepended to |
| 795 ** the returned expression text and then freed using sqlite3_free(). |
760 */ | 796 */ |
761 static void exprToString(Fts3Expr *pExpr, char *zBuf){ | 797 static char *exprToString(Fts3Expr *pExpr, char *zBuf){ |
762 switch( pExpr->eType ){ | 798 switch( pExpr->eType ){ |
763 case FTSQUERY_PHRASE: { | 799 case FTSQUERY_PHRASE: { |
764 Fts3Phrase *pPhrase = pExpr->pPhrase; | 800 Fts3Phrase *pPhrase = pExpr->pPhrase; |
765 int i; | 801 int i; |
766 zBuf += sprintf(zBuf, "PHRASE %d %d", pPhrase->iColumn, pPhrase->isNot); | 802 zBuf = sqlite3_mprintf( |
767 for(i=0; i<pPhrase->nToken; i++){ | 803 "%zPHRASE %d %d", zBuf, pPhrase->iColumn, pPhrase->isNot); |
768 zBuf += sprintf(zBuf," %.*s",pPhrase->aToken[i].n,pPhrase->aToken[i].z); | 804 for(i=0; zBuf && i<pPhrase->nToken; i++){ |
769 zBuf += sprintf(zBuf,"%s", (pPhrase->aToken[i].isPrefix?"+":"")); | 805 zBuf = sqlite3_mprintf("%z %.*s%s", zBuf, |
| 806 pPhrase->aToken[i].n, pPhrase->aToken[i].z, |
| 807 (pPhrase->aToken[i].isPrefix?"+":"") |
| 808 ); |
770 } | 809 } |
771 return; | 810 return zBuf; |
772 } | 811 } |
773 | 812 |
774 case FTSQUERY_NEAR: | 813 case FTSQUERY_NEAR: |
775 zBuf += sprintf(zBuf, "NEAR/%d ", pExpr->nNear); | 814 zBuf = sqlite3_mprintf("%zNEAR/%d ", zBuf, pExpr->nNear); |
776 break; | 815 break; |
777 case FTSQUERY_NOT: | 816 case FTSQUERY_NOT: |
778 zBuf += sprintf(zBuf, "NOT "); | 817 zBuf = sqlite3_mprintf("%zNOT ", zBuf); |
779 break; | 818 break; |
780 case FTSQUERY_AND: | 819 case FTSQUERY_AND: |
781 zBuf += sprintf(zBuf, "AND "); | 820 zBuf = sqlite3_mprintf("%zAND ", zBuf); |
782 break; | 821 break; |
783 case FTSQUERY_OR: | 822 case FTSQUERY_OR: |
784 zBuf += sprintf(zBuf, "OR "); | 823 zBuf = sqlite3_mprintf("%zOR ", zBuf); |
785 break; | 824 break; |
786 } | 825 } |
787 | 826 |
788 zBuf += sprintf(zBuf, "{"); | 827 if( zBuf ) zBuf = sqlite3_mprintf("%z{", zBuf); |
789 exprToString(pExpr->pLeft, zBuf); | 828 if( zBuf ) zBuf = exprToString(pExpr->pLeft, zBuf); |
790 zBuf += strlen(zBuf); | 829 if( zBuf ) zBuf = sqlite3_mprintf("%z} {", zBuf); |
791 zBuf += sprintf(zBuf, "} "); | |
792 | 830 |
793 zBuf += sprintf(zBuf, "{"); | 831 if( zBuf ) zBuf = exprToString(pExpr->pRight, zBuf); |
794 exprToString(pExpr->pRight, zBuf); | 832 if( zBuf ) zBuf = sqlite3_mprintf("%z}", zBuf); |
795 zBuf += strlen(zBuf); | 833 |
796 zBuf += sprintf(zBuf, "}"); | 834 return zBuf; |
797 } | 835 } |
798 | 836 |
799 /* | 837 /* |
800 ** This is the implementation of a scalar SQL function used to test the | 838 ** This is the implementation of a scalar SQL function used to test the |
801 ** expression parser. It should be called as follows: | 839 ** expression parser. It should be called as follows: |
802 ** | 840 ** |
803 ** fts3_exprtest(<tokenizer>, <expr>, <column 1>, ...); | 841 ** fts3_exprtest(<tokenizer>, <expr>, <column 1>, ...); |
804 ** | 842 ** |
805 ** The first argument, <tokenizer>, is the name of the fts3 tokenizer used | 843 ** The first argument, <tokenizer>, is the name of the fts3 tokenizer used |
806 ** to parse the query expression (see README.tokenizers). The second argument | 844 ** to parse the query expression (see README.tokenizers). The second argument |
(...skipping 10 matching lines...) Expand all Loading... |
817 ){ | 855 ){ |
818 sqlite3_tokenizer_module const *pModule = 0; | 856 sqlite3_tokenizer_module const *pModule = 0; |
819 sqlite3_tokenizer *pTokenizer = 0; | 857 sqlite3_tokenizer *pTokenizer = 0; |
820 int rc; | 858 int rc; |
821 char **azCol = 0; | 859 char **azCol = 0; |
822 const char *zExpr; | 860 const char *zExpr; |
823 int nExpr; | 861 int nExpr; |
824 int nCol; | 862 int nCol; |
825 int ii; | 863 int ii; |
826 Fts3Expr *pExpr; | 864 Fts3Expr *pExpr; |
| 865 char *zBuf = 0; |
827 sqlite3 *db = sqlite3_context_db_handle(context); | 866 sqlite3 *db = sqlite3_context_db_handle(context); |
828 | 867 |
829 if( argc<3 ){ | 868 if( argc<3 ){ |
830 sqlite3_result_error(context, | 869 sqlite3_result_error(context, |
831 "Usage: fts3_exprtest(tokenizer, expr, col1, ...", -1 | 870 "Usage: fts3_exprtest(tokenizer, expr, col1, ...", -1 |
832 ); | 871 ); |
833 return; | 872 return; |
834 } | 873 } |
835 | 874 |
836 rc = queryTestTokenizer(db, | 875 rc = queryTestTokenizer(db, |
(...skipping 22 matching lines...) Expand all Loading... |
859 sqlite3_result_error_nomem(context); | 898 sqlite3_result_error_nomem(context); |
860 goto exprtest_out; | 899 goto exprtest_out; |
861 } | 900 } |
862 for(ii=0; ii<nCol; ii++){ | 901 for(ii=0; ii<nCol; ii++){ |
863 azCol[ii] = (char *)sqlite3_value_text(argv[ii+2]); | 902 azCol[ii] = (char *)sqlite3_value_text(argv[ii+2]); |
864 } | 903 } |
865 | 904 |
866 rc = sqlite3Fts3ExprParse( | 905 rc = sqlite3Fts3ExprParse( |
867 pTokenizer, azCol, nCol, nCol, zExpr, nExpr, &pExpr | 906 pTokenizer, azCol, nCol, nCol, zExpr, nExpr, &pExpr |
868 ); | 907 ); |
869 if( rc==SQLITE_NOMEM ){ | 908 if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM ){ |
| 909 sqlite3_result_error(context, "Error parsing expression", -1); |
| 910 }else if( rc==SQLITE_NOMEM || !(zBuf = exprToString(pExpr, 0)) ){ |
870 sqlite3_result_error_nomem(context); | 911 sqlite3_result_error_nomem(context); |
871 goto exprtest_out; | 912 }else{ |
872 }else if( rc==SQLITE_OK ){ | |
873 char zBuf[4096]; | |
874 exprToString(pExpr, zBuf); | |
875 sqlite3_result_text(context, zBuf, -1, SQLITE_TRANSIENT); | 913 sqlite3_result_text(context, zBuf, -1, SQLITE_TRANSIENT); |
876 sqlite3Fts3ExprFree(pExpr); | 914 sqlite3_free(zBuf); |
877 }else{ | |
878 sqlite3_result_error(context, "Error parsing expression", -1); | |
879 } | 915 } |
880 | 916 |
| 917 sqlite3Fts3ExprFree(pExpr); |
| 918 |
881 exprtest_out: | 919 exprtest_out: |
882 if( pModule && pTokenizer ){ | 920 if( pModule && pTokenizer ){ |
883 rc = pModule->xDestroy(pTokenizer); | 921 rc = pModule->xDestroy(pTokenizer); |
884 } | 922 } |
885 sqlite3_free(azCol); | 923 sqlite3_free(azCol); |
886 } | 924 } |
887 | 925 |
888 /* | 926 /* |
889 ** Register the query expression parser test function fts3_exprtest() | 927 ** Register the query expression parser test function fts3_exprtest() |
890 ** with database connection db. | 928 ** with database connection db. |
891 */ | 929 */ |
892 void sqlite3Fts3ExprInitTestInterface(sqlite3* db){ | 930 int sqlite3Fts3ExprInitTestInterface(sqlite3* db){ |
893 sqlite3_create_function( | 931 return sqlite3_create_function( |
894 db, "fts3_exprtest", -1, SQLITE_UTF8, 0, fts3ExprTest, 0, 0 | 932 db, "fts3_exprtest", -1, SQLITE_UTF8, 0, fts3ExprTest, 0, 0 |
895 ); | 933 ); |
896 } | 934 } |
897 | 935 |
898 #endif | 936 #endif |
899 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ | 937 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ |
OLD | NEW |