OLD | NEW |
| (Empty) |
1 /* | |
2 ** 2008 Nov 28 | |
3 ** | |
4 ** The author disclaims copyright to this source code. In place of | |
5 ** a legal notice, here is a blessing: | |
6 ** | |
7 ** May you do good and not evil. | |
8 ** May you find forgiveness for yourself and forgive others. | |
9 ** May you share freely, never taking more than you give. | |
10 ** | |
11 ****************************************************************************** | |
12 ** | |
13 ** This module contains code that implements a parser for fts3 query strings | |
14 ** (the right-hand argument to the MATCH operator). Because the supported | |
15 ** syntax is relatively simple, the whole tokenizer/parser system is | |
16 ** hand-coded. | |
17 */ | |
18 #include "fts3Int.h" | |
19 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) | |
20 | |
21 /* | |
22 ** By default, this module parses the legacy syntax that has been | |
23 ** traditionally used by fts3. Or, if SQLITE_ENABLE_FTS3_PARENTHESIS | |
24 ** is defined, then it uses the new syntax. The differences between | |
25 ** the new and the old syntaxes are: | |
26 ** | |
27 ** a) The new syntax supports parenthesis. The old does not. | |
28 ** | |
29 ** b) The new syntax supports the AND and NOT operators. The old does not. | |
30 ** | |
31 ** c) The old syntax supports the "-" token qualifier. This is not | |
32 ** supported by the new syntax (it is replaced by the NOT operator). | |
33 ** | |
34 ** d) When using the old syntax, the OR operator has a greater precedence | |
35 ** than an implicit AND. When using the new, both implicity and explicit | |
36 ** AND operators have a higher precedence than OR. | |
37 ** | |
38 ** If compiled with SQLITE_TEST defined, then this module exports the | |
39 ** symbol "int sqlite3_fts3_enable_parentheses". Setting this variable | |
40 ** to zero causes the module to use the old syntax. If it is set to | |
41 ** non-zero the new syntax is activated. This is so both syntaxes can | |
42 ** be tested using a single build of testfixture. | |
43 ** | |
44 ** The following describes the syntax supported by the fts3 MATCH | |
45 ** operator in a similar format to that used by the lemon parser | |
46 ** generator. This module does not use actually lemon, it uses a | |
47 ** custom parser. | |
48 ** | |
49 ** query ::= andexpr (OR andexpr)*. | |
50 ** | |
51 ** andexpr ::= notexpr (AND? notexpr)*. | |
52 ** | |
53 ** notexpr ::= nearexpr (NOT nearexpr|-TOKEN)*. | |
54 ** notexpr ::= LP query RP. | |
55 ** | |
56 ** nearexpr ::= phrase (NEAR distance_opt nearexpr)*. | |
57 ** | |
58 ** distance_opt ::= . | |
59 ** distance_opt ::= / INTEGER. | |
60 ** | |
61 ** phrase ::= TOKEN. | |
62 ** phrase ::= COLUMN:TOKEN. | |
63 ** phrase ::= "TOKEN TOKEN TOKEN...". | |
64 */ | |
65 | |
66 #ifdef SQLITE_TEST | |
67 int sqlite3_fts3_enable_parentheses = 0; | |
68 #else | |
69 # ifdef SQLITE_ENABLE_FTS3_PARENTHESIS | |
70 # define sqlite3_fts3_enable_parentheses 1 | |
71 # else | |
72 # define sqlite3_fts3_enable_parentheses 0 | |
73 # endif | |
74 #endif | |
75 | |
76 /* | |
77 ** Default span for NEAR operators. | |
78 */ | |
79 #define SQLITE_FTS3_DEFAULT_NEAR_PARAM 10 | |
80 | |
81 #include <string.h> | |
82 #include <assert.h> | |
83 | |
84 /* | |
85 ** isNot: | |
86 ** This variable is used by function getNextNode(). When getNextNode() is | |
87 ** called, it sets ParseContext.isNot to true if the 'next node' is a | |
88 ** FTSQUERY_PHRASE with a unary "-" attached to it. i.e. "mysql" in the | |
89 ** FTS3 query "sqlite -mysql". Otherwise, ParseContext.isNot is set to | |
90 ** zero. | |
91 */ | |
92 typedef struct ParseContext ParseContext; | |
93 struct ParseContext { | |
94 sqlite3_tokenizer *pTokenizer; /* Tokenizer module */ | |
95 int iLangid; /* Language id used with tokenizer */ | |
96 const char **azCol; /* Array of column names for fts3 table */ | |
97 int bFts4; /* True to allow FTS4-only syntax */ | |
98 int nCol; /* Number of entries in azCol[] */ | |
99 int iDefaultCol; /* Default column to query */ | |
100 int isNot; /* True if getNextNode() sees a unary - */ | |
101 sqlite3_context *pCtx; /* Write error message here */ | |
102 int nNest; /* Number of nested brackets */ | |
103 }; | |
104 | |
105 /* | |
106 ** This function is equivalent to the standard isspace() function. | |
107 ** | |
108 ** The standard isspace() can be awkward to use safely, because although it | |
109 ** is defined to accept an argument of type int, its behavior when passed | |
110 ** an integer that falls outside of the range of the unsigned char type | |
111 ** is undefined (and sometimes, "undefined" means segfault). This wrapper | |
112 ** is defined to accept an argument of type char, and always returns 0 for | |
113 ** any values that fall outside of the range of the unsigned char type (i.e. | |
114 ** negative values). | |
115 */ | |
116 static int fts3isspace(char c){ | |
117 return c==' ' || c=='\t' || c=='\n' || c=='\r' || c=='\v' || c=='\f'; | |
118 } | |
119 | |
120 /* | |
121 ** Allocate nByte bytes of memory using sqlite3_malloc(). If successful, | |
122 ** zero the memory before returning a pointer to it. If unsuccessful, | |
123 ** return NULL. | |
124 */ | |
125 static void *fts3MallocZero(int nByte){ | |
126 void *pRet = sqlite3_malloc(nByte); | |
127 if( pRet ) memset(pRet, 0, nByte); | |
128 return pRet; | |
129 } | |
130 | |
131 int sqlite3Fts3OpenTokenizer( | |
132 sqlite3_tokenizer *pTokenizer, | |
133 int iLangid, | |
134 const char *z, | |
135 int n, | |
136 sqlite3_tokenizer_cursor **ppCsr | |
137 ){ | |
138 sqlite3_tokenizer_module const *pModule = pTokenizer->pModule; | |
139 sqlite3_tokenizer_cursor *pCsr = 0; | |
140 int rc; | |
141 | |
142 rc = pModule->xOpen(pTokenizer, z, n, &pCsr); | |
143 assert( rc==SQLITE_OK || pCsr==0 ); | |
144 if( rc==SQLITE_OK ){ | |
145 pCsr->pTokenizer = pTokenizer; | |
146 if( pModule->iVersion>=1 ){ | |
147 rc = pModule->xLanguageid(pCsr, iLangid); | |
148 if( rc!=SQLITE_OK ){ | |
149 pModule->xClose(pCsr); | |
150 pCsr = 0; | |
151 } | |
152 } | |
153 } | |
154 *ppCsr = pCsr; | |
155 return rc; | |
156 } | |
157 | |
158 /* | |
159 ** Function getNextNode(), which is called by fts3ExprParse(), may itself | |
160 ** call fts3ExprParse(). So this forward declaration is required. | |
161 */ | |
162 static int fts3ExprParse(ParseContext *, const char *, int, Fts3Expr **, int *); | |
163 | |
164 /* | |
165 ** Extract the next token from buffer z (length n) using the tokenizer | |
166 ** and other information (column names etc.) in pParse. Create an Fts3Expr | |
167 ** structure of type FTSQUERY_PHRASE containing a phrase consisting of this | |
168 ** single token and set *ppExpr to point to it. If the end of the buffer is | |
169 ** reached before a token is found, set *ppExpr to zero. It is the | |
170 ** responsibility of the caller to eventually deallocate the allocated | |
171 ** Fts3Expr structure (if any) by passing it to sqlite3_free(). | |
172 ** | |
173 ** Return SQLITE_OK if successful, or SQLITE_NOMEM if a memory allocation | |
174 ** fails. | |
175 */ | |
176 static int getNextToken( | |
177 ParseContext *pParse, /* fts3 query parse context */ | |
178 int iCol, /* Value for Fts3Phrase.iColumn */ | |
179 const char *z, int n, /* Input string */ | |
180 Fts3Expr **ppExpr, /* OUT: expression */ | |
181 int *pnConsumed /* OUT: Number of bytes consumed */ | |
182 ){ | |
183 sqlite3_tokenizer *pTokenizer = pParse->pTokenizer; | |
184 sqlite3_tokenizer_module const *pModule = pTokenizer->pModule; | |
185 int rc; | |
186 sqlite3_tokenizer_cursor *pCursor; | |
187 Fts3Expr *pRet = 0; | |
188 int i = 0; | |
189 | |
190 /* Set variable i to the maximum number of bytes of input to tokenize. */ | |
191 for(i=0; i<n; i++){ | |
192 if( sqlite3_fts3_enable_parentheses && (z[i]=='(' || z[i]==')') ) break; | |
193 if( z[i]=='"' ) break; | |
194 } | |
195 | |
196 *pnConsumed = i; | |
197 rc = sqlite3Fts3OpenTokenizer(pTokenizer, pParse->iLangid, z, i, &pCursor); | |
198 if( rc==SQLITE_OK ){ | |
199 const char *zToken; | |
200 int nToken = 0, iStart = 0, iEnd = 0, iPosition = 0; | |
201 int nByte; /* total space to allocate */ | |
202 | |
203 rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition); | |
204 if( rc==SQLITE_OK ){ | |
205 nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken; | |
206 pRet = (Fts3Expr *)fts3MallocZero(nByte); | |
207 if( !pRet ){ | |
208 rc = SQLITE_NOMEM; | |
209 }else{ | |
210 pRet->eType = FTSQUERY_PHRASE; | |
211 pRet->pPhrase = (Fts3Phrase *)&pRet[1]; | |
212 pRet->pPhrase->nToken = 1; | |
213 pRet->pPhrase->iColumn = iCol; | |
214 pRet->pPhrase->aToken[0].n = nToken; | |
215 pRet->pPhrase->aToken[0].z = (char *)&pRet->pPhrase[1]; | |
216 memcpy(pRet->pPhrase->aToken[0].z, zToken, nToken); | |
217 | |
218 if( iEnd<n && z[iEnd]=='*' ){ | |
219 pRet->pPhrase->aToken[0].isPrefix = 1; | |
220 iEnd++; | |
221 } | |
222 | |
223 while( 1 ){ | |
224 if( !sqlite3_fts3_enable_parentheses | |
225 && iStart>0 && z[iStart-1]=='-' | |
226 ){ | |
227 pParse->isNot = 1; | |
228 iStart--; | |
229 }else if( pParse->bFts4 && iStart>0 && z[iStart-1]=='^' ){ | |
230 pRet->pPhrase->aToken[0].bFirst = 1; | |
231 iStart--; | |
232 }else{ | |
233 break; | |
234 } | |
235 } | |
236 | |
237 } | |
238 *pnConsumed = iEnd; | |
239 }else if( i && rc==SQLITE_DONE ){ | |
240 rc = SQLITE_OK; | |
241 } | |
242 | |
243 pModule->xClose(pCursor); | |
244 } | |
245 | |
246 *ppExpr = pRet; | |
247 return rc; | |
248 } | |
249 | |
250 | |
251 /* | |
252 ** Enlarge a memory allocation. If an out-of-memory allocation occurs, | |
253 ** then free the old allocation. | |
254 */ | |
255 static void *fts3ReallocOrFree(void *pOrig, int nNew){ | |
256 void *pRet = sqlite3_realloc(pOrig, nNew); | |
257 if( !pRet ){ | |
258 sqlite3_free(pOrig); | |
259 } | |
260 return pRet; | |
261 } | |
262 | |
263 /* | |
264 ** Buffer zInput, length nInput, contains the contents of a quoted string | |
265 ** that appeared as part of an fts3 query expression. Neither quote character | |
266 ** is included in the buffer. This function attempts to tokenize the entire | |
267 ** input buffer and create an Fts3Expr structure of type FTSQUERY_PHRASE | |
268 ** containing the results. | |
269 ** | |
270 ** If successful, SQLITE_OK is returned and *ppExpr set to point at the | |
271 ** allocated Fts3Expr structure. Otherwise, either SQLITE_NOMEM (out of memory | |
272 ** error) or SQLITE_ERROR (tokenization error) is returned and *ppExpr set | |
273 ** to 0. | |
274 */ | |
275 static int getNextString( | |
276 ParseContext *pParse, /* fts3 query parse context */ | |
277 const char *zInput, int nInput, /* Input string */ | |
278 Fts3Expr **ppExpr /* OUT: expression */ | |
279 ){ | |
280 sqlite3_tokenizer *pTokenizer = pParse->pTokenizer; | |
281 sqlite3_tokenizer_module const *pModule = pTokenizer->pModule; | |
282 int rc; | |
283 Fts3Expr *p = 0; | |
284 sqlite3_tokenizer_cursor *pCursor = 0; | |
285 char *zTemp = 0; | |
286 int nTemp = 0; | |
287 | |
288 const int nSpace = sizeof(Fts3Expr) + sizeof(Fts3Phrase); | |
289 int nToken = 0; | |
290 | |
291 /* The final Fts3Expr data structure, including the Fts3Phrase, | |
292 ** Fts3PhraseToken structures token buffers are all stored as a single | |
293 ** allocation so that the expression can be freed with a single call to | |
294 ** sqlite3_free(). Setting this up requires a two pass approach. | |
295 ** | |
296 ** The first pass, in the block below, uses a tokenizer cursor to iterate | |
297 ** through the tokens in the expression. This pass uses fts3ReallocOrFree() | |
298 ** to assemble data in two dynamic buffers: | |
299 ** | |
300 ** Buffer p: Points to the Fts3Expr structure, followed by the Fts3Phrase | |
301 ** structure, followed by the array of Fts3PhraseToken | |
302 ** structures. This pass only populates the Fts3PhraseToken array. | |
303 ** | |
304 ** Buffer zTemp: Contains copies of all tokens. | |
305 ** | |
306 ** The second pass, in the block that begins "if( rc==SQLITE_DONE )" below, | |
307 ** appends buffer zTemp to buffer p, and fills in the Fts3Expr and Fts3Phrase | |
308 ** structures. | |
309 */ | |
310 rc = sqlite3Fts3OpenTokenizer( | |
311 pTokenizer, pParse->iLangid, zInput, nInput, &pCursor); | |
312 if( rc==SQLITE_OK ){ | |
313 int ii; | |
314 for(ii=0; rc==SQLITE_OK; ii++){ | |
315 const char *zByte; | |
316 int nByte = 0, iBegin = 0, iEnd = 0, iPos = 0; | |
317 rc = pModule->xNext(pCursor, &zByte, &nByte, &iBegin, &iEnd, &iPos); | |
318 if( rc==SQLITE_OK ){ | |
319 Fts3PhraseToken *pToken; | |
320 | |
321 p = fts3ReallocOrFree(p, nSpace + ii*sizeof(Fts3PhraseToken)); | |
322 if( !p ) goto no_mem; | |
323 | |
324 zTemp = fts3ReallocOrFree(zTemp, nTemp + nByte); | |
325 if( !zTemp ) goto no_mem; | |
326 | |
327 assert( nToken==ii ); | |
328 pToken = &((Fts3Phrase *)(&p[1]))->aToken[ii]; | |
329 memset(pToken, 0, sizeof(Fts3PhraseToken)); | |
330 | |
331 memcpy(&zTemp[nTemp], zByte, nByte); | |
332 nTemp += nByte; | |
333 | |
334 pToken->n = nByte; | |
335 pToken->isPrefix = (iEnd<nInput && zInput[iEnd]=='*'); | |
336 pToken->bFirst = (iBegin>0 && zInput[iBegin-1]=='^'); | |
337 nToken = ii+1; | |
338 } | |
339 } | |
340 | |
341 pModule->xClose(pCursor); | |
342 pCursor = 0; | |
343 } | |
344 | |
345 if( rc==SQLITE_DONE ){ | |
346 int jj; | |
347 char *zBuf = 0; | |
348 | |
349 p = fts3ReallocOrFree(p, nSpace + nToken*sizeof(Fts3PhraseToken) + nTemp); | |
350 if( !p ) goto no_mem; | |
351 memset(p, 0, (char *)&(((Fts3Phrase *)&p[1])->aToken[0])-(char *)p); | |
352 p->eType = FTSQUERY_PHRASE; | |
353 p->pPhrase = (Fts3Phrase *)&p[1]; | |
354 p->pPhrase->iColumn = pParse->iDefaultCol; | |
355 p->pPhrase->nToken = nToken; | |
356 | |
357 zBuf = (char *)&p->pPhrase->aToken[nToken]; | |
358 if( zTemp ){ | |
359 memcpy(zBuf, zTemp, nTemp); | |
360 sqlite3_free(zTemp); | |
361 }else{ | |
362 assert( nTemp==0 ); | |
363 } | |
364 | |
365 for(jj=0; jj<p->pPhrase->nToken; jj++){ | |
366 p->pPhrase->aToken[jj].z = zBuf; | |
367 zBuf += p->pPhrase->aToken[jj].n; | |
368 } | |
369 rc = SQLITE_OK; | |
370 } | |
371 | |
372 *ppExpr = p; | |
373 return rc; | |
374 no_mem: | |
375 | |
376 if( pCursor ){ | |
377 pModule->xClose(pCursor); | |
378 } | |
379 sqlite3_free(zTemp); | |
380 sqlite3_free(p); | |
381 *ppExpr = 0; | |
382 return SQLITE_NOMEM; | |
383 } | |
384 | |
385 /* | |
386 ** The output variable *ppExpr is populated with an allocated Fts3Expr | |
387 ** structure, or set to 0 if the end of the input buffer is reached. | |
388 ** | |
389 ** Returns an SQLite error code. SQLITE_OK if everything works, SQLITE_NOMEM | |
390 ** if a malloc failure occurs, or SQLITE_ERROR if a parse error is encountered. | |
391 ** If SQLITE_ERROR is returned, pContext is populated with an error message. | |
392 */ | |
393 static int getNextNode( | |
394 ParseContext *pParse, /* fts3 query parse context */ | |
395 const char *z, int n, /* Input string */ | |
396 Fts3Expr **ppExpr, /* OUT: expression */ | |
397 int *pnConsumed /* OUT: Number of bytes consumed */ | |
398 ){ | |
399 static const struct Fts3Keyword { | |
400 char *z; /* Keyword text */ | |
401 unsigned char n; /* Length of the keyword */ | |
402 unsigned char parenOnly; /* Only valid in paren mode */ | |
403 unsigned char eType; /* Keyword code */ | |
404 } aKeyword[] = { | |
405 { "OR" , 2, 0, FTSQUERY_OR }, | |
406 { "AND", 3, 1, FTSQUERY_AND }, | |
407 { "NOT", 3, 1, FTSQUERY_NOT }, | |
408 { "NEAR", 4, 0, FTSQUERY_NEAR } | |
409 }; | |
410 int ii; | |
411 int iCol; | |
412 int iColLen; | |
413 int rc; | |
414 Fts3Expr *pRet = 0; | |
415 | |
416 const char *zInput = z; | |
417 int nInput = n; | |
418 | |
419 pParse->isNot = 0; | |
420 | |
421 /* Skip over any whitespace before checking for a keyword, an open or | |
422 ** close bracket, or a quoted string. | |
423 */ | |
424 while( nInput>0 && fts3isspace(*zInput) ){ | |
425 nInput--; | |
426 zInput++; | |
427 } | |
428 if( nInput==0 ){ | |
429 return SQLITE_DONE; | |
430 } | |
431 | |
432 /* See if we are dealing with a keyword. */ | |
433 for(ii=0; ii<(int)(sizeof(aKeyword)/sizeof(struct Fts3Keyword)); ii++){ | |
434 const struct Fts3Keyword *pKey = &aKeyword[ii]; | |
435 | |
436 if( (pKey->parenOnly & ~sqlite3_fts3_enable_parentheses)!=0 ){ | |
437 continue; | |
438 } | |
439 | |
440 if( nInput>=pKey->n && 0==memcmp(zInput, pKey->z, pKey->n) ){ | |
441 int nNear = SQLITE_FTS3_DEFAULT_NEAR_PARAM; | |
442 int nKey = pKey->n; | |
443 char cNext; | |
444 | |
445 /* If this is a "NEAR" keyword, check for an explicit nearness. */ | |
446 if( pKey->eType==FTSQUERY_NEAR ){ | |
447 assert( nKey==4 ); | |
448 if( zInput[4]=='/' && zInput[5]>='0' && zInput[5]<='9' ){ | |
449 nNear = 0; | |
450 for(nKey=5; zInput[nKey]>='0' && zInput[nKey]<='9'; nKey++){ | |
451 nNear = nNear * 10 + (zInput[nKey] - '0'); | |
452 } | |
453 } | |
454 } | |
455 | |
456 /* At this point this is probably a keyword. But for that to be true, | |
457 ** the next byte must contain either whitespace, an open or close | |
458 ** parenthesis, a quote character, or EOF. | |
459 */ | |
460 cNext = zInput[nKey]; | |
461 if( fts3isspace(cNext) | |
462 || cNext=='"' || cNext=='(' || cNext==')' || cNext==0 | |
463 ){ | |
464 pRet = (Fts3Expr *)fts3MallocZero(sizeof(Fts3Expr)); | |
465 if( !pRet ){ | |
466 return SQLITE_NOMEM; | |
467 } | |
468 pRet->eType = pKey->eType; | |
469 pRet->nNear = nNear; | |
470 *ppExpr = pRet; | |
471 *pnConsumed = (int)((zInput - z) + nKey); | |
472 return SQLITE_OK; | |
473 } | |
474 | |
475 /* Turns out that wasn't a keyword after all. This happens if the | |
476 ** user has supplied a token such as "ORacle". Continue. | |
477 */ | |
478 } | |
479 } | |
480 | |
481 /* See if we are dealing with a quoted phrase. If this is the case, then | |
482 ** search for the closing quote and pass the whole string to getNextString() | |
483 ** for processing. This is easy to do, as fts3 has no syntax for escaping | |
484 ** a quote character embedded in a string. | |
485 */ | |
486 if( *zInput=='"' ){ | |
487 for(ii=1; ii<nInput && zInput[ii]!='"'; ii++); | |
488 *pnConsumed = (int)((zInput - z) + ii + 1); | |
489 if( ii==nInput ){ | |
490 return SQLITE_ERROR; | |
491 } | |
492 return getNextString(pParse, &zInput[1], ii-1, ppExpr); | |
493 } | |
494 | |
495 if( sqlite3_fts3_enable_parentheses ){ | |
496 if( *zInput=='(' ){ | |
497 int nConsumed = 0; | |
498 pParse->nNest++; | |
499 rc = fts3ExprParse(pParse, zInput+1, nInput-1, ppExpr, &nConsumed); | |
500 if( rc==SQLITE_OK && !*ppExpr ){ rc = SQLITE_DONE; } | |
501 *pnConsumed = (int)(zInput - z) + 1 + nConsumed; | |
502 return rc; | |
503 }else if( *zInput==')' ){ | |
504 pParse->nNest--; | |
505 *pnConsumed = (int)((zInput - z) + 1); | |
506 *ppExpr = 0; | |
507 return SQLITE_DONE; | |
508 } | |
509 } | |
510 | |
511 /* If control flows to this point, this must be a regular token, or | |
512 ** the end of the input. Read a regular token using the sqlite3_tokenizer | |
513 ** interface. Before doing so, figure out if there is an explicit | |
514 ** column specifier for the token. | |
515 ** | |
516 ** TODO: Strangely, it is not possible to associate a column specifier | |
517 ** with a quoted phrase, only with a single token. Not sure if this was | |
518 ** an implementation artifact or an intentional decision when fts3 was | |
519 ** first implemented. Whichever it was, this module duplicates the | |
520 ** limitation. | |
521 */ | |
522 iCol = pParse->iDefaultCol; | |
523 iColLen = 0; | |
524 for(ii=0; ii<pParse->nCol; ii++){ | |
525 const char *zStr = pParse->azCol[ii]; | |
526 int nStr = (int)strlen(zStr); | |
527 if( nInput>nStr && zInput[nStr]==':' | |
528 && sqlite3_strnicmp(zStr, zInput, nStr)==0 | |
529 ){ | |
530 iCol = ii; | |
531 iColLen = (int)((zInput - z) + nStr + 1); | |
532 break; | |
533 } | |
534 } | |
535 rc = getNextToken(pParse, iCol, &z[iColLen], n-iColLen, ppExpr, pnConsumed); | |
536 *pnConsumed += iColLen; | |
537 return rc; | |
538 } | |
539 | |
540 /* | |
541 ** The argument is an Fts3Expr structure for a binary operator (any type | |
542 ** except an FTSQUERY_PHRASE). Return an integer value representing the | |
543 ** precedence of the operator. Lower values have a higher precedence (i.e. | |
544 ** group more tightly). For example, in the C language, the == operator | |
545 ** groups more tightly than ||, and would therefore have a higher precedence. | |
546 ** | |
547 ** When using the new fts3 query syntax (when SQLITE_ENABLE_FTS3_PARENTHESIS | |
548 ** is defined), the order of the operators in precedence from highest to | |
549 ** lowest is: | |
550 ** | |
551 ** NEAR | |
552 ** NOT | |
553 ** AND (including implicit ANDs) | |
554 ** OR | |
555 ** | |
556 ** Note that when using the old query syntax, the OR operator has a higher | |
557 ** precedence than the AND operator. | |
558 */ | |
559 static int opPrecedence(Fts3Expr *p){ | |
560 assert( p->eType!=FTSQUERY_PHRASE ); | |
561 if( sqlite3_fts3_enable_parentheses ){ | |
562 return p->eType; | |
563 }else if( p->eType==FTSQUERY_NEAR ){ | |
564 return 1; | |
565 }else if( p->eType==FTSQUERY_OR ){ | |
566 return 2; | |
567 } | |
568 assert( p->eType==FTSQUERY_AND ); | |
569 return 3; | |
570 } | |
571 | |
572 /* | |
573 ** Argument ppHead contains a pointer to the current head of a query | |
574 ** expression tree being parsed. pPrev is the expression node most recently | |
575 ** inserted into the tree. This function adds pNew, which is always a binary | |
576 ** operator node, into the expression tree based on the relative precedence | |
577 ** of pNew and the existing nodes of the tree. This may result in the head | |
578 ** of the tree changing, in which case *ppHead is set to the new root node. | |
579 */ | |
580 static void insertBinaryOperator( | |
581 Fts3Expr **ppHead, /* Pointer to the root node of a tree */ | |
582 Fts3Expr *pPrev, /* Node most recently inserted into the tree */ | |
583 Fts3Expr *pNew /* New binary node to insert into expression tree */ | |
584 ){ | |
585 Fts3Expr *pSplit = pPrev; | |
586 while( pSplit->pParent && opPrecedence(pSplit->pParent)<=opPrecedence(pNew) ){ | |
587 pSplit = pSplit->pParent; | |
588 } | |
589 | |
590 if( pSplit->pParent ){ | |
591 assert( pSplit->pParent->pRight==pSplit ); | |
592 pSplit->pParent->pRight = pNew; | |
593 pNew->pParent = pSplit->pParent; | |
594 }else{ | |
595 *ppHead = pNew; | |
596 } | |
597 pNew->pLeft = pSplit; | |
598 pSplit->pParent = pNew; | |
599 } | |
600 | |
601 /* | |
602 ** Parse the fts3 query expression found in buffer z, length n. This function | |
603 ** returns either when the end of the buffer is reached or an unmatched | |
604 ** closing bracket - ')' - is encountered. | |
605 ** | |
606 ** If successful, SQLITE_OK is returned, *ppExpr is set to point to the | |
607 ** parsed form of the expression and *pnConsumed is set to the number of | |
608 ** bytes read from buffer z. Otherwise, *ppExpr is set to 0 and SQLITE_NOMEM | |
609 ** (out of memory error) or SQLITE_ERROR (parse error) is returned. | |
610 */ | |
611 static int fts3ExprParse( | |
612 ParseContext *pParse, /* fts3 query parse context */ | |
613 const char *z, int n, /* Text of MATCH query */ | |
614 Fts3Expr **ppExpr, /* OUT: Parsed query structure */ | |
615 int *pnConsumed /* OUT: Number of bytes consumed */ | |
616 ){ | |
617 Fts3Expr *pRet = 0; | |
618 Fts3Expr *pPrev = 0; | |
619 Fts3Expr *pNotBranch = 0; /* Only used in legacy parse mode */ | |
620 int nIn = n; | |
621 const char *zIn = z; | |
622 int rc = SQLITE_OK; | |
623 int isRequirePhrase = 1; | |
624 | |
625 while( rc==SQLITE_OK ){ | |
626 Fts3Expr *p = 0; | |
627 int nByte = 0; | |
628 | |
629 rc = getNextNode(pParse, zIn, nIn, &p, &nByte); | |
630 assert( nByte>0 || (rc!=SQLITE_OK && p==0) ); | |
631 if( rc==SQLITE_OK ){ | |
632 if( p ){ | |
633 int isPhrase; | |
634 | |
635 if( !sqlite3_fts3_enable_parentheses | |
636 && p->eType==FTSQUERY_PHRASE && pParse->isNot | |
637 ){ | |
638 /* Create an implicit NOT operator. */ | |
639 Fts3Expr *pNot = fts3MallocZero(sizeof(Fts3Expr)); | |
640 if( !pNot ){ | |
641 sqlite3Fts3ExprFree(p); | |
642 rc = SQLITE_NOMEM; | |
643 goto exprparse_out; | |
644 } | |
645 pNot->eType = FTSQUERY_NOT; | |
646 pNot->pRight = p; | |
647 p->pParent = pNot; | |
648 if( pNotBranch ){ | |
649 pNot->pLeft = pNotBranch; | |
650 pNotBranch->pParent = pNot; | |
651 } | |
652 pNotBranch = pNot; | |
653 p = pPrev; | |
654 }else{ | |
655 int eType = p->eType; | |
656 isPhrase = (eType==FTSQUERY_PHRASE || p->pLeft); | |
657 | |
658 /* The isRequirePhrase variable is set to true if a phrase or | |
659 ** an expression contained in parenthesis is required. If a | |
660 ** binary operator (AND, OR, NOT or NEAR) is encounted when | |
661 ** isRequirePhrase is set, this is a syntax error. | |
662 */ | |
663 if( !isPhrase && isRequirePhrase ){ | |
664 sqlite3Fts3ExprFree(p); | |
665 rc = SQLITE_ERROR; | |
666 goto exprparse_out; | |
667 } | |
668 | |
669 if( isPhrase && !isRequirePhrase ){ | |
670 /* Insert an implicit AND operator. */ | |
671 Fts3Expr *pAnd; | |
672 assert( pRet && pPrev ); | |
673 pAnd = fts3MallocZero(sizeof(Fts3Expr)); | |
674 if( !pAnd ){ | |
675 sqlite3Fts3ExprFree(p); | |
676 rc = SQLITE_NOMEM; | |
677 goto exprparse_out; | |
678 } | |
679 pAnd->eType = FTSQUERY_AND; | |
680 insertBinaryOperator(&pRet, pPrev, pAnd); | |
681 pPrev = pAnd; | |
682 } | |
683 | |
684 /* This test catches attempts to make either operand of a NEAR | |
685 ** operator something other than a phrase. For example, either of | |
686 ** the following: | |
687 ** | |
688 ** (bracketed expression) NEAR phrase | |
689 ** phrase NEAR (bracketed expression) | |
690 ** | |
691 ** Return an error in either case. | |
692 */ | |
693 if( pPrev && ( | |
694 (eType==FTSQUERY_NEAR && !isPhrase && pPrev->eType!=FTSQUERY_PHRASE) | |
695 || (eType!=FTSQUERY_PHRASE && isPhrase && pPrev->eType==FTSQUERY_NEAR) | |
696 )){ | |
697 sqlite3Fts3ExprFree(p); | |
698 rc = SQLITE_ERROR; | |
699 goto exprparse_out; | |
700 } | |
701 | |
702 if( isPhrase ){ | |
703 if( pRet ){ | |
704 assert( pPrev && pPrev->pLeft && pPrev->pRight==0 ); | |
705 pPrev->pRight = p; | |
706 p->pParent = pPrev; | |
707 }else{ | |
708 pRet = p; | |
709 } | |
710 }else{ | |
711 insertBinaryOperator(&pRet, pPrev, p); | |
712 } | |
713 isRequirePhrase = !isPhrase; | |
714 } | |
715 pPrev = p; | |
716 } | |
717 assert( nByte>0 ); | |
718 } | |
719 assert( rc!=SQLITE_OK || (nByte>0 && nByte<=nIn) ); | |
720 nIn -= nByte; | |
721 zIn += nByte; | |
722 } | |
723 | |
724 if( rc==SQLITE_DONE && pRet && isRequirePhrase ){ | |
725 rc = SQLITE_ERROR; | |
726 } | |
727 | |
728 if( rc==SQLITE_DONE ){ | |
729 rc = SQLITE_OK; | |
730 if( !sqlite3_fts3_enable_parentheses && pNotBranch ){ | |
731 if( !pRet ){ | |
732 rc = SQLITE_ERROR; | |
733 }else{ | |
734 Fts3Expr *pIter = pNotBranch; | |
735 while( pIter->pLeft ){ | |
736 pIter = pIter->pLeft; | |
737 } | |
738 pIter->pLeft = pRet; | |
739 pRet->pParent = pIter; | |
740 pRet = pNotBranch; | |
741 } | |
742 } | |
743 } | |
744 *pnConsumed = n - nIn; | |
745 | |
746 exprparse_out: | |
747 if( rc!=SQLITE_OK ){ | |
748 sqlite3Fts3ExprFree(pRet); | |
749 sqlite3Fts3ExprFree(pNotBranch); | |
750 pRet = 0; | |
751 } | |
752 *ppExpr = pRet; | |
753 return rc; | |
754 } | |
755 | |
756 /* | |
757 ** Return SQLITE_ERROR if the maximum depth of the expression tree passed | |
758 ** as the only argument is more than nMaxDepth. | |
759 */ | |
760 static int fts3ExprCheckDepth(Fts3Expr *p, int nMaxDepth){ | |
761 int rc = SQLITE_OK; | |
762 if( p ){ | |
763 if( nMaxDepth<0 ){ | |
764 rc = SQLITE_TOOBIG; | |
765 }else{ | |
766 rc = fts3ExprCheckDepth(p->pLeft, nMaxDepth-1); | |
767 if( rc==SQLITE_OK ){ | |
768 rc = fts3ExprCheckDepth(p->pRight, nMaxDepth-1); | |
769 } | |
770 } | |
771 } | |
772 return rc; | |
773 } | |
774 | |
775 /* | |
776 ** This function attempts to transform the expression tree at (*pp) to | |
777 ** an equivalent but more balanced form. The tree is modified in place. | |
778 ** If successful, SQLITE_OK is returned and (*pp) set to point to the | |
779 ** new root expression node. | |
780 ** | |
781 ** nMaxDepth is the maximum allowable depth of the balanced sub-tree. | |
782 ** | |
783 ** Otherwise, if an error occurs, an SQLite error code is returned and | |
784 ** expression (*pp) freed. | |
785 */ | |
786 static int fts3ExprBalance(Fts3Expr **pp, int nMaxDepth){ | |
787 int rc = SQLITE_OK; /* Return code */ | |
788 Fts3Expr *pRoot = *pp; /* Initial root node */ | |
789 Fts3Expr *pFree = 0; /* List of free nodes. Linked by pParent. */ | |
790 int eType = pRoot->eType; /* Type of node in this tree */ | |
791 | |
792 if( nMaxDepth==0 ){ | |
793 rc = SQLITE_ERROR; | |
794 } | |
795 | |
796 if( rc==SQLITE_OK && (eType==FTSQUERY_AND || eType==FTSQUERY_OR) ){ | |
797 Fts3Expr **apLeaf; | |
798 apLeaf = (Fts3Expr **)sqlite3_malloc(sizeof(Fts3Expr *) * nMaxDepth); | |
799 if( 0==apLeaf ){ | |
800 rc = SQLITE_NOMEM; | |
801 }else{ | |
802 memset(apLeaf, 0, sizeof(Fts3Expr *) * nMaxDepth); | |
803 } | |
804 | |
805 if( rc==SQLITE_OK ){ | |
806 int i; | |
807 Fts3Expr *p; | |
808 | |
809 /* Set $p to point to the left-most leaf in the tree of eType nodes. */ | |
810 for(p=pRoot; p->eType==eType; p=p->pLeft){ | |
811 assert( p->pParent==0 || p->pParent->pLeft==p ); | |
812 assert( p->pLeft && p->pRight ); | |
813 } | |
814 | |
815 /* This loop runs once for each leaf in the tree of eType nodes. */ | |
816 while( 1 ){ | |
817 int iLvl; | |
818 Fts3Expr *pParent = p->pParent; /* Current parent of p */ | |
819 | |
820 assert( pParent==0 || pParent->pLeft==p ); | |
821 p->pParent = 0; | |
822 if( pParent ){ | |
823 pParent->pLeft = 0; | |
824 }else{ | |
825 pRoot = 0; | |
826 } | |
827 rc = fts3ExprBalance(&p, nMaxDepth-1); | |
828 if( rc!=SQLITE_OK ) break; | |
829 | |
830 for(iLvl=0; p && iLvl<nMaxDepth; iLvl++){ | |
831 if( apLeaf[iLvl]==0 ){ | |
832 apLeaf[iLvl] = p; | |
833 p = 0; | |
834 }else{ | |
835 assert( pFree ); | |
836 pFree->pLeft = apLeaf[iLvl]; | |
837 pFree->pRight = p; | |
838 pFree->pLeft->pParent = pFree; | |
839 pFree->pRight->pParent = pFree; | |
840 | |
841 p = pFree; | |
842 pFree = pFree->pParent; | |
843 p->pParent = 0; | |
844 apLeaf[iLvl] = 0; | |
845 } | |
846 } | |
847 if( p ){ | |
848 sqlite3Fts3ExprFree(p); | |
849 rc = SQLITE_TOOBIG; | |
850 break; | |
851 } | |
852 | |
853 /* If that was the last leaf node, break out of the loop */ | |
854 if( pParent==0 ) break; | |
855 | |
856 /* Set $p to point to the next leaf in the tree of eType nodes */ | |
857 for(p=pParent->pRight; p->eType==eType; p=p->pLeft); | |
858 | |
859 /* Remove pParent from the original tree. */ | |
860 assert( pParent->pParent==0 || pParent->pParent->pLeft==pParent ); | |
861 pParent->pRight->pParent = pParent->pParent; | |
862 if( pParent->pParent ){ | |
863 pParent->pParent->pLeft = pParent->pRight; | |
864 }else{ | |
865 assert( pParent==pRoot ); | |
866 pRoot = pParent->pRight; | |
867 } | |
868 | |
869 /* Link pParent into the free node list. It will be used as an | |
870 ** internal node of the new tree. */ | |
871 pParent->pParent = pFree; | |
872 pFree = pParent; | |
873 } | |
874 | |
875 if( rc==SQLITE_OK ){ | |
876 p = 0; | |
877 for(i=0; i<nMaxDepth; i++){ | |
878 if( apLeaf[i] ){ | |
879 if( p==0 ){ | |
880 p = apLeaf[i]; | |
881 p->pParent = 0; | |
882 }else{ | |
883 assert( pFree!=0 ); | |
884 pFree->pRight = p; | |
885 pFree->pLeft = apLeaf[i]; | |
886 pFree->pLeft->pParent = pFree; | |
887 pFree->pRight->pParent = pFree; | |
888 | |
889 p = pFree; | |
890 pFree = pFree->pParent; | |
891 p->pParent = 0; | |
892 } | |
893 } | |
894 } | |
895 pRoot = p; | |
896 }else{ | |
897 /* An error occurred. Delete the contents of the apLeaf[] array | |
898 ** and pFree list. Everything else is cleaned up by the call to | |
899 ** sqlite3Fts3ExprFree(pRoot) below. */ | |
900 Fts3Expr *pDel; | |
901 for(i=0; i<nMaxDepth; i++){ | |
902 sqlite3Fts3ExprFree(apLeaf[i]); | |
903 } | |
904 while( (pDel=pFree)!=0 ){ | |
905 pFree = pDel->pParent; | |
906 sqlite3_free(pDel); | |
907 } | |
908 } | |
909 | |
910 assert( pFree==0 ); | |
911 sqlite3_free( apLeaf ); | |
912 } | |
913 } | |
914 | |
915 if( rc!=SQLITE_OK ){ | |
916 sqlite3Fts3ExprFree(pRoot); | |
917 pRoot = 0; | |
918 } | |
919 *pp = pRoot; | |
920 return rc; | |
921 } | |
922 | |
923 /* | |
924 ** This function is similar to sqlite3Fts3ExprParse(), with the following | |
925 ** differences: | |
926 ** | |
927 ** 1. It does not do expression rebalancing. | |
928 ** 2. It does not check that the expression does not exceed the | |
929 ** maximum allowable depth. | |
930 ** 3. Even if it fails, *ppExpr may still be set to point to an | |
931 ** expression tree. It should be deleted using sqlite3Fts3ExprFree() | |
932 ** in this case. | |
933 */ | |
934 static int fts3ExprParseUnbalanced( | |
935 sqlite3_tokenizer *pTokenizer, /* Tokenizer module */ | |
936 int iLangid, /* Language id for tokenizer */ | |
937 char **azCol, /* Array of column names for fts3 table */ | |
938 int bFts4, /* True to allow FTS4-only syntax */ | |
939 int nCol, /* Number of entries in azCol[] */ | |
940 int iDefaultCol, /* Default column to query */ | |
941 const char *z, int n, /* Text of MATCH query */ | |
942 Fts3Expr **ppExpr /* OUT: Parsed query structure */ | |
943 ){ | |
944 int nParsed; | |
945 int rc; | |
946 ParseContext sParse; | |
947 | |
948 memset(&sParse, 0, sizeof(ParseContext)); | |
949 sParse.pTokenizer = pTokenizer; | |
950 sParse.iLangid = iLangid; | |
951 sParse.azCol = (const char **)azCol; | |
952 sParse.nCol = nCol; | |
953 sParse.iDefaultCol = iDefaultCol; | |
954 sParse.bFts4 = bFts4; | |
955 if( z==0 ){ | |
956 *ppExpr = 0; | |
957 return SQLITE_OK; | |
958 } | |
959 if( n<0 ){ | |
960 n = (int)strlen(z); | |
961 } | |
962 rc = fts3ExprParse(&sParse, z, n, ppExpr, &nParsed); | |
963 assert( rc==SQLITE_OK || *ppExpr==0 ); | |
964 | |
965 /* Check for mismatched parenthesis */ | |
966 if( rc==SQLITE_OK && sParse.nNest ){ | |
967 rc = SQLITE_ERROR; | |
968 } | |
969 | |
970 return rc; | |
971 } | |
972 | |
973 /* | |
974 ** Parameters z and n contain a pointer to and length of a buffer containing | |
975 ** an fts3 query expression, respectively. This function attempts to parse the | |
976 ** query expression and create a tree of Fts3Expr structures representing the | |
977 ** parsed expression. If successful, *ppExpr is set to point to the head | |
978 ** of the parsed expression tree and SQLITE_OK is returned. If an error | |
979 ** occurs, either SQLITE_NOMEM (out-of-memory error) or SQLITE_ERROR (parse | |
980 ** error) is returned and *ppExpr is set to 0. | |
981 ** | |
982 ** If parameter n is a negative number, then z is assumed to point to a | |
983 ** nul-terminated string and the length is determined using strlen(). | |
984 ** | |
985 ** The first parameter, pTokenizer, is passed the fts3 tokenizer module to | |
986 ** use to normalize query tokens while parsing the expression. The azCol[] | |
987 ** array, which is assumed to contain nCol entries, should contain the names | |
988 ** of each column in the target fts3 table, in order from left to right. | |
989 ** Column names must be nul-terminated strings. | |
990 ** | |
991 ** The iDefaultCol parameter should be passed the index of the table column | |
992 ** that appears on the left-hand-side of the MATCH operator (the default | |
993 ** column to match against for tokens for which a column name is not explicitly | |
994 ** specified as part of the query string), or -1 if tokens may by default | |
995 ** match any table column. | |
996 */ | |
997 int sqlite3Fts3ExprParse( | |
998 sqlite3_tokenizer *pTokenizer, /* Tokenizer module */ | |
999 int iLangid, /* Language id for tokenizer */ | |
1000 char **azCol, /* Array of column names for fts3 table */ | |
1001 int bFts4, /* True to allow FTS4-only syntax */ | |
1002 int nCol, /* Number of entries in azCol[] */ | |
1003 int iDefaultCol, /* Default column to query */ | |
1004 const char *z, int n, /* Text of MATCH query */ | |
1005 Fts3Expr **ppExpr, /* OUT: Parsed query structure */ | |
1006 char **pzErr /* OUT: Error message (sqlite3_malloc) */ | |
1007 ){ | |
1008 int rc = fts3ExprParseUnbalanced( | |
1009 pTokenizer, iLangid, azCol, bFts4, nCol, iDefaultCol, z, n, ppExpr | |
1010 ); | |
1011 | |
1012 /* Rebalance the expression. And check that its depth does not exceed | |
1013 ** SQLITE_FTS3_MAX_EXPR_DEPTH. */ | |
1014 if( rc==SQLITE_OK && *ppExpr ){ | |
1015 rc = fts3ExprBalance(ppExpr, SQLITE_FTS3_MAX_EXPR_DEPTH); | |
1016 if( rc==SQLITE_OK ){ | |
1017 rc = fts3ExprCheckDepth(*ppExpr, SQLITE_FTS3_MAX_EXPR_DEPTH); | |
1018 } | |
1019 } | |
1020 | |
1021 if( rc!=SQLITE_OK ){ | |
1022 sqlite3Fts3ExprFree(*ppExpr); | |
1023 *ppExpr = 0; | |
1024 if( rc==SQLITE_TOOBIG ){ | |
1025 *pzErr = sqlite3_mprintf( | |
1026 "FTS expression tree is too large (maximum depth %d)", | |
1027 SQLITE_FTS3_MAX_EXPR_DEPTH | |
1028 ); | |
1029 rc = SQLITE_ERROR; | |
1030 }else if( rc==SQLITE_ERROR ){ | |
1031 *pzErr = sqlite3_mprintf("malformed MATCH expression: [%s]", z); | |
1032 } | |
1033 } | |
1034 | |
1035 return rc; | |
1036 } | |
1037 | |
1038 /* | |
1039 ** Free a single node of an expression tree. | |
1040 */ | |
1041 static void fts3FreeExprNode(Fts3Expr *p){ | |
1042 assert( p->eType==FTSQUERY_PHRASE || p->pPhrase==0 ); | |
1043 sqlite3Fts3EvalPhraseCleanup(p->pPhrase); | |
1044 sqlite3_free(p->aMI); | |
1045 sqlite3_free(p); | |
1046 } | |
1047 | |
1048 /* | |
1049 ** Free a parsed fts3 query expression allocated by sqlite3Fts3ExprParse(). | |
1050 ** | |
1051 ** This function would be simpler if it recursively called itself. But | |
1052 ** that would mean passing a sufficiently large expression to ExprParse() | |
1053 ** could cause a stack overflow. | |
1054 */ | |
1055 void sqlite3Fts3ExprFree(Fts3Expr *pDel){ | |
1056 Fts3Expr *p; | |
1057 assert( pDel==0 || pDel->pParent==0 ); | |
1058 for(p=pDel; p && (p->pLeft||p->pRight); p=(p->pLeft ? p->pLeft : p->pRight)){ | |
1059 assert( p->pParent==0 || p==p->pParent->pRight || p==p->pParent->pLeft ); | |
1060 } | |
1061 while( p ){ | |
1062 Fts3Expr *pParent = p->pParent; | |
1063 fts3FreeExprNode(p); | |
1064 if( pParent && p==pParent->pLeft && pParent->pRight ){ | |
1065 p = pParent->pRight; | |
1066 while( p && (p->pLeft || p->pRight) ){ | |
1067 assert( p==p->pParent->pRight || p==p->pParent->pLeft ); | |
1068 p = (p->pLeft ? p->pLeft : p->pRight); | |
1069 } | |
1070 }else{ | |
1071 p = pParent; | |
1072 } | |
1073 } | |
1074 } | |
1075 | |
1076 /**************************************************************************** | |
1077 ***************************************************************************** | |
1078 ** Everything after this point is just test code. | |
1079 */ | |
1080 | |
1081 #ifdef SQLITE_TEST | |
1082 | |
1083 #include <stdio.h> | |
1084 | |
1085 /* | |
1086 ** Function to query the hash-table of tokenizers (see README.tokenizers). | |
1087 */ | |
1088 static int queryTestTokenizer( | |
1089 sqlite3 *db, | |
1090 const char *zName, | |
1091 const sqlite3_tokenizer_module **pp | |
1092 ){ | |
1093 int rc; | |
1094 sqlite3_stmt *pStmt; | |
1095 const char zSql[] = "SELECT fts3_tokenizer(?)"; | |
1096 | |
1097 *pp = 0; | |
1098 rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); | |
1099 if( rc!=SQLITE_OK ){ | |
1100 return rc; | |
1101 } | |
1102 | |
1103 sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC); | |
1104 if( SQLITE_ROW==sqlite3_step(pStmt) ){ | |
1105 if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){ | |
1106 memcpy((void *)pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp)); | |
1107 } | |
1108 } | |
1109 | |
1110 return sqlite3_finalize(pStmt); | |
1111 } | |
1112 | |
1113 /* | |
1114 ** Return a pointer to a buffer containing a text representation of the | |
1115 ** expression passed as the first argument. The buffer is obtained from | |
1116 ** sqlite3_malloc(). It is the responsibility of the caller to use | |
1117 ** sqlite3_free() to release the memory. If an OOM condition is encountered, | |
1118 ** NULL is returned. | |
1119 ** | |
1120 ** If the second argument is not NULL, then its contents are prepended to | |
1121 ** the returned expression text and then freed using sqlite3_free(). | |
1122 */ | |
1123 static char *exprToString(Fts3Expr *pExpr, char *zBuf){ | |
1124 if( pExpr==0 ){ | |
1125 return sqlite3_mprintf(""); | |
1126 } | |
1127 switch( pExpr->eType ){ | |
1128 case FTSQUERY_PHRASE: { | |
1129 Fts3Phrase *pPhrase = pExpr->pPhrase; | |
1130 int i; | |
1131 zBuf = sqlite3_mprintf( | |
1132 "%zPHRASE %d 0", zBuf, pPhrase->iColumn); | |
1133 for(i=0; zBuf && i<pPhrase->nToken; i++){ | |
1134 zBuf = sqlite3_mprintf("%z %.*s%s", zBuf, | |
1135 pPhrase->aToken[i].n, pPhrase->aToken[i].z, | |
1136 (pPhrase->aToken[i].isPrefix?"+":"") | |
1137 ); | |
1138 } | |
1139 return zBuf; | |
1140 } | |
1141 | |
1142 case FTSQUERY_NEAR: | |
1143 zBuf = sqlite3_mprintf("%zNEAR/%d ", zBuf, pExpr->nNear); | |
1144 break; | |
1145 case FTSQUERY_NOT: | |
1146 zBuf = sqlite3_mprintf("%zNOT ", zBuf); | |
1147 break; | |
1148 case FTSQUERY_AND: | |
1149 zBuf = sqlite3_mprintf("%zAND ", zBuf); | |
1150 break; | |
1151 case FTSQUERY_OR: | |
1152 zBuf = sqlite3_mprintf("%zOR ", zBuf); | |
1153 break; | |
1154 } | |
1155 | |
1156 if( zBuf ) zBuf = sqlite3_mprintf("%z{", zBuf); | |
1157 if( zBuf ) zBuf = exprToString(pExpr->pLeft, zBuf); | |
1158 if( zBuf ) zBuf = sqlite3_mprintf("%z} {", zBuf); | |
1159 | |
1160 if( zBuf ) zBuf = exprToString(pExpr->pRight, zBuf); | |
1161 if( zBuf ) zBuf = sqlite3_mprintf("%z}", zBuf); | |
1162 | |
1163 return zBuf; | |
1164 } | |
1165 | |
1166 /* | |
1167 ** This is the implementation of a scalar SQL function used to test the | |
1168 ** expression parser. It should be called as follows: | |
1169 ** | |
1170 ** fts3_exprtest(<tokenizer>, <expr>, <column 1>, ...); | |
1171 ** | |
1172 ** The first argument, <tokenizer>, is the name of the fts3 tokenizer used | |
1173 ** to parse the query expression (see README.tokenizers). The second argument | |
1174 ** is the query expression to parse. Each subsequent argument is the name | |
1175 ** of a column of the fts3 table that the query expression may refer to. | |
1176 ** For example: | |
1177 ** | |
1178 ** SELECT fts3_exprtest('simple', 'Bill col2:Bloggs', 'col1', 'col2'); | |
1179 */ | |
1180 static void fts3ExprTest( | |
1181 sqlite3_context *context, | |
1182 int argc, | |
1183 sqlite3_value **argv | |
1184 ){ | |
1185 sqlite3_tokenizer_module const *pModule = 0; | |
1186 sqlite3_tokenizer *pTokenizer = 0; | |
1187 int rc; | |
1188 char **azCol = 0; | |
1189 const char *zExpr; | |
1190 int nExpr; | |
1191 int nCol; | |
1192 int ii; | |
1193 Fts3Expr *pExpr; | |
1194 char *zBuf = 0; | |
1195 sqlite3 *db = sqlite3_context_db_handle(context); | |
1196 | |
1197 if( argc<3 ){ | |
1198 sqlite3_result_error(context, | |
1199 "Usage: fts3_exprtest(tokenizer, expr, col1, ...", -1 | |
1200 ); | |
1201 return; | |
1202 } | |
1203 | |
1204 rc = queryTestTokenizer(db, | |
1205 (const char *)sqlite3_value_text(argv[0]), &pModule); | |
1206 if( rc==SQLITE_NOMEM ){ | |
1207 sqlite3_result_error_nomem(context); | |
1208 goto exprtest_out; | |
1209 }else if( !pModule ){ | |
1210 sqlite3_result_error(context, "No such tokenizer module", -1); | |
1211 goto exprtest_out; | |
1212 } | |
1213 | |
1214 rc = pModule->xCreate(0, 0, &pTokenizer); | |
1215 assert( rc==SQLITE_NOMEM || rc==SQLITE_OK ); | |
1216 if( rc==SQLITE_NOMEM ){ | |
1217 sqlite3_result_error_nomem(context); | |
1218 goto exprtest_out; | |
1219 } | |
1220 pTokenizer->pModule = pModule; | |
1221 | |
1222 zExpr = (const char *)sqlite3_value_text(argv[1]); | |
1223 nExpr = sqlite3_value_bytes(argv[1]); | |
1224 nCol = argc-2; | |
1225 azCol = (char **)sqlite3_malloc(nCol*sizeof(char *)); | |
1226 if( !azCol ){ | |
1227 sqlite3_result_error_nomem(context); | |
1228 goto exprtest_out; | |
1229 } | |
1230 for(ii=0; ii<nCol; ii++){ | |
1231 azCol[ii] = (char *)sqlite3_value_text(argv[ii+2]); | |
1232 } | |
1233 | |
1234 if( sqlite3_user_data(context) ){ | |
1235 char *zDummy = 0; | |
1236 rc = sqlite3Fts3ExprParse( | |
1237 pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr, &zDummy | |
1238 ); | |
1239 assert( rc==SQLITE_OK || pExpr==0 ); | |
1240 sqlite3_free(zDummy); | |
1241 }else{ | |
1242 rc = fts3ExprParseUnbalanced( | |
1243 pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr | |
1244 ); | |
1245 } | |
1246 | |
1247 if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM ){ | |
1248 sqlite3Fts3ExprFree(pExpr); | |
1249 sqlite3_result_error(context, "Error parsing expression", -1); | |
1250 }else if( rc==SQLITE_NOMEM || !(zBuf = exprToString(pExpr, 0)) ){ | |
1251 sqlite3_result_error_nomem(context); | |
1252 }else{ | |
1253 sqlite3_result_text(context, zBuf, -1, SQLITE_TRANSIENT); | |
1254 sqlite3_free(zBuf); | |
1255 } | |
1256 | |
1257 sqlite3Fts3ExprFree(pExpr); | |
1258 | |
1259 exprtest_out: | |
1260 if( pModule && pTokenizer ){ | |
1261 rc = pModule->xDestroy(pTokenizer); | |
1262 } | |
1263 sqlite3_free(azCol); | |
1264 } | |
1265 | |
1266 /* | |
1267 ** Register the query expression parser test function fts3_exprtest() | |
1268 ** with database connection db. | |
1269 */ | |
1270 int sqlite3Fts3ExprInitTestInterface(sqlite3* db){ | |
1271 int rc = sqlite3_create_function( | |
1272 db, "fts3_exprtest", -1, SQLITE_UTF8, 0, fts3ExprTest, 0, 0 | |
1273 ); | |
1274 if( rc==SQLITE_OK ){ | |
1275 rc = sqlite3_create_function(db, "fts3_exprtest_rebalance", | |
1276 -1, SQLITE_UTF8, (void *)1, fts3ExprTest, 0, 0 | |
1277 ); | |
1278 } | |
1279 return rc; | |
1280 } | |
1281 | |
1282 #endif | |
1283 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ | |
OLD | NEW |