Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(279)

Side by Side Diff: third_party/sqlite/ext/fts3/fts3_expr.c

Issue 3108030: Move bundled copy of sqlite one level deeper to better separate it... (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/
Patch Set: Created 10 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « third_party/sqlite/ext/fts3/fts3_expr.h ('k') | third_party/sqlite/ext/fts3/fts3_hash.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 /*
2 ** 2008 Nov 28
3 **
4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing:
6 **
7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give.
10 **
11 ******************************************************************************
12 **
13 ** This module contains code that implements a parser for fts3 query strings
14 ** (the right-hand argument to the MATCH operator). Because the supported
15 ** syntax is relatively simple, the whole tokenizer/parser system is
16 ** hand-coded. The public interface to this module is declared in source
17 ** code file "fts3_expr.h".
18 */
19 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
20
21 /*
22 ** By default, this module parses the legacy syntax that has been
23 ** traditionally used by fts3. Or, if SQLITE_ENABLE_FTS3_PARENTHESIS
24 ** is defined, then it uses the new syntax. The differences between
25 ** the new and the old syntaxes are:
26 **
27 ** a) The new syntax supports parenthesis. The old does not.
28 **
29 ** b) The new syntax supports the AND and NOT operators. The old does not.
30 **
31 ** c) The old syntax supports the "-" token qualifier. This is not
32 ** supported by the new syntax (it is replaced by the NOT operator).
33 **
34 ** d) When using the old syntax, the OR operator has a greater precedence
35 ** than an implicit AND. When using the new, both implicity and explicit
36 ** AND operators have a higher precedence than OR.
37 **
38 ** If compiled with SQLITE_TEST defined, then this module exports the
39 ** symbol "int sqlite3_fts3_enable_parentheses". Setting this variable
40 ** to zero causes the module to use the old syntax. If it is set to
41 ** non-zero the new syntax is activated. This is so both syntaxes can
42 ** be tested using a single build of testfixture.
43 */
44 #ifdef SQLITE_TEST
45 int sqlite3_fts3_enable_parentheses = 0;
46 #else
47 # ifdef SQLITE_ENABLE_FTS3_PARENTHESIS
48 # define sqlite3_fts3_enable_parentheses 1
49 # else
50 # define sqlite3_fts3_enable_parentheses 0
51 # endif
52 #endif
53
54 /*
55 ** Default span for NEAR operators.
56 */
57 #define SQLITE_FTS3_DEFAULT_NEAR_PARAM 10
58
59 #include "fts3_expr.h"
60 #include "sqlite3.h"
61 #include <string.h>
62 #include <assert.h>
63
64 typedef struct ParseContext ParseContext;
65 struct ParseContext {
66 sqlite3_tokenizer *pTokenizer; /* Tokenizer module */
67 const char **azCol; /* Array of column names for fts3 table */
68 int nCol; /* Number of entries in azCol[] */
69 int iDefaultCol; /* Default column to query */
70 sqlite3_context *pCtx; /* Write error message here */
71 int nNest; /* Number of nested brackets */
72 };
73
74 /*
75 ** This function is equivalent to the standard isspace() function.
76 **
77 ** The standard isspace() can be awkward to use safely, because although it
78 ** is defined to accept an argument of type int, its behaviour when passed
79 ** an integer that falls outside of the range of the unsigned char type
80 ** is undefined (and sometimes, "undefined" means segfault). This wrapper
81 ** is defined to accept an argument of type char, and always returns 0 for
82 ** any values that fall outside of the range of the unsigned char type (i.e.
83 ** negative values).
84 */
85 static int fts3isspace(char c){
86 return c==' ' || c=='\t' || c=='\n' || c=='\r' || c=='\v' || c=='\f';
87 }
88
89 /*
90 ** Extract the next token from buffer z (length n) using the tokenizer
91 ** and other information (column names etc.) in pParse. Create an Fts3Expr
92 ** structure of type FTSQUERY_PHRASE containing a phrase consisting of this
93 ** single token and set *ppExpr to point to it. If the end of the buffer is
94 ** reached before a token is found, set *ppExpr to zero. It is the
95 ** responsibility of the caller to eventually deallocate the allocated
96 ** Fts3Expr structure (if any) by passing it to sqlite3_free().
97 **
98 ** Return SQLITE_OK if successful, or SQLITE_NOMEM if a memory allocation
99 ** fails.
100 */
101 static int getNextToken(
102 ParseContext *pParse, /* fts3 query parse context */
103 int iCol, /* Value for Fts3Phrase.iColumn */
104 const char *z, int n, /* Input string */
105 Fts3Expr **ppExpr, /* OUT: expression */
106 int *pnConsumed /* OUT: Number of bytes consumed */
107 ){
108 sqlite3_tokenizer *pTokenizer = pParse->pTokenizer;
109 sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
110 int rc;
111 sqlite3_tokenizer_cursor *pCursor;
112 Fts3Expr *pRet = 0;
113 int nConsumed = 0;
114
115 rc = pModule->xOpen(pTokenizer, z, n, &pCursor);
116 if( rc==SQLITE_OK ){
117 const char *zToken;
118 int nToken, iStart, iEnd, iPosition;
119 int nByte; /* total space to allocate */
120
121 pCursor->pTokenizer = pTokenizer;
122 rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition);
123
124 if( rc==SQLITE_OK ){
125 nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken;
126 pRet = (Fts3Expr *)sqlite3_malloc(nByte);
127 if( !pRet ){
128 rc = SQLITE_NOMEM;
129 }else{
130 memset(pRet, 0, nByte);
131 pRet->eType = FTSQUERY_PHRASE;
132 pRet->pPhrase = (Fts3Phrase *)&pRet[1];
133 pRet->pPhrase->nToken = 1;
134 pRet->pPhrase->iColumn = iCol;
135 pRet->pPhrase->aToken[0].n = nToken;
136 pRet->pPhrase->aToken[0].z = (char *)&pRet->pPhrase[1];
137 memcpy(pRet->pPhrase->aToken[0].z, zToken, nToken);
138
139 if( iEnd<n && z[iEnd]=='*' ){
140 pRet->pPhrase->aToken[0].isPrefix = 1;
141 iEnd++;
142 }
143 if( !sqlite3_fts3_enable_parentheses && iStart>0 && z[iStart-1]=='-' ){
144 pRet->pPhrase->isNot = 1;
145 }
146 }
147 nConsumed = iEnd;
148 }
149
150 pModule->xClose(pCursor);
151 }
152
153 *pnConsumed = nConsumed;
154 *ppExpr = pRet;
155 return rc;
156 }
157
158
159 /*
160 ** Enlarge a memory allocation. If an out-of-memory allocation occurs,
161 ** then free the old allocation.
162 */
163 void *fts3ReallocOrFree(void *pOrig, int nNew){
164 void *pRet = sqlite3_realloc(pOrig, nNew);
165 if( !pRet ){
166 sqlite3_free(pOrig);
167 }
168 return pRet;
169 }
170
171 /*
172 ** Buffer zInput, length nInput, contains the contents of a quoted string
173 ** that appeared as part of an fts3 query expression. Neither quote character
174 ** is included in the buffer. This function attempts to tokenize the entire
175 ** input buffer and create an Fts3Expr structure of type FTSQUERY_PHRASE
176 ** containing the results.
177 **
178 ** If successful, SQLITE_OK is returned and *ppExpr set to point at the
179 ** allocated Fts3Expr structure. Otherwise, either SQLITE_NOMEM (out of memory
180 ** error) or SQLITE_ERROR (tokenization error) is returned and *ppExpr set
181 ** to 0.
182 */
183 static int getNextString(
184 ParseContext *pParse, /* fts3 query parse context */
185 const char *zInput, int nInput, /* Input string */
186 Fts3Expr **ppExpr /* OUT: expression */
187 ){
188 sqlite3_tokenizer *pTokenizer = pParse->pTokenizer;
189 sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
190 int rc;
191 Fts3Expr *p = 0;
192 sqlite3_tokenizer_cursor *pCursor = 0;
193 char *zTemp = 0;
194 int nTemp = 0;
195
196 rc = pModule->xOpen(pTokenizer, zInput, nInput, &pCursor);
197 if( rc==SQLITE_OK ){
198 int ii;
199 pCursor->pTokenizer = pTokenizer;
200 for(ii=0; rc==SQLITE_OK; ii++){
201 const char *zToken;
202 int nToken, iBegin, iEnd, iPos;
203 rc = pModule->xNext(pCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos);
204 if( rc==SQLITE_OK ){
205 int nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase);
206 p = fts3ReallocOrFree(p, nByte+ii*sizeof(struct PhraseToken));
207 zTemp = fts3ReallocOrFree(zTemp, nTemp + nToken);
208 if( !p || !zTemp ){
209 goto no_mem;
210 }
211 if( ii==0 ){
212 memset(p, 0, nByte);
213 p->pPhrase = (Fts3Phrase *)&p[1];
214 }
215 p->pPhrase = (Fts3Phrase *)&p[1];
216 p->pPhrase->nToken = ii+1;
217 p->pPhrase->aToken[ii].n = nToken;
218 memcpy(&zTemp[nTemp], zToken, nToken);
219 nTemp += nToken;
220 if( iEnd<nInput && zInput[iEnd]=='*' ){
221 p->pPhrase->aToken[ii].isPrefix = 1;
222 }else{
223 p->pPhrase->aToken[ii].isPrefix = 0;
224 }
225 }
226 }
227
228 pModule->xClose(pCursor);
229 pCursor = 0;
230 }
231
232 if( rc==SQLITE_DONE ){
233 int jj;
234 char *zNew;
235 int nNew = 0;
236 int nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase);
237 nByte += (p?(p->pPhrase->nToken-1):0) * sizeof(struct PhraseToken);
238 p = fts3ReallocOrFree(p, nByte + nTemp);
239 if( !p ){
240 goto no_mem;
241 }
242 if( zTemp ){
243 zNew = &(((char *)p)[nByte]);
244 memcpy(zNew, zTemp, nTemp);
245 }else{
246 memset(p, 0, nByte+nTemp);
247 }
248 p->pPhrase = (Fts3Phrase *)&p[1];
249 for(jj=0; jj<p->pPhrase->nToken; jj++){
250 p->pPhrase->aToken[jj].z = &zNew[nNew];
251 nNew += p->pPhrase->aToken[jj].n;
252 }
253 sqlite3_free(zTemp);
254 p->eType = FTSQUERY_PHRASE;
255 p->pPhrase->iColumn = pParse->iDefaultCol;
256 rc = SQLITE_OK;
257 }
258
259 *ppExpr = p;
260 return rc;
261 no_mem:
262
263 if( pCursor ){
264 pModule->xClose(pCursor);
265 }
266 sqlite3_free(zTemp);
267 sqlite3_free(p);
268 *ppExpr = 0;
269 return SQLITE_NOMEM;
270 }
271
272 /*
273 ** Function getNextNode(), which is called by fts3ExprParse(), may itself
274 ** call fts3ExprParse(). So this forward declaration is required.
275 */
276 static int fts3ExprParse(ParseContext *, const char *, int, Fts3Expr **, int *);
277
278 /*
279 ** The output variable *ppExpr is populated with an allocated Fts3Expr
280 ** structure, or set to 0 if the end of the input buffer is reached.
281 **
282 ** Returns an SQLite error code. SQLITE_OK if everything works, SQLITE_NOMEM
283 ** if a malloc failure occurs, or SQLITE_ERROR if a parse error is encountered.
284 ** If SQLITE_ERROR is returned, pContext is populated with an error message.
285 */
286 static int getNextNode(
287 ParseContext *pParse, /* fts3 query parse context */
288 const char *z, int n, /* Input string */
289 Fts3Expr **ppExpr, /* OUT: expression */
290 int *pnConsumed /* OUT: Number of bytes consumed */
291 ){
292 static const struct Fts3Keyword {
293 char z[4]; /* Keyword text */
294 unsigned char n; /* Length of the keyword */
295 unsigned char parenOnly; /* Only valid in paren mode */
296 unsigned char eType; /* Keyword code */
297 } aKeyword[] = {
298 { "OR" , 2, 0, FTSQUERY_OR },
299 { "AND", 3, 1, FTSQUERY_AND },
300 { "NOT", 3, 1, FTSQUERY_NOT },
301 { "NEAR", 4, 0, FTSQUERY_NEAR }
302 };
303 int ii;
304 int iCol;
305 int iColLen;
306 int rc;
307 Fts3Expr *pRet = 0;
308
309 const char *zInput = z;
310 int nInput = n;
311
312 /* Skip over any whitespace before checking for a keyword, an open or
313 ** close bracket, or a quoted string.
314 */
315 while( nInput>0 && fts3isspace(*zInput) ){
316 nInput--;
317 zInput++;
318 }
319 if( nInput==0 ){
320 return SQLITE_DONE;
321 }
322
323 /* See if we are dealing with a keyword. */
324 for(ii=0; ii<(int)(sizeof(aKeyword)/sizeof(struct Fts3Keyword)); ii++){
325 const struct Fts3Keyword *pKey = &aKeyword[ii];
326
327 if( (pKey->parenOnly & ~sqlite3_fts3_enable_parentheses)!=0 ){
328 continue;
329 }
330
331 if( nInput>=pKey->n && 0==memcmp(zInput, pKey->z, pKey->n) ){
332 int nNear = SQLITE_FTS3_DEFAULT_NEAR_PARAM;
333 int nKey = pKey->n;
334 char cNext;
335
336 /* If this is a "NEAR" keyword, check for an explicit nearness. */
337 if( pKey->eType==FTSQUERY_NEAR ){
338 assert( nKey==4 );
339 if( zInput[4]=='/' && zInput[5]>='0' && zInput[5]<='9' ){
340 nNear = 0;
341 for(nKey=5; zInput[nKey]>='0' && zInput[nKey]<='9'; nKey++){
342 nNear = nNear * 10 + (zInput[nKey] - '0');
343 }
344 }
345 }
346
347 /* At this point this is probably a keyword. But for that to be true,
348 ** the next byte must contain either whitespace, an open or close
349 ** parenthesis, a quote character, or EOF.
350 */
351 cNext = zInput[nKey];
352 if( fts3isspace(cNext)
353 || cNext=='"' || cNext=='(' || cNext==')' || cNext==0
354 ){
355 pRet = (Fts3Expr *)sqlite3_malloc(sizeof(Fts3Expr));
356 memset(pRet, 0, sizeof(Fts3Expr));
357 pRet->eType = pKey->eType;
358 pRet->nNear = nNear;
359 *ppExpr = pRet;
360 *pnConsumed = (zInput - z) + nKey;
361 return SQLITE_OK;
362 }
363
364 /* Turns out that wasn't a keyword after all. This happens if the
365 ** user has supplied a token such as "ORacle". Continue.
366 */
367 }
368 }
369
370 /* Check for an open bracket. */
371 if( sqlite3_fts3_enable_parentheses ){
372 if( *zInput=='(' ){
373 int nConsumed;
374 int rc;
375 pParse->nNest++;
376 rc = fts3ExprParse(pParse, &zInput[1], nInput-1, ppExpr, &nConsumed);
377 if( rc==SQLITE_OK && !*ppExpr ){
378 rc = SQLITE_DONE;
379 }
380 *pnConsumed = (zInput - z) + 1 + nConsumed;
381 return rc;
382 }
383
384 /* Check for a close bracket. */
385 if( *zInput==')' ){
386 pParse->nNest--;
387 *pnConsumed = (zInput - z) + 1;
388 return SQLITE_DONE;
389 }
390 }
391
392 /* See if we are dealing with a quoted phrase. If this is the case, then
393 ** search for the closing quote and pass the whole string to getNextString()
394 ** for processing. This is easy to do, as fts3 has no syntax for escaping
395 ** a quote character embedded in a string.
396 */
397 if( *zInput=='"' ){
398 for(ii=1; ii<nInput && zInput[ii]!='"'; ii++);
399 *pnConsumed = (zInput - z) + ii + 1;
400 if( ii==nInput ){
401 return SQLITE_ERROR;
402 }
403 return getNextString(pParse, &zInput[1], ii-1, ppExpr);
404 }
405
406
407 /* If control flows to this point, this must be a regular token, or
408 ** the end of the input. Read a regular token using the sqlite3_tokenizer
409 ** interface. Before doing so, figure out if there is an explicit
410 ** column specifier for the token.
411 **
412 ** TODO: Strangely, it is not possible to associate a column specifier
413 ** with a quoted phrase, only with a single token. Not sure if this was
414 ** an implementation artifact or an intentional decision when fts3 was
415 ** first implemented. Whichever it was, this module duplicates the
416 ** limitation.
417 */
418 iCol = pParse->iDefaultCol;
419 iColLen = 0;
420 for(ii=0; ii<pParse->nCol; ii++){
421 const char *zStr = pParse->azCol[ii];
422 int nStr = strlen(zStr);
423 if( nInput>nStr && zInput[nStr]==':'
424 && sqlite3_strnicmp(zStr, zInput, nStr)==0
425 ){
426 iCol = ii;
427 iColLen = ((zInput - z) + nStr + 1);
428 break;
429 }
430 }
431 rc = getNextToken(pParse, iCol, &z[iColLen], n-iColLen, ppExpr, pnConsumed);
432 *pnConsumed += iColLen;
433 return rc;
434 }
435
436 /*
437 ** The argument is an Fts3Expr structure for a binary operator (any type
438 ** except an FTSQUERY_PHRASE). Return an integer value representing the
439 ** precedence of the operator. Lower values have a higher precedence (i.e.
440 ** group more tightly). For example, in the C language, the == operator
441 ** groups more tightly than ||, and would therefore have a higher precedence.
442 **
443 ** When using the new fts3 query syntax (when SQLITE_ENABLE_FTS3_PARENTHESIS
444 ** is defined), the order of the operators in precedence from highest to
445 ** lowest is:
446 **
447 ** NEAR
448 ** NOT
449 ** AND (including implicit ANDs)
450 ** OR
451 **
452 ** Note that when using the old query syntax, the OR operator has a higher
453 ** precedence than the AND operator.
454 */
455 static int opPrecedence(Fts3Expr *p){
456 assert( p->eType!=FTSQUERY_PHRASE );
457 if( sqlite3_fts3_enable_parentheses ){
458 return p->eType;
459 }else if( p->eType==FTSQUERY_NEAR ){
460 return 1;
461 }else if( p->eType==FTSQUERY_OR ){
462 return 2;
463 }
464 assert( p->eType==FTSQUERY_AND );
465 return 3;
466 }
467
468 /*
469 ** Argument ppHead contains a pointer to the current head of a query
470 ** expression tree being parsed. pPrev is the expression node most recently
471 ** inserted into the tree. This function adds pNew, which is always a binary
472 ** operator node, into the expression tree based on the relative precedence
473 ** of pNew and the existing nodes of the tree. This may result in the head
474 ** of the tree changing, in which case *ppHead is set to the new root node.
475 */
476 static void insertBinaryOperator(
477 Fts3Expr **ppHead, /* Pointer to the root node of a tree */
478 Fts3Expr *pPrev, /* Node most recently inserted into the tree */
479 Fts3Expr *pNew /* New binary node to insert into expression tree */
480 ){
481 Fts3Expr *pSplit = pPrev;
482 while( pSplit->pParent && opPrecedence(pSplit->pParent)<=opPrecedence(pNew) ){
483 pSplit = pSplit->pParent;
484 }
485
486 if( pSplit->pParent ){
487 assert( pSplit->pParent->pRight==pSplit );
488 pSplit->pParent->pRight = pNew;
489 pNew->pParent = pSplit->pParent;
490 }else{
491 *ppHead = pNew;
492 }
493 pNew->pLeft = pSplit;
494 pSplit->pParent = pNew;
495 }
496
497 /*
498 ** Parse the fts3 query expression found in buffer z, length n. This function
499 ** returns either when the end of the buffer is reached or an unmatched
500 ** closing bracket - ')' - is encountered.
501 **
502 ** If successful, SQLITE_OK is returned, *ppExpr is set to point to the
503 ** parsed form of the expression and *pnConsumed is set to the number of
504 ** bytes read from buffer z. Otherwise, *ppExpr is set to 0 and SQLITE_NOMEM
505 ** (out of memory error) or SQLITE_ERROR (parse error) is returned.
506 */
507 static int fts3ExprParse(
508 ParseContext *pParse, /* fts3 query parse context */
509 const char *z, int n, /* Text of MATCH query */
510 Fts3Expr **ppExpr, /* OUT: Parsed query structure */
511 int *pnConsumed /* OUT: Number of bytes consumed */
512 ){
513 Fts3Expr *pRet = 0;
514 Fts3Expr *pPrev = 0;
515 Fts3Expr *pNotBranch = 0; /* Only used in legacy parse mode */
516 int nIn = n;
517 const char *zIn = z;
518 int rc = SQLITE_OK;
519 int isRequirePhrase = 1;
520
521 while( rc==SQLITE_OK ){
522 Fts3Expr *p = 0;
523 int nByte = 0;
524 rc = getNextNode(pParse, zIn, nIn, &p, &nByte);
525 if( rc==SQLITE_OK ){
526 int isPhrase;
527
528 if( !sqlite3_fts3_enable_parentheses
529 && p->eType==FTSQUERY_PHRASE && p->pPhrase->isNot
530 ){
531 /* Create an implicit NOT operator. */
532 Fts3Expr *pNot = sqlite3_malloc(sizeof(Fts3Expr));
533 if( !pNot ){
534 sqlite3Fts3ExprFree(p);
535 rc = SQLITE_NOMEM;
536 goto exprparse_out;
537 }
538 memset(pNot, 0, sizeof(Fts3Expr));
539 pNot->eType = FTSQUERY_NOT;
540 pNot->pRight = p;
541 if( pNotBranch ){
542 pNot->pLeft = pNotBranch;
543 }
544 pNotBranch = pNot;
545 p = pPrev;
546 }else{
547 int eType = p->eType;
548 assert( eType!=FTSQUERY_PHRASE || !p->pPhrase->isNot );
549 isPhrase = (eType==FTSQUERY_PHRASE || p->pLeft);
550
551 /* The isRequirePhrase variable is set to true if a phrase or
552 ** an expression contained in parenthesis is required. If a
553 ** binary operator (AND, OR, NOT or NEAR) is encounted when
554 ** isRequirePhrase is set, this is a syntax error.
555 */
556 if( !isPhrase && isRequirePhrase ){
557 sqlite3Fts3ExprFree(p);
558 rc = SQLITE_ERROR;
559 goto exprparse_out;
560 }
561
562 if( isPhrase && !isRequirePhrase ){
563 /* Insert an implicit AND operator. */
564 Fts3Expr *pAnd;
565 assert( pRet && pPrev );
566 pAnd = sqlite3_malloc(sizeof(Fts3Expr));
567 if( !pAnd ){
568 sqlite3Fts3ExprFree(p);
569 rc = SQLITE_NOMEM;
570 goto exprparse_out;
571 }
572 memset(pAnd, 0, sizeof(Fts3Expr));
573 pAnd->eType = FTSQUERY_AND;
574 insertBinaryOperator(&pRet, pPrev, pAnd);
575 pPrev = pAnd;
576 }
577
578 /* This test catches attempts to make either operand of a NEAR
579 ** operator something other than a phrase. For example, either of
580 ** the following:
581 **
582 ** (bracketed expression) NEAR phrase
583 ** phrase NEAR (bracketed expression)
584 **
585 ** Return an error in either case.
586 */
587 if( pPrev && (
588 (eType==FTSQUERY_NEAR && !isPhrase && pPrev->eType!=FTSQUERY_PHRASE)
589 || (eType!=FTSQUERY_PHRASE && isPhrase && pPrev->eType==FTSQUERY_NEAR)
590 )){
591 sqlite3Fts3ExprFree(p);
592 rc = SQLITE_ERROR;
593 goto exprparse_out;
594 }
595
596 if( isPhrase ){
597 if( pRet ){
598 assert( pPrev && pPrev->pLeft && pPrev->pRight==0 );
599 pPrev->pRight = p;
600 p->pParent = pPrev;
601 }else{
602 pRet = p;
603 }
604 }else{
605 insertBinaryOperator(&pRet, pPrev, p);
606 }
607 isRequirePhrase = !isPhrase;
608 }
609 assert( nByte>0 );
610 }
611 assert( rc!=SQLITE_OK || (nByte>0 && nByte<=nIn) );
612 nIn -= nByte;
613 zIn += nByte;
614 pPrev = p;
615 }
616
617 if( rc==SQLITE_DONE && pRet && isRequirePhrase ){
618 rc = SQLITE_ERROR;
619 }
620
621 if( rc==SQLITE_DONE ){
622 rc = SQLITE_OK;
623 if( !sqlite3_fts3_enable_parentheses && pNotBranch ){
624 if( !pRet ){
625 rc = SQLITE_ERROR;
626 }else{
627 Fts3Expr *pIter = pNotBranch;
628 while( pIter->pLeft ){
629 pIter = pIter->pLeft;
630 }
631 pIter->pLeft = pRet;
632 pRet = pNotBranch;
633 }
634 }
635 }
636 *pnConsumed = n - nIn;
637
638 exprparse_out:
639 if( rc!=SQLITE_OK ){
640 sqlite3Fts3ExprFree(pRet);
641 sqlite3Fts3ExprFree(pNotBranch);
642 pRet = 0;
643 }
644 *ppExpr = pRet;
645 return rc;
646 }
647
648 /*
649 ** Parameters z and n contain a pointer to and length of a buffer containing
650 ** an fts3 query expression, respectively. This function attempts to parse the
651 ** query expression and create a tree of Fts3Expr structures representing the
652 ** parsed expression. If successful, *ppExpr is set to point to the head
653 ** of the parsed expression tree and SQLITE_OK is returned. If an error
654 ** occurs, either SQLITE_NOMEM (out-of-memory error) or SQLITE_ERROR (parse
655 ** error) is returned and *ppExpr is set to 0.
656 **
657 ** If parameter n is a negative number, then z is assumed to point to a
658 ** nul-terminated string and the length is determined using strlen().
659 **
660 ** The first parameter, pTokenizer, is passed the fts3 tokenizer module to
661 ** use to normalize query tokens while parsing the expression. The azCol[]
662 ** array, which is assumed to contain nCol entries, should contain the names
663 ** of each column in the target fts3 table, in order from left to right.
664 ** Column names must be nul-terminated strings.
665 **
666 ** The iDefaultCol parameter should be passed the index of the table column
667 ** that appears on the left-hand-side of the MATCH operator (the default
668 ** column to match against for tokens for which a column name is not explicitly
669 ** specified as part of the query string), or -1 if tokens may by default
670 ** match any table column.
671 */
672 int sqlite3Fts3ExprParse(
673 sqlite3_tokenizer *pTokenizer, /* Tokenizer module */
674 char **azCol, /* Array of column names for fts3 table */
675 int nCol, /* Number of entries in azCol[] */
676 int iDefaultCol, /* Default column to query */
677 const char *z, int n, /* Text of MATCH query */
678 Fts3Expr **ppExpr /* OUT: Parsed query structure */
679 ){
680 int nParsed;
681 int rc;
682 ParseContext sParse;
683 sParse.pTokenizer = pTokenizer;
684 sParse.azCol = (const char **)azCol;
685 sParse.nCol = nCol;
686 sParse.iDefaultCol = iDefaultCol;
687 sParse.nNest = 0;
688 if( z==0 ){
689 *ppExpr = 0;
690 return SQLITE_OK;
691 }
692 if( n<0 ){
693 n = strlen(z);
694 }
695 rc = fts3ExprParse(&sParse, z, n, ppExpr, &nParsed);
696
697 /* Check for mismatched parenthesis */
698 if( rc==SQLITE_OK && sParse.nNest ){
699 rc = SQLITE_ERROR;
700 sqlite3Fts3ExprFree(*ppExpr);
701 *ppExpr = 0;
702 }
703
704 return rc;
705 }
706
707 /*
708 ** Free a parsed fts3 query expression allocated by sqlite3Fts3ExprParse().
709 */
710 void sqlite3Fts3ExprFree(Fts3Expr *p){
711 if( p ){
712 sqlite3Fts3ExprFree(p->pLeft);
713 sqlite3Fts3ExprFree(p->pRight);
714 sqlite3_free(p);
715 }
716 }
717
718 /****************************************************************************
719 *****************************************************************************
720 ** Everything after this point is just test code.
721 */
722
723 #ifdef SQLITE_TEST
724
725 #include <stdio.h>
726
727 /*
728 ** Function to query the hash-table of tokenizers (see README.tokenizers).
729 */
730 static int queryTestTokenizer(
731 sqlite3 *db,
732 const char *zName,
733 const sqlite3_tokenizer_module **pp
734 ){
735 int rc;
736 sqlite3_stmt *pStmt;
737 const char zSql[] = "SELECT fts3_tokenizer(?)";
738
739 *pp = 0;
740 rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
741 if( rc!=SQLITE_OK ){
742 return rc;
743 }
744
745 sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
746 if( SQLITE_ROW==sqlite3_step(pStmt) ){
747 if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){
748 memcpy(pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp));
749 }
750 }
751
752 return sqlite3_finalize(pStmt);
753 }
754
755 /*
756 ** This function is part of the test interface for the query parser. It
757 ** writes a text representation of the query expression pExpr into the
758 ** buffer pointed to by argument zBuf. It is assumed that zBuf is large
759 ** enough to store the required text representation.
760 */
761 static void exprToString(Fts3Expr *pExpr, char *zBuf){
762 switch( pExpr->eType ){
763 case FTSQUERY_PHRASE: {
764 Fts3Phrase *pPhrase = pExpr->pPhrase;
765 int i;
766 zBuf += sprintf(zBuf, "PHRASE %d %d", pPhrase->iColumn, pPhrase->isNot);
767 for(i=0; i<pPhrase->nToken; i++){
768 zBuf += sprintf(zBuf," %.*s",pPhrase->aToken[i].n,pPhrase->aToken[i].z);
769 zBuf += sprintf(zBuf,"%s", (pPhrase->aToken[i].isPrefix?"+":""));
770 }
771 return;
772 }
773
774 case FTSQUERY_NEAR:
775 zBuf += sprintf(zBuf, "NEAR/%d ", pExpr->nNear);
776 break;
777 case FTSQUERY_NOT:
778 zBuf += sprintf(zBuf, "NOT ");
779 break;
780 case FTSQUERY_AND:
781 zBuf += sprintf(zBuf, "AND ");
782 break;
783 case FTSQUERY_OR:
784 zBuf += sprintf(zBuf, "OR ");
785 break;
786 }
787
788 zBuf += sprintf(zBuf, "{");
789 exprToString(pExpr->pLeft, zBuf);
790 zBuf += strlen(zBuf);
791 zBuf += sprintf(zBuf, "} ");
792
793 zBuf += sprintf(zBuf, "{");
794 exprToString(pExpr->pRight, zBuf);
795 zBuf += strlen(zBuf);
796 zBuf += sprintf(zBuf, "}");
797 }
798
799 /*
800 ** This is the implementation of a scalar SQL function used to test the
801 ** expression parser. It should be called as follows:
802 **
803 ** fts3_exprtest(<tokenizer>, <expr>, <column 1>, ...);
804 **
805 ** The first argument, <tokenizer>, is the name of the fts3 tokenizer used
806 ** to parse the query expression (see README.tokenizers). The second argument
807 ** is the query expression to parse. Each subsequent argument is the name
808 ** of a column of the fts3 table that the query expression may refer to.
809 ** For example:
810 **
811 ** SELECT fts3_exprtest('simple', 'Bill col2:Bloggs', 'col1', 'col2');
812 */
813 static void fts3ExprTest(
814 sqlite3_context *context,
815 int argc,
816 sqlite3_value **argv
817 ){
818 sqlite3_tokenizer_module const *pModule = 0;
819 sqlite3_tokenizer *pTokenizer = 0;
820 int rc;
821 char **azCol = 0;
822 const char *zExpr;
823 int nExpr;
824 int nCol;
825 int ii;
826 Fts3Expr *pExpr;
827 sqlite3 *db = sqlite3_context_db_handle(context);
828
829 if( argc<3 ){
830 sqlite3_result_error(context,
831 "Usage: fts3_exprtest(tokenizer, expr, col1, ...", -1
832 );
833 return;
834 }
835
836 rc = queryTestTokenizer(db,
837 (const char *)sqlite3_value_text(argv[0]), &pModule);
838 if( rc==SQLITE_NOMEM ){
839 sqlite3_result_error_nomem(context);
840 goto exprtest_out;
841 }else if( !pModule ){
842 sqlite3_result_error(context, "No such tokenizer module", -1);
843 goto exprtest_out;
844 }
845
846 rc = pModule->xCreate(0, 0, &pTokenizer);
847 assert( rc==SQLITE_NOMEM || rc==SQLITE_OK );
848 if( rc==SQLITE_NOMEM ){
849 sqlite3_result_error_nomem(context);
850 goto exprtest_out;
851 }
852 pTokenizer->pModule = pModule;
853
854 zExpr = (const char *)sqlite3_value_text(argv[1]);
855 nExpr = sqlite3_value_bytes(argv[1]);
856 nCol = argc-2;
857 azCol = (char **)sqlite3_malloc(nCol*sizeof(char *));
858 if( !azCol ){
859 sqlite3_result_error_nomem(context);
860 goto exprtest_out;
861 }
862 for(ii=0; ii<nCol; ii++){
863 azCol[ii] = (char *)sqlite3_value_text(argv[ii+2]);
864 }
865
866 rc = sqlite3Fts3ExprParse(
867 pTokenizer, azCol, nCol, nCol, zExpr, nExpr, &pExpr
868 );
869 if( rc==SQLITE_NOMEM ){
870 sqlite3_result_error_nomem(context);
871 goto exprtest_out;
872 }else if( rc==SQLITE_OK ){
873 char zBuf[4096];
874 exprToString(pExpr, zBuf);
875 sqlite3_result_text(context, zBuf, -1, SQLITE_TRANSIENT);
876 sqlite3Fts3ExprFree(pExpr);
877 }else{
878 sqlite3_result_error(context, "Error parsing expression", -1);
879 }
880
881 exprtest_out:
882 if( pModule && pTokenizer ){
883 rc = pModule->xDestroy(pTokenizer);
884 }
885 sqlite3_free(azCol);
886 }
887
888 /*
889 ** Register the query expression parser test function fts3_exprtest()
890 ** with database connection db.
891 */
892 void sqlite3Fts3ExprInitTestInterface(sqlite3* db){
893 sqlite3_create_function(
894 db, "fts3_exprtest", -1, SQLITE_UTF8, 0, fts3ExprTest, 0, 0
895 );
896 }
897
898 #endif
899 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
OLDNEW
« no previous file with comments | « third_party/sqlite/ext/fts3/fts3_expr.h ('k') | third_party/sqlite/ext/fts3/fts3_hash.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698