| OLD | NEW |
| (Empty) |
| 1 /* | |
| 2 ** 2008 Nov 28 | |
| 3 ** | |
| 4 ** The author disclaims copyright to this source code. In place of | |
| 5 ** a legal notice, here is a blessing: | |
| 6 ** | |
| 7 ** May you do good and not evil. | |
| 8 ** May you find forgiveness for yourself and forgive others. | |
| 9 ** May you share freely, never taking more than you give. | |
| 10 ** | |
| 11 ****************************************************************************** | |
| 12 ** | |
| 13 ** This module contains code that implements a parser for fts3 query strings | |
| 14 ** (the right-hand argument to the MATCH operator). Because the supported | |
| 15 ** syntax is relatively simple, the whole tokenizer/parser system is | |
| 16 ** hand-coded. The public interface to this module is declared in source | |
| 17 ** code file "fts3_expr.h". | |
| 18 */ | |
| 19 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) | |
| 20 | |
| 21 /* | |
| 22 ** By default, this module parses the legacy syntax that has been | |
| 23 ** traditionally used by fts3. Or, if SQLITE_ENABLE_FTS3_PARENTHESIS | |
| 24 ** is defined, then it uses the new syntax. The differences between | |
| 25 ** the new and the old syntaxes are: | |
| 26 ** | |
| 27 ** a) The new syntax supports parenthesis. The old does not. | |
| 28 ** | |
| 29 ** b) The new syntax supports the AND and NOT operators. The old does not. | |
| 30 ** | |
| 31 ** c) The old syntax supports the "-" token qualifier. This is not | |
| 32 ** supported by the new syntax (it is replaced by the NOT operator). | |
| 33 ** | |
| 34 ** d) When using the old syntax, the OR operator has a greater precedence | |
| 35 ** than an implicit AND. When using the new, both implicity and explicit | |
| 36 ** AND operators have a higher precedence than OR. | |
| 37 ** | |
| 38 ** If compiled with SQLITE_TEST defined, then this module exports the | |
| 39 ** symbol "int sqlite3_fts3_enable_parentheses". Setting this variable | |
| 40 ** to zero causes the module to use the old syntax. If it is set to | |
| 41 ** non-zero the new syntax is activated. This is so both syntaxes can | |
| 42 ** be tested using a single build of testfixture. | |
| 43 */ | |
| 44 #ifdef SQLITE_TEST | |
| 45 int sqlite3_fts3_enable_parentheses = 0; | |
| 46 #else | |
| 47 # ifdef SQLITE_ENABLE_FTS3_PARENTHESIS | |
| 48 # define sqlite3_fts3_enable_parentheses 1 | |
| 49 # else | |
| 50 # define sqlite3_fts3_enable_parentheses 0 | |
| 51 # endif | |
| 52 #endif | |
| 53 | |
| 54 /* | |
| 55 ** Default span for NEAR operators. | |
| 56 */ | |
| 57 #define SQLITE_FTS3_DEFAULT_NEAR_PARAM 10 | |
| 58 | |
| 59 #include "fts3_expr.h" | |
| 60 #include "sqlite3.h" | |
| 61 #include <string.h> | |
| 62 #include <assert.h> | |
| 63 | |
| 64 typedef struct ParseContext ParseContext; | |
| 65 struct ParseContext { | |
| 66 sqlite3_tokenizer *pTokenizer; /* Tokenizer module */ | |
| 67 const char **azCol; /* Array of column names for fts3 table */ | |
| 68 int nCol; /* Number of entries in azCol[] */ | |
| 69 int iDefaultCol; /* Default column to query */ | |
| 70 sqlite3_context *pCtx; /* Write error message here */ | |
| 71 int nNest; /* Number of nested brackets */ | |
| 72 }; | |
| 73 | |
| 74 /* | |
| 75 ** This function is equivalent to the standard isspace() function. | |
| 76 ** | |
| 77 ** The standard isspace() can be awkward to use safely, because although it | |
| 78 ** is defined to accept an argument of type int, its behaviour when passed | |
| 79 ** an integer that falls outside of the range of the unsigned char type | |
| 80 ** is undefined (and sometimes, "undefined" means segfault). This wrapper | |
| 81 ** is defined to accept an argument of type char, and always returns 0 for | |
| 82 ** any values that fall outside of the range of the unsigned char type (i.e. | |
| 83 ** negative values). | |
| 84 */ | |
| 85 static int fts3isspace(char c){ | |
| 86 return c==' ' || c=='\t' || c=='\n' || c=='\r' || c=='\v' || c=='\f'; | |
| 87 } | |
| 88 | |
| 89 /* | |
| 90 ** Extract the next token from buffer z (length n) using the tokenizer | |
| 91 ** and other information (column names etc.) in pParse. Create an Fts3Expr | |
| 92 ** structure of type FTSQUERY_PHRASE containing a phrase consisting of this | |
| 93 ** single token and set *ppExpr to point to it. If the end of the buffer is | |
| 94 ** reached before a token is found, set *ppExpr to zero. It is the | |
| 95 ** responsibility of the caller to eventually deallocate the allocated | |
| 96 ** Fts3Expr structure (if any) by passing it to sqlite3_free(). | |
| 97 ** | |
| 98 ** Return SQLITE_OK if successful, or SQLITE_NOMEM if a memory allocation | |
| 99 ** fails. | |
| 100 */ | |
| 101 static int getNextToken( | |
| 102 ParseContext *pParse, /* fts3 query parse context */ | |
| 103 int iCol, /* Value for Fts3Phrase.iColumn */ | |
| 104 const char *z, int n, /* Input string */ | |
| 105 Fts3Expr **ppExpr, /* OUT: expression */ | |
| 106 int *pnConsumed /* OUT: Number of bytes consumed */ | |
| 107 ){ | |
| 108 sqlite3_tokenizer *pTokenizer = pParse->pTokenizer; | |
| 109 sqlite3_tokenizer_module const *pModule = pTokenizer->pModule; | |
| 110 int rc; | |
| 111 sqlite3_tokenizer_cursor *pCursor; | |
| 112 Fts3Expr *pRet = 0; | |
| 113 int nConsumed = 0; | |
| 114 | |
| 115 rc = pModule->xOpen(pTokenizer, z, n, &pCursor); | |
| 116 if( rc==SQLITE_OK ){ | |
| 117 const char *zToken; | |
| 118 int nToken, iStart, iEnd, iPosition; | |
| 119 int nByte; /* total space to allocate */ | |
| 120 | |
| 121 pCursor->pTokenizer = pTokenizer; | |
| 122 rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition); | |
| 123 | |
| 124 if( rc==SQLITE_OK ){ | |
| 125 nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken; | |
| 126 pRet = (Fts3Expr *)sqlite3_malloc(nByte); | |
| 127 if( !pRet ){ | |
| 128 rc = SQLITE_NOMEM; | |
| 129 }else{ | |
| 130 memset(pRet, 0, nByte); | |
| 131 pRet->eType = FTSQUERY_PHRASE; | |
| 132 pRet->pPhrase = (Fts3Phrase *)&pRet[1]; | |
| 133 pRet->pPhrase->nToken = 1; | |
| 134 pRet->pPhrase->iColumn = iCol; | |
| 135 pRet->pPhrase->aToken[0].n = nToken; | |
| 136 pRet->pPhrase->aToken[0].z = (char *)&pRet->pPhrase[1]; | |
| 137 memcpy(pRet->pPhrase->aToken[0].z, zToken, nToken); | |
| 138 | |
| 139 if( iEnd<n && z[iEnd]=='*' ){ | |
| 140 pRet->pPhrase->aToken[0].isPrefix = 1; | |
| 141 iEnd++; | |
| 142 } | |
| 143 if( !sqlite3_fts3_enable_parentheses && iStart>0 && z[iStart-1]=='-' ){ | |
| 144 pRet->pPhrase->isNot = 1; | |
| 145 } | |
| 146 } | |
| 147 nConsumed = iEnd; | |
| 148 } | |
| 149 | |
| 150 pModule->xClose(pCursor); | |
| 151 } | |
| 152 | |
| 153 *pnConsumed = nConsumed; | |
| 154 *ppExpr = pRet; | |
| 155 return rc; | |
| 156 } | |
| 157 | |
| 158 | |
| 159 /* | |
| 160 ** Enlarge a memory allocation. If an out-of-memory allocation occurs, | |
| 161 ** then free the old allocation. | |
| 162 */ | |
| 163 void *fts3ReallocOrFree(void *pOrig, int nNew){ | |
| 164 void *pRet = sqlite3_realloc(pOrig, nNew); | |
| 165 if( !pRet ){ | |
| 166 sqlite3_free(pOrig); | |
| 167 } | |
| 168 return pRet; | |
| 169 } | |
| 170 | |
| 171 /* | |
| 172 ** Buffer zInput, length nInput, contains the contents of a quoted string | |
| 173 ** that appeared as part of an fts3 query expression. Neither quote character | |
| 174 ** is included in the buffer. This function attempts to tokenize the entire | |
| 175 ** input buffer and create an Fts3Expr structure of type FTSQUERY_PHRASE | |
| 176 ** containing the results. | |
| 177 ** | |
| 178 ** If successful, SQLITE_OK is returned and *ppExpr set to point at the | |
| 179 ** allocated Fts3Expr structure. Otherwise, either SQLITE_NOMEM (out of memory | |
| 180 ** error) or SQLITE_ERROR (tokenization error) is returned and *ppExpr set | |
| 181 ** to 0. | |
| 182 */ | |
| 183 static int getNextString( | |
| 184 ParseContext *pParse, /* fts3 query parse context */ | |
| 185 const char *zInput, int nInput, /* Input string */ | |
| 186 Fts3Expr **ppExpr /* OUT: expression */ | |
| 187 ){ | |
| 188 sqlite3_tokenizer *pTokenizer = pParse->pTokenizer; | |
| 189 sqlite3_tokenizer_module const *pModule = pTokenizer->pModule; | |
| 190 int rc; | |
| 191 Fts3Expr *p = 0; | |
| 192 sqlite3_tokenizer_cursor *pCursor = 0; | |
| 193 char *zTemp = 0; | |
| 194 int nTemp = 0; | |
| 195 | |
| 196 rc = pModule->xOpen(pTokenizer, zInput, nInput, &pCursor); | |
| 197 if( rc==SQLITE_OK ){ | |
| 198 int ii; | |
| 199 pCursor->pTokenizer = pTokenizer; | |
| 200 for(ii=0; rc==SQLITE_OK; ii++){ | |
| 201 const char *zToken; | |
| 202 int nToken, iBegin, iEnd, iPos; | |
| 203 rc = pModule->xNext(pCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos); | |
| 204 if( rc==SQLITE_OK ){ | |
| 205 int nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase); | |
| 206 p = fts3ReallocOrFree(p, nByte+ii*sizeof(struct PhraseToken)); | |
| 207 zTemp = fts3ReallocOrFree(zTemp, nTemp + nToken); | |
| 208 if( !p || !zTemp ){ | |
| 209 goto no_mem; | |
| 210 } | |
| 211 if( ii==0 ){ | |
| 212 memset(p, 0, nByte); | |
| 213 p->pPhrase = (Fts3Phrase *)&p[1]; | |
| 214 } | |
| 215 p->pPhrase = (Fts3Phrase *)&p[1]; | |
| 216 p->pPhrase->nToken = ii+1; | |
| 217 p->pPhrase->aToken[ii].n = nToken; | |
| 218 memcpy(&zTemp[nTemp], zToken, nToken); | |
| 219 nTemp += nToken; | |
| 220 if( iEnd<nInput && zInput[iEnd]=='*' ){ | |
| 221 p->pPhrase->aToken[ii].isPrefix = 1; | |
| 222 }else{ | |
| 223 p->pPhrase->aToken[ii].isPrefix = 0; | |
| 224 } | |
| 225 } | |
| 226 } | |
| 227 | |
| 228 pModule->xClose(pCursor); | |
| 229 pCursor = 0; | |
| 230 } | |
| 231 | |
| 232 if( rc==SQLITE_DONE ){ | |
| 233 int jj; | |
| 234 char *zNew; | |
| 235 int nNew = 0; | |
| 236 int nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase); | |
| 237 nByte += (p?(p->pPhrase->nToken-1):0) * sizeof(struct PhraseToken); | |
| 238 p = fts3ReallocOrFree(p, nByte + nTemp); | |
| 239 if( !p ){ | |
| 240 goto no_mem; | |
| 241 } | |
| 242 if( zTemp ){ | |
| 243 zNew = &(((char *)p)[nByte]); | |
| 244 memcpy(zNew, zTemp, nTemp); | |
| 245 }else{ | |
| 246 memset(p, 0, nByte+nTemp); | |
| 247 } | |
| 248 p->pPhrase = (Fts3Phrase *)&p[1]; | |
| 249 for(jj=0; jj<p->pPhrase->nToken; jj++){ | |
| 250 p->pPhrase->aToken[jj].z = &zNew[nNew]; | |
| 251 nNew += p->pPhrase->aToken[jj].n; | |
| 252 } | |
| 253 sqlite3_free(zTemp); | |
| 254 p->eType = FTSQUERY_PHRASE; | |
| 255 p->pPhrase->iColumn = pParse->iDefaultCol; | |
| 256 rc = SQLITE_OK; | |
| 257 } | |
| 258 | |
| 259 *ppExpr = p; | |
| 260 return rc; | |
| 261 no_mem: | |
| 262 | |
| 263 if( pCursor ){ | |
| 264 pModule->xClose(pCursor); | |
| 265 } | |
| 266 sqlite3_free(zTemp); | |
| 267 sqlite3_free(p); | |
| 268 *ppExpr = 0; | |
| 269 return SQLITE_NOMEM; | |
| 270 } | |
| 271 | |
| 272 /* | |
| 273 ** Function getNextNode(), which is called by fts3ExprParse(), may itself | |
| 274 ** call fts3ExprParse(). So this forward declaration is required. | |
| 275 */ | |
| 276 static int fts3ExprParse(ParseContext *, const char *, int, Fts3Expr **, int *); | |
| 277 | |
| 278 /* | |
| 279 ** The output variable *ppExpr is populated with an allocated Fts3Expr | |
| 280 ** structure, or set to 0 if the end of the input buffer is reached. | |
| 281 ** | |
| 282 ** Returns an SQLite error code. SQLITE_OK if everything works, SQLITE_NOMEM | |
| 283 ** if a malloc failure occurs, or SQLITE_ERROR if a parse error is encountered. | |
| 284 ** If SQLITE_ERROR is returned, pContext is populated with an error message. | |
| 285 */ | |
| 286 static int getNextNode( | |
| 287 ParseContext *pParse, /* fts3 query parse context */ | |
| 288 const char *z, int n, /* Input string */ | |
| 289 Fts3Expr **ppExpr, /* OUT: expression */ | |
| 290 int *pnConsumed /* OUT: Number of bytes consumed */ | |
| 291 ){ | |
| 292 static const struct Fts3Keyword { | |
| 293 char z[4]; /* Keyword text */ | |
| 294 unsigned char n; /* Length of the keyword */ | |
| 295 unsigned char parenOnly; /* Only valid in paren mode */ | |
| 296 unsigned char eType; /* Keyword code */ | |
| 297 } aKeyword[] = { | |
| 298 { "OR" , 2, 0, FTSQUERY_OR }, | |
| 299 { "AND", 3, 1, FTSQUERY_AND }, | |
| 300 { "NOT", 3, 1, FTSQUERY_NOT }, | |
| 301 { "NEAR", 4, 0, FTSQUERY_NEAR } | |
| 302 }; | |
| 303 int ii; | |
| 304 int iCol; | |
| 305 int iColLen; | |
| 306 int rc; | |
| 307 Fts3Expr *pRet = 0; | |
| 308 | |
| 309 const char *zInput = z; | |
| 310 int nInput = n; | |
| 311 | |
| 312 /* Skip over any whitespace before checking for a keyword, an open or | |
| 313 ** close bracket, or a quoted string. | |
| 314 */ | |
| 315 while( nInput>0 && fts3isspace(*zInput) ){ | |
| 316 nInput--; | |
| 317 zInput++; | |
| 318 } | |
| 319 if( nInput==0 ){ | |
| 320 return SQLITE_DONE; | |
| 321 } | |
| 322 | |
| 323 /* See if we are dealing with a keyword. */ | |
| 324 for(ii=0; ii<(int)(sizeof(aKeyword)/sizeof(struct Fts3Keyword)); ii++){ | |
| 325 const struct Fts3Keyword *pKey = &aKeyword[ii]; | |
| 326 | |
| 327 if( (pKey->parenOnly & ~sqlite3_fts3_enable_parentheses)!=0 ){ | |
| 328 continue; | |
| 329 } | |
| 330 | |
| 331 if( nInput>=pKey->n && 0==memcmp(zInput, pKey->z, pKey->n) ){ | |
| 332 int nNear = SQLITE_FTS3_DEFAULT_NEAR_PARAM; | |
| 333 int nKey = pKey->n; | |
| 334 char cNext; | |
| 335 | |
| 336 /* If this is a "NEAR" keyword, check for an explicit nearness. */ | |
| 337 if( pKey->eType==FTSQUERY_NEAR ){ | |
| 338 assert( nKey==4 ); | |
| 339 if( zInput[4]=='/' && zInput[5]>='0' && zInput[5]<='9' ){ | |
| 340 nNear = 0; | |
| 341 for(nKey=5; zInput[nKey]>='0' && zInput[nKey]<='9'; nKey++){ | |
| 342 nNear = nNear * 10 + (zInput[nKey] - '0'); | |
| 343 } | |
| 344 } | |
| 345 } | |
| 346 | |
| 347 /* At this point this is probably a keyword. But for that to be true, | |
| 348 ** the next byte must contain either whitespace, an open or close | |
| 349 ** parenthesis, a quote character, or EOF. | |
| 350 */ | |
| 351 cNext = zInput[nKey]; | |
| 352 if( fts3isspace(cNext) | |
| 353 || cNext=='"' || cNext=='(' || cNext==')' || cNext==0 | |
| 354 ){ | |
| 355 pRet = (Fts3Expr *)sqlite3_malloc(sizeof(Fts3Expr)); | |
| 356 memset(pRet, 0, sizeof(Fts3Expr)); | |
| 357 pRet->eType = pKey->eType; | |
| 358 pRet->nNear = nNear; | |
| 359 *ppExpr = pRet; | |
| 360 *pnConsumed = (zInput - z) + nKey; | |
| 361 return SQLITE_OK; | |
| 362 } | |
| 363 | |
| 364 /* Turns out that wasn't a keyword after all. This happens if the | |
| 365 ** user has supplied a token such as "ORacle". Continue. | |
| 366 */ | |
| 367 } | |
| 368 } | |
| 369 | |
| 370 /* Check for an open bracket. */ | |
| 371 if( sqlite3_fts3_enable_parentheses ){ | |
| 372 if( *zInput=='(' ){ | |
| 373 int nConsumed; | |
| 374 int rc; | |
| 375 pParse->nNest++; | |
| 376 rc = fts3ExprParse(pParse, &zInput[1], nInput-1, ppExpr, &nConsumed); | |
| 377 if( rc==SQLITE_OK && !*ppExpr ){ | |
| 378 rc = SQLITE_DONE; | |
| 379 } | |
| 380 *pnConsumed = (zInput - z) + 1 + nConsumed; | |
| 381 return rc; | |
| 382 } | |
| 383 | |
| 384 /* Check for a close bracket. */ | |
| 385 if( *zInput==')' ){ | |
| 386 pParse->nNest--; | |
| 387 *pnConsumed = (zInput - z) + 1; | |
| 388 return SQLITE_DONE; | |
| 389 } | |
| 390 } | |
| 391 | |
| 392 /* See if we are dealing with a quoted phrase. If this is the case, then | |
| 393 ** search for the closing quote and pass the whole string to getNextString() | |
| 394 ** for processing. This is easy to do, as fts3 has no syntax for escaping | |
| 395 ** a quote character embedded in a string. | |
| 396 */ | |
| 397 if( *zInput=='"' ){ | |
| 398 for(ii=1; ii<nInput && zInput[ii]!='"'; ii++); | |
| 399 *pnConsumed = (zInput - z) + ii + 1; | |
| 400 if( ii==nInput ){ | |
| 401 return SQLITE_ERROR; | |
| 402 } | |
| 403 return getNextString(pParse, &zInput[1], ii-1, ppExpr); | |
| 404 } | |
| 405 | |
| 406 | |
| 407 /* If control flows to this point, this must be a regular token, or | |
| 408 ** the end of the input. Read a regular token using the sqlite3_tokenizer | |
| 409 ** interface. Before doing so, figure out if there is an explicit | |
| 410 ** column specifier for the token. | |
| 411 ** | |
| 412 ** TODO: Strangely, it is not possible to associate a column specifier | |
| 413 ** with a quoted phrase, only with a single token. Not sure if this was | |
| 414 ** an implementation artifact or an intentional decision when fts3 was | |
| 415 ** first implemented. Whichever it was, this module duplicates the | |
| 416 ** limitation. | |
| 417 */ | |
| 418 iCol = pParse->iDefaultCol; | |
| 419 iColLen = 0; | |
| 420 for(ii=0; ii<pParse->nCol; ii++){ | |
| 421 const char *zStr = pParse->azCol[ii]; | |
| 422 int nStr = strlen(zStr); | |
| 423 if( nInput>nStr && zInput[nStr]==':' | |
| 424 && sqlite3_strnicmp(zStr, zInput, nStr)==0 | |
| 425 ){ | |
| 426 iCol = ii; | |
| 427 iColLen = ((zInput - z) + nStr + 1); | |
| 428 break; | |
| 429 } | |
| 430 } | |
| 431 rc = getNextToken(pParse, iCol, &z[iColLen], n-iColLen, ppExpr, pnConsumed); | |
| 432 *pnConsumed += iColLen; | |
| 433 return rc; | |
| 434 } | |
| 435 | |
| 436 /* | |
| 437 ** The argument is an Fts3Expr structure for a binary operator (any type | |
| 438 ** except an FTSQUERY_PHRASE). Return an integer value representing the | |
| 439 ** precedence of the operator. Lower values have a higher precedence (i.e. | |
| 440 ** group more tightly). For example, in the C language, the == operator | |
| 441 ** groups more tightly than ||, and would therefore have a higher precedence. | |
| 442 ** | |
| 443 ** When using the new fts3 query syntax (when SQLITE_ENABLE_FTS3_PARENTHESIS | |
| 444 ** is defined), the order of the operators in precedence from highest to | |
| 445 ** lowest is: | |
| 446 ** | |
| 447 ** NEAR | |
| 448 ** NOT | |
| 449 ** AND (including implicit ANDs) | |
| 450 ** OR | |
| 451 ** | |
| 452 ** Note that when using the old query syntax, the OR operator has a higher | |
| 453 ** precedence than the AND operator. | |
| 454 */ | |
| 455 static int opPrecedence(Fts3Expr *p){ | |
| 456 assert( p->eType!=FTSQUERY_PHRASE ); | |
| 457 if( sqlite3_fts3_enable_parentheses ){ | |
| 458 return p->eType; | |
| 459 }else if( p->eType==FTSQUERY_NEAR ){ | |
| 460 return 1; | |
| 461 }else if( p->eType==FTSQUERY_OR ){ | |
| 462 return 2; | |
| 463 } | |
| 464 assert( p->eType==FTSQUERY_AND ); | |
| 465 return 3; | |
| 466 } | |
| 467 | |
| 468 /* | |
| 469 ** Argument ppHead contains a pointer to the current head of a query | |
| 470 ** expression tree being parsed. pPrev is the expression node most recently | |
| 471 ** inserted into the tree. This function adds pNew, which is always a binary | |
| 472 ** operator node, into the expression tree based on the relative precedence | |
| 473 ** of pNew and the existing nodes of the tree. This may result in the head | |
| 474 ** of the tree changing, in which case *ppHead is set to the new root node. | |
| 475 */ | |
| 476 static void insertBinaryOperator( | |
| 477 Fts3Expr **ppHead, /* Pointer to the root node of a tree */ | |
| 478 Fts3Expr *pPrev, /* Node most recently inserted into the tree */ | |
| 479 Fts3Expr *pNew /* New binary node to insert into expression tree */ | |
| 480 ){ | |
| 481 Fts3Expr *pSplit = pPrev; | |
| 482 while( pSplit->pParent && opPrecedence(pSplit->pParent)<=opPrecedence(pNew) ){ | |
| 483 pSplit = pSplit->pParent; | |
| 484 } | |
| 485 | |
| 486 if( pSplit->pParent ){ | |
| 487 assert( pSplit->pParent->pRight==pSplit ); | |
| 488 pSplit->pParent->pRight = pNew; | |
| 489 pNew->pParent = pSplit->pParent; | |
| 490 }else{ | |
| 491 *ppHead = pNew; | |
| 492 } | |
| 493 pNew->pLeft = pSplit; | |
| 494 pSplit->pParent = pNew; | |
| 495 } | |
| 496 | |
| 497 /* | |
| 498 ** Parse the fts3 query expression found in buffer z, length n. This function | |
| 499 ** returns either when the end of the buffer is reached or an unmatched | |
| 500 ** closing bracket - ')' - is encountered. | |
| 501 ** | |
| 502 ** If successful, SQLITE_OK is returned, *ppExpr is set to point to the | |
| 503 ** parsed form of the expression and *pnConsumed is set to the number of | |
| 504 ** bytes read from buffer z. Otherwise, *ppExpr is set to 0 and SQLITE_NOMEM | |
| 505 ** (out of memory error) or SQLITE_ERROR (parse error) is returned. | |
| 506 */ | |
| 507 static int fts3ExprParse( | |
| 508 ParseContext *pParse, /* fts3 query parse context */ | |
| 509 const char *z, int n, /* Text of MATCH query */ | |
| 510 Fts3Expr **ppExpr, /* OUT: Parsed query structure */ | |
| 511 int *pnConsumed /* OUT: Number of bytes consumed */ | |
| 512 ){ | |
| 513 Fts3Expr *pRet = 0; | |
| 514 Fts3Expr *pPrev = 0; | |
| 515 Fts3Expr *pNotBranch = 0; /* Only used in legacy parse mode */ | |
| 516 int nIn = n; | |
| 517 const char *zIn = z; | |
| 518 int rc = SQLITE_OK; | |
| 519 int isRequirePhrase = 1; | |
| 520 | |
| 521 while( rc==SQLITE_OK ){ | |
| 522 Fts3Expr *p = 0; | |
| 523 int nByte = 0; | |
| 524 rc = getNextNode(pParse, zIn, nIn, &p, &nByte); | |
| 525 if( rc==SQLITE_OK ){ | |
| 526 int isPhrase; | |
| 527 | |
| 528 if( !sqlite3_fts3_enable_parentheses | |
| 529 && p->eType==FTSQUERY_PHRASE && p->pPhrase->isNot | |
| 530 ){ | |
| 531 /* Create an implicit NOT operator. */ | |
| 532 Fts3Expr *pNot = sqlite3_malloc(sizeof(Fts3Expr)); | |
| 533 if( !pNot ){ | |
| 534 sqlite3Fts3ExprFree(p); | |
| 535 rc = SQLITE_NOMEM; | |
| 536 goto exprparse_out; | |
| 537 } | |
| 538 memset(pNot, 0, sizeof(Fts3Expr)); | |
| 539 pNot->eType = FTSQUERY_NOT; | |
| 540 pNot->pRight = p; | |
| 541 if( pNotBranch ){ | |
| 542 pNot->pLeft = pNotBranch; | |
| 543 } | |
| 544 pNotBranch = pNot; | |
| 545 p = pPrev; | |
| 546 }else{ | |
| 547 int eType = p->eType; | |
| 548 assert( eType!=FTSQUERY_PHRASE || !p->pPhrase->isNot ); | |
| 549 isPhrase = (eType==FTSQUERY_PHRASE || p->pLeft); | |
| 550 | |
| 551 /* The isRequirePhrase variable is set to true if a phrase or | |
| 552 ** an expression contained in parenthesis is required. If a | |
| 553 ** binary operator (AND, OR, NOT or NEAR) is encounted when | |
| 554 ** isRequirePhrase is set, this is a syntax error. | |
| 555 */ | |
| 556 if( !isPhrase && isRequirePhrase ){ | |
| 557 sqlite3Fts3ExprFree(p); | |
| 558 rc = SQLITE_ERROR; | |
| 559 goto exprparse_out; | |
| 560 } | |
| 561 | |
| 562 if( isPhrase && !isRequirePhrase ){ | |
| 563 /* Insert an implicit AND operator. */ | |
| 564 Fts3Expr *pAnd; | |
| 565 assert( pRet && pPrev ); | |
| 566 pAnd = sqlite3_malloc(sizeof(Fts3Expr)); | |
| 567 if( !pAnd ){ | |
| 568 sqlite3Fts3ExprFree(p); | |
| 569 rc = SQLITE_NOMEM; | |
| 570 goto exprparse_out; | |
| 571 } | |
| 572 memset(pAnd, 0, sizeof(Fts3Expr)); | |
| 573 pAnd->eType = FTSQUERY_AND; | |
| 574 insertBinaryOperator(&pRet, pPrev, pAnd); | |
| 575 pPrev = pAnd; | |
| 576 } | |
| 577 | |
| 578 /* This test catches attempts to make either operand of a NEAR | |
| 579 ** operator something other than a phrase. For example, either of | |
| 580 ** the following: | |
| 581 ** | |
| 582 ** (bracketed expression) NEAR phrase | |
| 583 ** phrase NEAR (bracketed expression) | |
| 584 ** | |
| 585 ** Return an error in either case. | |
| 586 */ | |
| 587 if( pPrev && ( | |
| 588 (eType==FTSQUERY_NEAR && !isPhrase && pPrev->eType!=FTSQUERY_PHRASE) | |
| 589 || (eType!=FTSQUERY_PHRASE && isPhrase && pPrev->eType==FTSQUERY_NEAR) | |
| 590 )){ | |
| 591 sqlite3Fts3ExprFree(p); | |
| 592 rc = SQLITE_ERROR; | |
| 593 goto exprparse_out; | |
| 594 } | |
| 595 | |
| 596 if( isPhrase ){ | |
| 597 if( pRet ){ | |
| 598 assert( pPrev && pPrev->pLeft && pPrev->pRight==0 ); | |
| 599 pPrev->pRight = p; | |
| 600 p->pParent = pPrev; | |
| 601 }else{ | |
| 602 pRet = p; | |
| 603 } | |
| 604 }else{ | |
| 605 insertBinaryOperator(&pRet, pPrev, p); | |
| 606 } | |
| 607 isRequirePhrase = !isPhrase; | |
| 608 } | |
| 609 assert( nByte>0 ); | |
| 610 } | |
| 611 assert( rc!=SQLITE_OK || (nByte>0 && nByte<=nIn) ); | |
| 612 nIn -= nByte; | |
| 613 zIn += nByte; | |
| 614 pPrev = p; | |
| 615 } | |
| 616 | |
| 617 if( rc==SQLITE_DONE && pRet && isRequirePhrase ){ | |
| 618 rc = SQLITE_ERROR; | |
| 619 } | |
| 620 | |
| 621 if( rc==SQLITE_DONE ){ | |
| 622 rc = SQLITE_OK; | |
| 623 if( !sqlite3_fts3_enable_parentheses && pNotBranch ){ | |
| 624 if( !pRet ){ | |
| 625 rc = SQLITE_ERROR; | |
| 626 }else{ | |
| 627 Fts3Expr *pIter = pNotBranch; | |
| 628 while( pIter->pLeft ){ | |
| 629 pIter = pIter->pLeft; | |
| 630 } | |
| 631 pIter->pLeft = pRet; | |
| 632 pRet = pNotBranch; | |
| 633 } | |
| 634 } | |
| 635 } | |
| 636 *pnConsumed = n - nIn; | |
| 637 | |
| 638 exprparse_out: | |
| 639 if( rc!=SQLITE_OK ){ | |
| 640 sqlite3Fts3ExprFree(pRet); | |
| 641 sqlite3Fts3ExprFree(pNotBranch); | |
| 642 pRet = 0; | |
| 643 } | |
| 644 *ppExpr = pRet; | |
| 645 return rc; | |
| 646 } | |
| 647 | |
| 648 /* | |
| 649 ** Parameters z and n contain a pointer to and length of a buffer containing | |
| 650 ** an fts3 query expression, respectively. This function attempts to parse the | |
| 651 ** query expression and create a tree of Fts3Expr structures representing the | |
| 652 ** parsed expression. If successful, *ppExpr is set to point to the head | |
| 653 ** of the parsed expression tree and SQLITE_OK is returned. If an error | |
| 654 ** occurs, either SQLITE_NOMEM (out-of-memory error) or SQLITE_ERROR (parse | |
| 655 ** error) is returned and *ppExpr is set to 0. | |
| 656 ** | |
| 657 ** If parameter n is a negative number, then z is assumed to point to a | |
| 658 ** nul-terminated string and the length is determined using strlen(). | |
| 659 ** | |
| 660 ** The first parameter, pTokenizer, is passed the fts3 tokenizer module to | |
| 661 ** use to normalize query tokens while parsing the expression. The azCol[] | |
| 662 ** array, which is assumed to contain nCol entries, should contain the names | |
| 663 ** of each column in the target fts3 table, in order from left to right. | |
| 664 ** Column names must be nul-terminated strings. | |
| 665 ** | |
| 666 ** The iDefaultCol parameter should be passed the index of the table column | |
| 667 ** that appears on the left-hand-side of the MATCH operator (the default | |
| 668 ** column to match against for tokens for which a column name is not explicitly | |
| 669 ** specified as part of the query string), or -1 if tokens may by default | |
| 670 ** match any table column. | |
| 671 */ | |
| 672 int sqlite3Fts3ExprParse( | |
| 673 sqlite3_tokenizer *pTokenizer, /* Tokenizer module */ | |
| 674 char **azCol, /* Array of column names for fts3 table */ | |
| 675 int nCol, /* Number of entries in azCol[] */ | |
| 676 int iDefaultCol, /* Default column to query */ | |
| 677 const char *z, int n, /* Text of MATCH query */ | |
| 678 Fts3Expr **ppExpr /* OUT: Parsed query structure */ | |
| 679 ){ | |
| 680 int nParsed; | |
| 681 int rc; | |
| 682 ParseContext sParse; | |
| 683 sParse.pTokenizer = pTokenizer; | |
| 684 sParse.azCol = (const char **)azCol; | |
| 685 sParse.nCol = nCol; | |
| 686 sParse.iDefaultCol = iDefaultCol; | |
| 687 sParse.nNest = 0; | |
| 688 if( z==0 ){ | |
| 689 *ppExpr = 0; | |
| 690 return SQLITE_OK; | |
| 691 } | |
| 692 if( n<0 ){ | |
| 693 n = strlen(z); | |
| 694 } | |
| 695 rc = fts3ExprParse(&sParse, z, n, ppExpr, &nParsed); | |
| 696 | |
| 697 /* Check for mismatched parenthesis */ | |
| 698 if( rc==SQLITE_OK && sParse.nNest ){ | |
| 699 rc = SQLITE_ERROR; | |
| 700 sqlite3Fts3ExprFree(*ppExpr); | |
| 701 *ppExpr = 0; | |
| 702 } | |
| 703 | |
| 704 return rc; | |
| 705 } | |
| 706 | |
| 707 /* | |
| 708 ** Free a parsed fts3 query expression allocated by sqlite3Fts3ExprParse(). | |
| 709 */ | |
| 710 void sqlite3Fts3ExprFree(Fts3Expr *p){ | |
| 711 if( p ){ | |
| 712 sqlite3Fts3ExprFree(p->pLeft); | |
| 713 sqlite3Fts3ExprFree(p->pRight); | |
| 714 sqlite3_free(p); | |
| 715 } | |
| 716 } | |
| 717 | |
| 718 /**************************************************************************** | |
| 719 ***************************************************************************** | |
| 720 ** Everything after this point is just test code. | |
| 721 */ | |
| 722 | |
| 723 #ifdef SQLITE_TEST | |
| 724 | |
| 725 #include <stdio.h> | |
| 726 | |
| 727 /* | |
| 728 ** Function to query the hash-table of tokenizers (see README.tokenizers). | |
| 729 */ | |
| 730 static int queryTestTokenizer( | |
| 731 sqlite3 *db, | |
| 732 const char *zName, | |
| 733 const sqlite3_tokenizer_module **pp | |
| 734 ){ | |
| 735 int rc; | |
| 736 sqlite3_stmt *pStmt; | |
| 737 const char zSql[] = "SELECT fts3_tokenizer(?)"; | |
| 738 | |
| 739 *pp = 0; | |
| 740 rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); | |
| 741 if( rc!=SQLITE_OK ){ | |
| 742 return rc; | |
| 743 } | |
| 744 | |
| 745 sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC); | |
| 746 if( SQLITE_ROW==sqlite3_step(pStmt) ){ | |
| 747 if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){ | |
| 748 memcpy(pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp)); | |
| 749 } | |
| 750 } | |
| 751 | |
| 752 return sqlite3_finalize(pStmt); | |
| 753 } | |
| 754 | |
| 755 /* | |
| 756 ** This function is part of the test interface for the query parser. It | |
| 757 ** writes a text representation of the query expression pExpr into the | |
| 758 ** buffer pointed to by argument zBuf. It is assumed that zBuf is large | |
| 759 ** enough to store the required text representation. | |
| 760 */ | |
| 761 static void exprToString(Fts3Expr *pExpr, char *zBuf){ | |
| 762 switch( pExpr->eType ){ | |
| 763 case FTSQUERY_PHRASE: { | |
| 764 Fts3Phrase *pPhrase = pExpr->pPhrase; | |
| 765 int i; | |
| 766 zBuf += sprintf(zBuf, "PHRASE %d %d", pPhrase->iColumn, pPhrase->isNot); | |
| 767 for(i=0; i<pPhrase->nToken; i++){ | |
| 768 zBuf += sprintf(zBuf," %.*s",pPhrase->aToken[i].n,pPhrase->aToken[i].z); | |
| 769 zBuf += sprintf(zBuf,"%s", (pPhrase->aToken[i].isPrefix?"+":"")); | |
| 770 } | |
| 771 return; | |
| 772 } | |
| 773 | |
| 774 case FTSQUERY_NEAR: | |
| 775 zBuf += sprintf(zBuf, "NEAR/%d ", pExpr->nNear); | |
| 776 break; | |
| 777 case FTSQUERY_NOT: | |
| 778 zBuf += sprintf(zBuf, "NOT "); | |
| 779 break; | |
| 780 case FTSQUERY_AND: | |
| 781 zBuf += sprintf(zBuf, "AND "); | |
| 782 break; | |
| 783 case FTSQUERY_OR: | |
| 784 zBuf += sprintf(zBuf, "OR "); | |
| 785 break; | |
| 786 } | |
| 787 | |
| 788 zBuf += sprintf(zBuf, "{"); | |
| 789 exprToString(pExpr->pLeft, zBuf); | |
| 790 zBuf += strlen(zBuf); | |
| 791 zBuf += sprintf(zBuf, "} "); | |
| 792 | |
| 793 zBuf += sprintf(zBuf, "{"); | |
| 794 exprToString(pExpr->pRight, zBuf); | |
| 795 zBuf += strlen(zBuf); | |
| 796 zBuf += sprintf(zBuf, "}"); | |
| 797 } | |
| 798 | |
| 799 /* | |
| 800 ** This is the implementation of a scalar SQL function used to test the | |
| 801 ** expression parser. It should be called as follows: | |
| 802 ** | |
| 803 ** fts3_exprtest(<tokenizer>, <expr>, <column 1>, ...); | |
| 804 ** | |
| 805 ** The first argument, <tokenizer>, is the name of the fts3 tokenizer used | |
| 806 ** to parse the query expression (see README.tokenizers). The second argument | |
| 807 ** is the query expression to parse. Each subsequent argument is the name | |
| 808 ** of a column of the fts3 table that the query expression may refer to. | |
| 809 ** For example: | |
| 810 ** | |
| 811 ** SELECT fts3_exprtest('simple', 'Bill col2:Bloggs', 'col1', 'col2'); | |
| 812 */ | |
| 813 static void fts3ExprTest( | |
| 814 sqlite3_context *context, | |
| 815 int argc, | |
| 816 sqlite3_value **argv | |
| 817 ){ | |
| 818 sqlite3_tokenizer_module const *pModule = 0; | |
| 819 sqlite3_tokenizer *pTokenizer = 0; | |
| 820 int rc; | |
| 821 char **azCol = 0; | |
| 822 const char *zExpr; | |
| 823 int nExpr; | |
| 824 int nCol; | |
| 825 int ii; | |
| 826 Fts3Expr *pExpr; | |
| 827 sqlite3 *db = sqlite3_context_db_handle(context); | |
| 828 | |
| 829 if( argc<3 ){ | |
| 830 sqlite3_result_error(context, | |
| 831 "Usage: fts3_exprtest(tokenizer, expr, col1, ...", -1 | |
| 832 ); | |
| 833 return; | |
| 834 } | |
| 835 | |
| 836 rc = queryTestTokenizer(db, | |
| 837 (const char *)sqlite3_value_text(argv[0]), &pModule); | |
| 838 if( rc==SQLITE_NOMEM ){ | |
| 839 sqlite3_result_error_nomem(context); | |
| 840 goto exprtest_out; | |
| 841 }else if( !pModule ){ | |
| 842 sqlite3_result_error(context, "No such tokenizer module", -1); | |
| 843 goto exprtest_out; | |
| 844 } | |
| 845 | |
| 846 rc = pModule->xCreate(0, 0, &pTokenizer); | |
| 847 assert( rc==SQLITE_NOMEM || rc==SQLITE_OK ); | |
| 848 if( rc==SQLITE_NOMEM ){ | |
| 849 sqlite3_result_error_nomem(context); | |
| 850 goto exprtest_out; | |
| 851 } | |
| 852 pTokenizer->pModule = pModule; | |
| 853 | |
| 854 zExpr = (const char *)sqlite3_value_text(argv[1]); | |
| 855 nExpr = sqlite3_value_bytes(argv[1]); | |
| 856 nCol = argc-2; | |
| 857 azCol = (char **)sqlite3_malloc(nCol*sizeof(char *)); | |
| 858 if( !azCol ){ | |
| 859 sqlite3_result_error_nomem(context); | |
| 860 goto exprtest_out; | |
| 861 } | |
| 862 for(ii=0; ii<nCol; ii++){ | |
| 863 azCol[ii] = (char *)sqlite3_value_text(argv[ii+2]); | |
| 864 } | |
| 865 | |
| 866 rc = sqlite3Fts3ExprParse( | |
| 867 pTokenizer, azCol, nCol, nCol, zExpr, nExpr, &pExpr | |
| 868 ); | |
| 869 if( rc==SQLITE_NOMEM ){ | |
| 870 sqlite3_result_error_nomem(context); | |
| 871 goto exprtest_out; | |
| 872 }else if( rc==SQLITE_OK ){ | |
| 873 char zBuf[4096]; | |
| 874 exprToString(pExpr, zBuf); | |
| 875 sqlite3_result_text(context, zBuf, -1, SQLITE_TRANSIENT); | |
| 876 sqlite3Fts3ExprFree(pExpr); | |
| 877 }else{ | |
| 878 sqlite3_result_error(context, "Error parsing expression", -1); | |
| 879 } | |
| 880 | |
| 881 exprtest_out: | |
| 882 if( pModule && pTokenizer ){ | |
| 883 rc = pModule->xDestroy(pTokenizer); | |
| 884 } | |
| 885 sqlite3_free(azCol); | |
| 886 } | |
| 887 | |
| 888 /* | |
| 889 ** Register the query expression parser test function fts3_exprtest() | |
| 890 ** with database connection db. | |
| 891 */ | |
| 892 void sqlite3Fts3ExprInitTestInterface(sqlite3* db){ | |
| 893 sqlite3_create_function( | |
| 894 db, "fts3_exprtest", -1, SQLITE_UTF8, 0, fts3ExprTest, 0, 0 | |
| 895 ); | |
| 896 } | |
| 897 | |
| 898 #endif | |
| 899 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ | |
| OLD | NEW |