OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ** 2008 Nov 28 |
| 3 ** |
| 4 ** The author disclaims copyright to this source code. In place of |
| 5 ** a legal notice, here is a blessing: |
| 6 ** |
| 7 ** May you do good and not evil. |
| 8 ** May you find forgiveness for yourself and forgive others. |
| 9 ** May you share freely, never taking more than you give. |
| 10 ** |
| 11 ****************************************************************************** |
| 12 ** |
| 13 ** This module contains code that implements a parser for fts3 query strings |
| 14 ** (the right-hand argument to the MATCH operator). Because the supported |
| 15 ** syntax is relatively simple, the whole tokenizer/parser system is |
| 16 ** hand-coded. |
| 17 */ |
| 18 #include "fts3Int.h" |
| 19 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) |
| 20 |
| 21 /* |
| 22 ** By default, this module parses the legacy syntax that has been |
| 23 ** traditionally used by fts3. Or, if SQLITE_ENABLE_FTS3_PARENTHESIS |
| 24 ** is defined, then it uses the new syntax. The differences between |
| 25 ** the new and the old syntaxes are: |
| 26 ** |
| 27 ** a) The new syntax supports parenthesis. The old does not. |
| 28 ** |
| 29 ** b) The new syntax supports the AND and NOT operators. The old does not. |
| 30 ** |
| 31 ** c) The old syntax supports the "-" token qualifier. This is not |
| 32 ** supported by the new syntax (it is replaced by the NOT operator). |
| 33 ** |
| 34 ** d) When using the old syntax, the OR operator has a greater precedence |
| 35 ** than an implicit AND. When using the new, both implicit and explicit |
| 36 ** AND operators have a higher precedence than OR. |
| 37 ** |
| 38 ** If compiled with SQLITE_TEST defined, then this module exports the |
| 39 ** symbol "int sqlite3_fts3_enable_parentheses". Setting this variable |
| 40 ** to zero causes the module to use the old syntax. If it is set to |
| 41 ** non-zero the new syntax is activated. This is so both syntaxes can |
| 42 ** be tested using a single build of testfixture. |
| 43 ** |
| 44 ** The following describes the syntax supported by the fts3 MATCH |
| 45 ** operator in a similar format to that used by the lemon parser |
| 46 ** generator. This module does not actually use lemon; it uses a |
| 47 ** custom parser. |
| 48 ** |
| 49 ** query ::= andexpr (OR andexpr)*. |
| 50 ** |
| 51 ** andexpr ::= notexpr (AND? notexpr)*. |
| 52 ** |
| 53 ** notexpr ::= nearexpr (NOT nearexpr|-TOKEN)*. |
| 54 ** notexpr ::= LP query RP. |
| 55 ** |
| 56 ** nearexpr ::= phrase (NEAR distance_opt nearexpr)*. |
| 57 ** |
| 58 ** distance_opt ::= . |
| 59 ** distance_opt ::= / INTEGER. |
| 60 ** |
| 61 ** phrase ::= TOKEN. |
| 62 ** phrase ::= COLUMN:TOKEN. |
| 63 ** phrase ::= "TOKEN TOKEN TOKEN...". |
| 64 */ |
| 65 |
/*
** Select the query syntax. Test builds get a real variable that can be
** flipped at run time; all other builds get a compile-time constant that
** is 1 when SQLITE_ENABLE_FTS3_PARENTHESIS is defined and 0 otherwise.
*/
#if defined(SQLITE_TEST)
int sqlite3_fts3_enable_parentheses = 0;
#elif defined(SQLITE_ENABLE_FTS3_PARENTHESIS)
# define sqlite3_fts3_enable_parentheses 1
#else
# define sqlite3_fts3_enable_parentheses 0
#endif
| 75 |
| 76 /* |
| 77 ** Default span for NEAR operators. |
| 78 */ |
| 79 #define SQLITE_FTS3_DEFAULT_NEAR_PARAM 10 |
| 80 |
| 81 #include <string.h> |
| 82 #include <assert.h> |
| 83 |
| 84 /* |
| 85 ** isNot: |
| 86 ** This variable is used by function getNextNode(). When getNextNode() is |
| 87 ** called, it sets ParseContext.isNot to true if the 'next node' is a |
| 88 ** FTSQUERY_PHRASE with a unary "-" attached to it. i.e. "mysql" in the |
| 89 ** FTS3 query "sqlite -mysql". Otherwise, ParseContext.isNot is set to |
| 90 ** zero. |
| 91 */ |
| 92 typedef struct ParseContext ParseContext; |
| 93 struct ParseContext { |
| 94 sqlite3_tokenizer *pTokenizer; /* Tokenizer module */ |
| 95 int iLangid; /* Language id used with tokenizer */ |
| 96 const char **azCol; /* Array of column names for fts3 table */ |
| 97 int bFts4; /* True to allow FTS4-only syntax */ |
| 98 int nCol; /* Number of entries in azCol[] */ |
| 99 int iDefaultCol; /* Default column to query */ |
| 100 int isNot; /* True if getNextNode() sees a unary - */ |
| 101 sqlite3_context *pCtx; /* Write error message here */ |
| 102 int nNest; /* Number of nested brackets */ |
| 103 }; |
| 104 |
/*
** Locale-independent replacement for isspace().
**
** The library isspace() takes an int and has undefined behavior for values
** that are not representable as unsigned char, which makes it hazardous to
** call with a plain (possibly negative) char. This version takes a char
** and returns 1 for exactly the six characters space, \t, \n, \r, \v and
** \f. Every other value, including all negative ones, yields 0.
*/
static int fts3isspace(char c){
  switch( c ){
    case ' ':
    case '\t':
    case '\n':
    case '\r':
    case '\v':
    case '\f':
      return 1;
    default:
      return 0;
  }
}
| 119 |
/*
** Obtain nByte bytes from sqlite3_malloc() and clear them. Returns the
** new zero-filled buffer, or NULL if the allocation could not be made.
*/
static void *fts3MallocZero(int nByte){
  void *pNew = sqlite3_malloc(nByte);
  if( pNew==0 ) return 0;
  memset(pNew, 0, nByte);
  return pNew;
}
| 130 |
| 131 int sqlite3Fts3OpenTokenizer( |
| 132 sqlite3_tokenizer *pTokenizer, |
| 133 int iLangid, |
| 134 const char *z, |
| 135 int n, |
| 136 sqlite3_tokenizer_cursor **ppCsr |
| 137 ){ |
| 138 sqlite3_tokenizer_module const *pModule = pTokenizer->pModule; |
| 139 sqlite3_tokenizer_cursor *pCsr = 0; |
| 140 int rc; |
| 141 |
| 142 rc = pModule->xOpen(pTokenizer, z, n, &pCsr); |
| 143 assert( rc==SQLITE_OK || pCsr==0 ); |
| 144 if( rc==SQLITE_OK ){ |
| 145 pCsr->pTokenizer = pTokenizer; |
| 146 if( pModule->iVersion>=1 ){ |
| 147 rc = pModule->xLanguageid(pCsr, iLangid); |
| 148 if( rc!=SQLITE_OK ){ |
| 149 pModule->xClose(pCsr); |
| 150 pCsr = 0; |
| 151 } |
| 152 } |
| 153 } |
| 154 *ppCsr = pCsr; |
| 155 return rc; |
| 156 } |
| 157 |
| 158 /* |
| 159 ** Function getNextNode(), which is called by fts3ExprParse(), may itself |
| 160 ** call fts3ExprParse(). So this forward declaration is required. |
| 161 */ |
| 162 static int fts3ExprParse(ParseContext *, const char *, int, Fts3Expr **, int *); |
| 163 |
| 164 /* |
| 165 ** Extract the next token from buffer z (length n) using the tokenizer |
| 166 ** and other information (column names etc.) in pParse. Create an Fts3Expr |
| 167 ** structure of type FTSQUERY_PHRASE containing a phrase consisting of this |
| 168 ** single token and set *ppExpr to point to it. If the end of the buffer is |
| 169 ** reached before a token is found, set *ppExpr to zero. It is the |
| 170 ** responsibility of the caller to eventually deallocate the allocated |
| 171 ** Fts3Expr structure (if any) by passing it to sqlite3_free(). |
| 172 ** |
| 173 ** Return SQLITE_OK if successful, or SQLITE_NOMEM if a memory allocation |
| 174 ** fails. |
| 175 */ |
| 176 static int getNextToken( |
| 177 ParseContext *pParse, /* fts3 query parse context */ |
| 178 int iCol, /* Value for Fts3Phrase.iColumn */ |
| 179 const char *z, int n, /* Input string */ |
| 180 Fts3Expr **ppExpr, /* OUT: expression */ |
| 181 int *pnConsumed /* OUT: Number of bytes consumed */ |
| 182 ){ |
| 183 sqlite3_tokenizer *pTokenizer = pParse->pTokenizer; |
| 184 sqlite3_tokenizer_module const *pModule = pTokenizer->pModule; |
| 185 int rc; |
| 186 sqlite3_tokenizer_cursor *pCursor; |
| 187 Fts3Expr *pRet = 0; |
| 188 int i = 0; |
| 189 |
| 190 /* Set variable i to the maximum number of bytes of input to tokenize. */ |
| 191 for(i=0; i<n; i++){ |
| 192 if( sqlite3_fts3_enable_parentheses && (z[i]=='(' || z[i]==')') ) break; |
| 193 if( z[i]=='"' ) break; |
| 194 } |
| 195 |
| 196 *pnConsumed = i; |
| 197 rc = sqlite3Fts3OpenTokenizer(pTokenizer, pParse->iLangid, z, i, &pCursor); |
| 198 if( rc==SQLITE_OK ){ |
| 199 const char *zToken; |
| 200 int nToken = 0, iStart = 0, iEnd = 0, iPosition = 0; |
| 201 int nByte; /* total space to allocate */ |
| 202 |
| 203 rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition); |
| 204 if( rc==SQLITE_OK ){ |
| 205 nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken; |
| 206 pRet = (Fts3Expr *)fts3MallocZero(nByte); |
| 207 if( !pRet ){ |
| 208 rc = SQLITE_NOMEM; |
| 209 }else{ |
| 210 pRet->eType = FTSQUERY_PHRASE; |
| 211 pRet->pPhrase = (Fts3Phrase *)&pRet[1]; |
| 212 pRet->pPhrase->nToken = 1; |
| 213 pRet->pPhrase->iColumn = iCol; |
| 214 pRet->pPhrase->aToken[0].n = nToken; |
| 215 pRet->pPhrase->aToken[0].z = (char *)&pRet->pPhrase[1]; |
| 216 memcpy(pRet->pPhrase->aToken[0].z, zToken, nToken); |
| 217 |
| 218 if( iEnd<n && z[iEnd]=='*' ){ |
| 219 pRet->pPhrase->aToken[0].isPrefix = 1; |
| 220 iEnd++; |
| 221 } |
| 222 |
| 223 while( 1 ){ |
| 224 if( !sqlite3_fts3_enable_parentheses |
| 225 && iStart>0 && z[iStart-1]=='-' |
| 226 ){ |
| 227 pParse->isNot = 1; |
| 228 iStart--; |
| 229 }else if( pParse->bFts4 && iStart>0 && z[iStart-1]=='^' ){ |
| 230 pRet->pPhrase->aToken[0].bFirst = 1; |
| 231 iStart--; |
| 232 }else{ |
| 233 break; |
| 234 } |
| 235 } |
| 236 |
| 237 } |
| 238 *pnConsumed = iEnd; |
| 239 }else if( i && rc==SQLITE_DONE ){ |
| 240 rc = SQLITE_OK; |
| 241 } |
| 242 |
| 243 pModule->xClose(pCursor); |
| 244 } |
| 245 |
| 246 *ppExpr = pRet; |
| 247 return rc; |
| 248 } |
| 249 |
| 250 |
/*
** Resize allocation pOrig to nNew bytes. On success the (possibly moved)
** allocation is returned. On failure NULL is returned and pOrig has been
** freed, so the caller must not use or free it again.
**
** sqlite3_realloc(X,N) with N<=0 behaves like sqlite3_free(X) and returns
** NULL. Passing such a request through and then freeing pOrig on the NULL
** result would release the buffer twice, so that case is handled here
** explicitly: pOrig is freed exactly once and NULL is returned.
*/
static void *fts3ReallocOrFree(void *pOrig, int nNew){
  void *pRet;
  if( nNew<=0 ){
    sqlite3_free(pOrig);
    return 0;
  }
  pRet = sqlite3_realloc(pOrig, nNew);
  if( !pRet ){
    /* Out of memory: sqlite3_realloc() left pOrig intact, release it. */
    sqlite3_free(pOrig);
  }
  return pRet;
}
| 262 |
| 263 /* |
| 264 ** Buffer zInput, length nInput, contains the contents of a quoted string |
| 265 ** that appeared as part of an fts3 query expression. Neither quote character |
| 266 ** is included in the buffer. This function attempts to tokenize the entire |
| 267 ** input buffer and create an Fts3Expr structure of type FTSQUERY_PHRASE |
| 268 ** containing the results. |
| 269 ** |
| 270 ** If successful, SQLITE_OK is returned and *ppExpr set to point at the |
| 271 ** allocated Fts3Expr structure. Otherwise, either SQLITE_NOMEM (out of memory |
| 272 ** error) or SQLITE_ERROR (tokenization error) is returned and *ppExpr set |
| 273 ** to 0. |
| 274 */ |
| 275 static int getNextString( |
| 276 ParseContext *pParse, /* fts3 query parse context */ |
| 277 const char *zInput, int nInput, /* Input string */ |
| 278 Fts3Expr **ppExpr /* OUT: expression */ |
| 279 ){ |
| 280 sqlite3_tokenizer *pTokenizer = pParse->pTokenizer; |
| 281 sqlite3_tokenizer_module const *pModule = pTokenizer->pModule; |
| 282 int rc; |
| 283 Fts3Expr *p = 0; |
| 284 sqlite3_tokenizer_cursor *pCursor = 0; |
| 285 char *zTemp = 0; |
| 286 int nTemp = 0; |
| 287 |
| 288 const int nSpace = sizeof(Fts3Expr) + sizeof(Fts3Phrase); |
| 289 int nToken = 0; |
| 290 |
| 291 /* The final Fts3Expr data structure, including the Fts3Phrase, |
| 292 ** Fts3PhraseToken structures token buffers are all stored as a single |
| 293 ** allocation so that the expression can be freed with a single call to |
| 294 ** sqlite3_free(). Setting this up requires a two pass approach. |
| 295 ** |
| 296 ** The first pass, in the block below, uses a tokenizer cursor to iterate |
| 297 ** through the tokens in the expression. This pass uses fts3ReallocOrFree() |
| 298 ** to assemble data in two dynamic buffers: |
| 299 ** |
| 300 ** Buffer p: Points to the Fts3Expr structure, followed by the Fts3Phrase |
| 301 ** structure, followed by the array of Fts3PhraseToken |
| 302 ** structures. This pass only populates the Fts3PhraseToken array. |
| 303 ** |
| 304 ** Buffer zTemp: Contains copies of all tokens. |
| 305 ** |
| 306 ** The second pass, in the block that begins "if( rc==SQLITE_DONE )" below, |
| 307 ** appends buffer zTemp to buffer p, and fills in the Fts3Expr and Fts3Phrase |
| 308 ** structures. |
| 309 */ |
| 310 rc = sqlite3Fts3OpenTokenizer( |
| 311 pTokenizer, pParse->iLangid, zInput, nInput, &pCursor); |
| 312 if( rc==SQLITE_OK ){ |
| 313 int ii; |
| 314 for(ii=0; rc==SQLITE_OK; ii++){ |
| 315 const char *zByte; |
| 316 int nByte = 0, iBegin = 0, iEnd = 0, iPos = 0; |
| 317 rc = pModule->xNext(pCursor, &zByte, &nByte, &iBegin, &iEnd, &iPos); |
| 318 if( rc==SQLITE_OK ){ |
| 319 Fts3PhraseToken *pToken; |
| 320 |
| 321 p = fts3ReallocOrFree(p, nSpace + ii*sizeof(Fts3PhraseToken)); |
| 322 if( !p ) goto no_mem; |
| 323 |
| 324 zTemp = fts3ReallocOrFree(zTemp, nTemp + nByte); |
| 325 if( !zTemp ) goto no_mem; |
| 326 |
| 327 assert( nToken==ii ); |
| 328 pToken = &((Fts3Phrase *)(&p[1]))->aToken[ii]; |
| 329 memset(pToken, 0, sizeof(Fts3PhraseToken)); |
| 330 |
| 331 memcpy(&zTemp[nTemp], zByte, nByte); |
| 332 nTemp += nByte; |
| 333 |
| 334 pToken->n = nByte; |
| 335 pToken->isPrefix = (iEnd<nInput && zInput[iEnd]=='*'); |
| 336 pToken->bFirst = (iBegin>0 && zInput[iBegin-1]=='^'); |
| 337 nToken = ii+1; |
| 338 } |
| 339 } |
| 340 |
| 341 pModule->xClose(pCursor); |
| 342 pCursor = 0; |
| 343 } |
| 344 |
| 345 if( rc==SQLITE_DONE ){ |
| 346 int jj; |
| 347 char *zBuf = 0; |
| 348 |
| 349 p = fts3ReallocOrFree(p, nSpace + nToken*sizeof(Fts3PhraseToken) + nTemp); |
| 350 if( !p ) goto no_mem; |
| 351 memset(p, 0, (char *)&(((Fts3Phrase *)&p[1])->aToken[0])-(char *)p); |
| 352 p->eType = FTSQUERY_PHRASE; |
| 353 p->pPhrase = (Fts3Phrase *)&p[1]; |
| 354 p->pPhrase->iColumn = pParse->iDefaultCol; |
| 355 p->pPhrase->nToken = nToken; |
| 356 |
| 357 zBuf = (char *)&p->pPhrase->aToken[nToken]; |
| 358 if( zTemp ){ |
| 359 memcpy(zBuf, zTemp, nTemp); |
| 360 sqlite3_free(zTemp); |
| 361 }else{ |
| 362 assert( nTemp==0 ); |
| 363 } |
| 364 |
| 365 for(jj=0; jj<p->pPhrase->nToken; jj++){ |
| 366 p->pPhrase->aToken[jj].z = zBuf; |
| 367 zBuf += p->pPhrase->aToken[jj].n; |
| 368 } |
| 369 rc = SQLITE_OK; |
| 370 } |
| 371 |
| 372 *ppExpr = p; |
| 373 return rc; |
| 374 no_mem: |
| 375 |
| 376 if( pCursor ){ |
| 377 pModule->xClose(pCursor); |
| 378 } |
| 379 sqlite3_free(zTemp); |
| 380 sqlite3_free(p); |
| 381 *ppExpr = 0; |
| 382 return SQLITE_NOMEM; |
| 383 } |
| 384 |
| 385 /* |
| 386 ** The output variable *ppExpr is populated with an allocated Fts3Expr |
| 387 ** structure, or set to 0 if the end of the input buffer is reached. |
| 388 ** |
| 389 ** Returns an SQLite error code. SQLITE_OK if everything works, SQLITE_NOMEM |
| 390 ** if a malloc failure occurs, or SQLITE_ERROR if a parse error is encountered. |
| 391 ** If SQLITE_ERROR is returned, pContext is populated with an error message. |
| 392 */ |
| 393 static int getNextNode( |
| 394 ParseContext *pParse, /* fts3 query parse context */ |
| 395 const char *z, int n, /* Input string */ |
| 396 Fts3Expr **ppExpr, /* OUT: expression */ |
| 397 int *pnConsumed /* OUT: Number of bytes consumed */ |
| 398 ){ |
| 399 static const struct Fts3Keyword { |
| 400 char *z; /* Keyword text */ |
| 401 unsigned char n; /* Length of the keyword */ |
| 402 unsigned char parenOnly; /* Only valid in paren mode */ |
| 403 unsigned char eType; /* Keyword code */ |
| 404 } aKeyword[] = { |
| 405 { "OR" , 2, 0, FTSQUERY_OR }, |
| 406 { "AND", 3, 1, FTSQUERY_AND }, |
| 407 { "NOT", 3, 1, FTSQUERY_NOT }, |
| 408 { "NEAR", 4, 0, FTSQUERY_NEAR } |
| 409 }; |
| 410 int ii; |
| 411 int iCol; |
| 412 int iColLen; |
| 413 int rc; |
| 414 Fts3Expr *pRet = 0; |
| 415 |
| 416 const char *zInput = z; |
| 417 int nInput = n; |
| 418 |
| 419 pParse->isNot = 0; |
| 420 |
| 421 /* Skip over any whitespace before checking for a keyword, an open or |
| 422 ** close bracket, or a quoted string. |
| 423 */ |
| 424 while( nInput>0 && fts3isspace(*zInput) ){ |
| 425 nInput--; |
| 426 zInput++; |
| 427 } |
| 428 if( nInput==0 ){ |
| 429 return SQLITE_DONE; |
| 430 } |
| 431 |
| 432 /* See if we are dealing with a keyword. */ |
| 433 for(ii=0; ii<(int)(sizeof(aKeyword)/sizeof(struct Fts3Keyword)); ii++){ |
| 434 const struct Fts3Keyword *pKey = &aKeyword[ii]; |
| 435 |
| 436 if( (pKey->parenOnly & ~sqlite3_fts3_enable_parentheses)!=0 ){ |
| 437 continue; |
| 438 } |
| 439 |
| 440 if( nInput>=pKey->n && 0==memcmp(zInput, pKey->z, pKey->n) ){ |
| 441 int nNear = SQLITE_FTS3_DEFAULT_NEAR_PARAM; |
| 442 int nKey = pKey->n; |
| 443 char cNext; |
| 444 |
| 445 /* If this is a "NEAR" keyword, check for an explicit nearness. */ |
| 446 if( pKey->eType==FTSQUERY_NEAR ){ |
| 447 assert( nKey==4 ); |
| 448 if( zInput[4]=='/' && zInput[5]>='0' && zInput[5]<='9' ){ |
| 449 nNear = 0; |
| 450 for(nKey=5; zInput[nKey]>='0' && zInput[nKey]<='9'; nKey++){ |
| 451 nNear = nNear * 10 + (zInput[nKey] - '0'); |
| 452 } |
| 453 } |
| 454 } |
| 455 |
| 456 /* At this point this is probably a keyword. But for that to be true, |
| 457 ** the next byte must contain either whitespace, an open or close |
| 458 ** parenthesis, a quote character, or EOF. |
| 459 */ |
| 460 cNext = zInput[nKey]; |
| 461 if( fts3isspace(cNext) |
| 462 || cNext=='"' || cNext=='(' || cNext==')' || cNext==0 |
| 463 ){ |
| 464 pRet = (Fts3Expr *)fts3MallocZero(sizeof(Fts3Expr)); |
| 465 if( !pRet ){ |
| 466 return SQLITE_NOMEM; |
| 467 } |
| 468 pRet->eType = pKey->eType; |
| 469 pRet->nNear = nNear; |
| 470 *ppExpr = pRet; |
| 471 *pnConsumed = (int)((zInput - z) + nKey); |
| 472 return SQLITE_OK; |
| 473 } |
| 474 |
| 475 /* Turns out that wasn't a keyword after all. This happens if the |
| 476 ** user has supplied a token such as "ORacle". Continue. |
| 477 */ |
| 478 } |
| 479 } |
| 480 |
| 481 /* See if we are dealing with a quoted phrase. If this is the case, then |
| 482 ** search for the closing quote and pass the whole string to getNextString() |
| 483 ** for processing. This is easy to do, as fts3 has no syntax for escaping |
| 484 ** a quote character embedded in a string. |
| 485 */ |
| 486 if( *zInput=='"' ){ |
| 487 for(ii=1; ii<nInput && zInput[ii]!='"'; ii++); |
| 488 *pnConsumed = (int)((zInput - z) + ii + 1); |
| 489 if( ii==nInput ){ |
| 490 return SQLITE_ERROR; |
| 491 } |
| 492 return getNextString(pParse, &zInput[1], ii-1, ppExpr); |
| 493 } |
| 494 |
| 495 if( sqlite3_fts3_enable_parentheses ){ |
| 496 if( *zInput=='(' ){ |
| 497 int nConsumed = 0; |
| 498 pParse->nNest++; |
| 499 rc = fts3ExprParse(pParse, zInput+1, nInput-1, ppExpr, &nConsumed); |
| 500 if( rc==SQLITE_OK && !*ppExpr ){ rc = SQLITE_DONE; } |
| 501 *pnConsumed = (int)(zInput - z) + 1 + nConsumed; |
| 502 return rc; |
| 503 }else if( *zInput==')' ){ |
| 504 pParse->nNest--; |
| 505 *pnConsumed = (int)((zInput - z) + 1); |
| 506 *ppExpr = 0; |
| 507 return SQLITE_DONE; |
| 508 } |
| 509 } |
| 510 |
| 511 /* If control flows to this point, this must be a regular token, or |
| 512 ** the end of the input. Read a regular token using the sqlite3_tokenizer |
| 513 ** interface. Before doing so, figure out if there is an explicit |
| 514 ** column specifier for the token. |
| 515 ** |
| 516 ** TODO: Strangely, it is not possible to associate a column specifier |
| 517 ** with a quoted phrase, only with a single token. Not sure if this was |
| 518 ** an implementation artifact or an intentional decision when fts3 was |
| 519 ** first implemented. Whichever it was, this module duplicates the |
| 520 ** limitation. |
| 521 */ |
| 522 iCol = pParse->iDefaultCol; |
| 523 iColLen = 0; |
| 524 for(ii=0; ii<pParse->nCol; ii++){ |
| 525 const char *zStr = pParse->azCol[ii]; |
| 526 int nStr = (int)strlen(zStr); |
| 527 if( nInput>nStr && zInput[nStr]==':' |
| 528 && sqlite3_strnicmp(zStr, zInput, nStr)==0 |
| 529 ){ |
| 530 iCol = ii; |
| 531 iColLen = (int)((zInput - z) + nStr + 1); |
| 532 break; |
| 533 } |
| 534 } |
| 535 rc = getNextToken(pParse, iCol, &z[iColLen], n-iColLen, ppExpr, pnConsumed); |
| 536 *pnConsumed += iColLen; |
| 537 return rc; |
| 538 } |
| 539 |
| 540 /* |
| 541 ** The argument is an Fts3Expr structure for a binary operator (any type |
| 542 ** except an FTSQUERY_PHRASE). Return an integer value representing the |
| 543 ** precedence of the operator. Lower values have a higher precedence (i.e. |
| 544 ** group more tightly). For example, in the C language, the == operator |
| 545 ** groups more tightly than ||, and would therefore have a higher precedence. |
| 546 ** |
| 547 ** When using the new fts3 query syntax (when SQLITE_ENABLE_FTS3_PARENTHESIS |
| 548 ** is defined), the order of the operators in precedence from highest to |
| 549 ** lowest is: |
| 550 ** |
| 551 ** NEAR |
| 552 ** NOT |
| 553 ** AND (including implicit ANDs) |
| 554 ** OR |
| 555 ** |
| 556 ** Note that when using the old query syntax, the OR operator has a higher |
| 557 ** precedence than the AND operator. |
| 558 */ |
| 559 static int opPrecedence(Fts3Expr *p){ |
| 560 assert( p->eType!=FTSQUERY_PHRASE ); |
| 561 if( sqlite3_fts3_enable_parentheses ){ |
| 562 return p->eType; |
| 563 }else if( p->eType==FTSQUERY_NEAR ){ |
| 564 return 1; |
| 565 }else if( p->eType==FTSQUERY_OR ){ |
| 566 return 2; |
| 567 } |
| 568 assert( p->eType==FTSQUERY_AND ); |
| 569 return 3; |
| 570 } |
| 571 |
| 572 /* |
| 573 ** Argument ppHead contains a pointer to the current head of a query |
| 574 ** expression tree being parsed. pPrev is the expression node most recently |
| 575 ** inserted into the tree. This function adds pNew, which is always a binary |
| 576 ** operator node, into the expression tree based on the relative precedence |
| 577 ** of pNew and the existing nodes of the tree. This may result in the head |
| 578 ** of the tree changing, in which case *ppHead is set to the new root node. |
| 579 */ |
| 580 static void insertBinaryOperator( |
| 581 Fts3Expr **ppHead, /* Pointer to the root node of a tree */ |
| 582 Fts3Expr *pPrev, /* Node most recently inserted into the tree */ |
| 583 Fts3Expr *pNew /* New binary node to insert into expression tree */ |
| 584 ){ |
| 585 Fts3Expr *pSplit = pPrev; |
| 586 while( pSplit->pParent && opPrecedence(pSplit->pParent)<=opPrecedence(pNew) ){ |
| 587 pSplit = pSplit->pParent; |
| 588 } |
| 589 |
| 590 if( pSplit->pParent ){ |
| 591 assert( pSplit->pParent->pRight==pSplit ); |
| 592 pSplit->pParent->pRight = pNew; |
| 593 pNew->pParent = pSplit->pParent; |
| 594 }else{ |
| 595 *ppHead = pNew; |
| 596 } |
| 597 pNew->pLeft = pSplit; |
| 598 pSplit->pParent = pNew; |
| 599 } |
| 600 |
| 601 /* |
| 602 ** Parse the fts3 query expression found in buffer z, length n. This function |
| 603 ** returns either when the end of the buffer is reached or an unmatched |
| 604 ** closing bracket - ')' - is encountered. |
| 605 ** |
| 606 ** If successful, SQLITE_OK is returned, *ppExpr is set to point to the |
| 607 ** parsed form of the expression and *pnConsumed is set to the number of |
| 608 ** bytes read from buffer z. Otherwise, *ppExpr is set to 0 and SQLITE_NOMEM |
| 609 ** (out of memory error) or SQLITE_ERROR (parse error) is returned. |
| 610 */ |
| 611 static int fts3ExprParse( |
| 612 ParseContext *pParse, /* fts3 query parse context */ |
| 613 const char *z, int n, /* Text of MATCH query */ |
| 614 Fts3Expr **ppExpr, /* OUT: Parsed query structure */ |
| 615 int *pnConsumed /* OUT: Number of bytes consumed */ |
| 616 ){ |
| 617 Fts3Expr *pRet = 0; |
| 618 Fts3Expr *pPrev = 0; |
| 619 Fts3Expr *pNotBranch = 0; /* Only used in legacy parse mode */ |
| 620 int nIn = n; |
| 621 const char *zIn = z; |
| 622 int rc = SQLITE_OK; |
| 623 int isRequirePhrase = 1; |
| 624 |
| 625 while( rc==SQLITE_OK ){ |
| 626 Fts3Expr *p = 0; |
| 627 int nByte = 0; |
| 628 |
| 629 rc = getNextNode(pParse, zIn, nIn, &p, &nByte); |
| 630 assert( nByte>0 || (rc!=SQLITE_OK && p==0) ); |
| 631 if( rc==SQLITE_OK ){ |
| 632 if( p ){ |
| 633 int isPhrase; |
| 634 |
| 635 if( !sqlite3_fts3_enable_parentheses |
| 636 && p->eType==FTSQUERY_PHRASE && pParse->isNot |
| 637 ){ |
| 638 /* Create an implicit NOT operator. */ |
| 639 Fts3Expr *pNot = fts3MallocZero(sizeof(Fts3Expr)); |
| 640 if( !pNot ){ |
| 641 sqlite3Fts3ExprFree(p); |
| 642 rc = SQLITE_NOMEM; |
| 643 goto exprparse_out; |
| 644 } |
| 645 pNot->eType = FTSQUERY_NOT; |
| 646 pNot->pRight = p; |
| 647 p->pParent = pNot; |
| 648 if( pNotBranch ){ |
| 649 pNot->pLeft = pNotBranch; |
| 650 pNotBranch->pParent = pNot; |
| 651 } |
| 652 pNotBranch = pNot; |
| 653 p = pPrev; |
| 654 }else{ |
| 655 int eType = p->eType; |
| 656 isPhrase = (eType==FTSQUERY_PHRASE || p->pLeft); |
| 657 |
| 658 /* The isRequirePhrase variable is set to true if a phrase or |
| 659 ** an expression contained in parenthesis is required. If a |
| 660 ** binary operator (AND, OR, NOT or NEAR) is encounted when |
| 661 ** isRequirePhrase is set, this is a syntax error. |
| 662 */ |
| 663 if( !isPhrase && isRequirePhrase ){ |
| 664 sqlite3Fts3ExprFree(p); |
| 665 rc = SQLITE_ERROR; |
| 666 goto exprparse_out; |
| 667 } |
| 668 |
| 669 if( isPhrase && !isRequirePhrase ){ |
| 670 /* Insert an implicit AND operator. */ |
| 671 Fts3Expr *pAnd; |
| 672 assert( pRet && pPrev ); |
| 673 pAnd = fts3MallocZero(sizeof(Fts3Expr)); |
| 674 if( !pAnd ){ |
| 675 sqlite3Fts3ExprFree(p); |
| 676 rc = SQLITE_NOMEM; |
| 677 goto exprparse_out; |
| 678 } |
| 679 pAnd->eType = FTSQUERY_AND; |
| 680 insertBinaryOperator(&pRet, pPrev, pAnd); |
| 681 pPrev = pAnd; |
| 682 } |
| 683 |
| 684 /* This test catches attempts to make either operand of a NEAR |
| 685 ** operator something other than a phrase. For example, either of |
| 686 ** the following: |
| 687 ** |
| 688 ** (bracketed expression) NEAR phrase |
| 689 ** phrase NEAR (bracketed expression) |
| 690 ** |
| 691 ** Return an error in either case. |
| 692 */ |
| 693 if( pPrev && ( |
| 694 (eType==FTSQUERY_NEAR && !isPhrase && pPrev->eType!=FTSQUERY_PHRASE) |
| 695 || (eType!=FTSQUERY_PHRASE && isPhrase && pPrev->eType==FTSQUERY_NEAR) |
| 696 )){ |
| 697 sqlite3Fts3ExprFree(p); |
| 698 rc = SQLITE_ERROR; |
| 699 goto exprparse_out; |
| 700 } |
| 701 |
| 702 if( isPhrase ){ |
| 703 if( pRet ){ |
| 704 assert( pPrev && pPrev->pLeft && pPrev->pRight==0 ); |
| 705 pPrev->pRight = p; |
| 706 p->pParent = pPrev; |
| 707 }else{ |
| 708 pRet = p; |
| 709 } |
| 710 }else{ |
| 711 insertBinaryOperator(&pRet, pPrev, p); |
| 712 } |
| 713 isRequirePhrase = !isPhrase; |
| 714 } |
| 715 pPrev = p; |
| 716 } |
| 717 assert( nByte>0 ); |
| 718 } |
| 719 assert( rc!=SQLITE_OK || (nByte>0 && nByte<=nIn) ); |
| 720 nIn -= nByte; |
| 721 zIn += nByte; |
| 722 } |
| 723 |
| 724 if( rc==SQLITE_DONE && pRet && isRequirePhrase ){ |
| 725 rc = SQLITE_ERROR; |
| 726 } |
| 727 |
| 728 if( rc==SQLITE_DONE ){ |
| 729 rc = SQLITE_OK; |
| 730 if( !sqlite3_fts3_enable_parentheses && pNotBranch ){ |
| 731 if( !pRet ){ |
| 732 rc = SQLITE_ERROR; |
| 733 }else{ |
| 734 Fts3Expr *pIter = pNotBranch; |
| 735 while( pIter->pLeft ){ |
| 736 pIter = pIter->pLeft; |
| 737 } |
| 738 pIter->pLeft = pRet; |
| 739 pRet->pParent = pIter; |
| 740 pRet = pNotBranch; |
| 741 } |
| 742 } |
| 743 } |
| 744 *pnConsumed = n - nIn; |
| 745 |
| 746 exprparse_out: |
| 747 if( rc!=SQLITE_OK ){ |
| 748 sqlite3Fts3ExprFree(pRet); |
| 749 sqlite3Fts3ExprFree(pNotBranch); |
| 750 pRet = 0; |
| 751 } |
| 752 *ppExpr = pRet; |
| 753 return rc; |
| 754 } |
| 755 |
| 756 /* |
| 757 ** Return SQLITE_ERROR if the maximum depth of the expression tree passed |
| 758 ** as the only argument is more than nMaxDepth. |
| 759 */ |
| 760 static int fts3ExprCheckDepth(Fts3Expr *p, int nMaxDepth){ |
| 761 int rc = SQLITE_OK; |
| 762 if( p ){ |
| 763 if( nMaxDepth<0 ){ |
| 764 rc = SQLITE_TOOBIG; |
| 765 }else{ |
| 766 rc = fts3ExprCheckDepth(p->pLeft, nMaxDepth-1); |
| 767 if( rc==SQLITE_OK ){ |
| 768 rc = fts3ExprCheckDepth(p->pRight, nMaxDepth-1); |
| 769 } |
| 770 } |
| 771 } |
| 772 return rc; |
| 773 } |
| 774 |
| 775 /* |
| 776 ** This function attempts to transform the expression tree at (*pp) to |
| 777 ** an equivalent but more balanced form. The tree is modified in place. |
| 778 ** If successful, SQLITE_OK is returned and (*pp) set to point to the |
| 779 ** new root expression node. |
| 780 ** |
| 781 ** nMaxDepth is the maximum allowable depth of the balanced sub-tree. |
| 782 ** |
| 783 ** Otherwise, if an error occurs, an SQLite error code is returned and |
| 784 ** expression (*pp) freed. |
| 785 */ |
| 786 static int fts3ExprBalance(Fts3Expr **pp, int nMaxDepth){ |
| 787 int rc = SQLITE_OK; /* Return code */ |
| 788 Fts3Expr *pRoot = *pp; /* Initial root node */ |
| 789 Fts3Expr *pFree = 0; /* List of free nodes. Linked by pParent. */ |
| 790 int eType = pRoot->eType; /* Type of node in this tree */ |
| 791 |
| 792 if( nMaxDepth==0 ){ |
| 793 rc = SQLITE_ERROR; |
| 794 } |
| 795 |
| 796 if( rc==SQLITE_OK ){ |
| 797 if( (eType==FTSQUERY_AND || eType==FTSQUERY_OR) ){ |
| 798 Fts3Expr **apLeaf; |
| 799 apLeaf = (Fts3Expr **)sqlite3_malloc(sizeof(Fts3Expr *) * nMaxDepth); |
| 800 if( 0==apLeaf ){ |
| 801 rc = SQLITE_NOMEM; |
| 802 }else{ |
| 803 memset(apLeaf, 0, sizeof(Fts3Expr *) * nMaxDepth); |
| 804 } |
| 805 |
| 806 if( rc==SQLITE_OK ){ |
| 807 int i; |
| 808 Fts3Expr *p; |
| 809 |
| 810 /* Set $p to point to the left-most leaf in the tree of eType nodes. */ |
| 811 for(p=pRoot; p->eType==eType; p=p->pLeft){ |
| 812 assert( p->pParent==0 || p->pParent->pLeft==p ); |
| 813 assert( p->pLeft && p->pRight ); |
| 814 } |
| 815 |
| 816 /* This loop runs once for each leaf in the tree of eType nodes. */ |
| 817 while( 1 ){ |
| 818 int iLvl; |
| 819 Fts3Expr *pParent = p->pParent; /* Current parent of p */ |
| 820 |
| 821 assert( pParent==0 || pParent->pLeft==p ); |
| 822 p->pParent = 0; |
| 823 if( pParent ){ |
| 824 pParent->pLeft = 0; |
| 825 }else{ |
| 826 pRoot = 0; |
| 827 } |
| 828 rc = fts3ExprBalance(&p, nMaxDepth-1); |
| 829 if( rc!=SQLITE_OK ) break; |
| 830 |
| 831 for(iLvl=0; p && iLvl<nMaxDepth; iLvl++){ |
| 832 if( apLeaf[iLvl]==0 ){ |
| 833 apLeaf[iLvl] = p; |
| 834 p = 0; |
| 835 }else{ |
| 836 assert( pFree ); |
| 837 pFree->pLeft = apLeaf[iLvl]; |
| 838 pFree->pRight = p; |
| 839 pFree->pLeft->pParent = pFree; |
| 840 pFree->pRight->pParent = pFree; |
| 841 |
| 842 p = pFree; |
| 843 pFree = pFree->pParent; |
| 844 p->pParent = 0; |
| 845 apLeaf[iLvl] = 0; |
| 846 } |
| 847 } |
| 848 if( p ){ |
| 849 sqlite3Fts3ExprFree(p); |
| 850 rc = SQLITE_TOOBIG; |
| 851 break; |
| 852 } |
| 853 |
| 854 /* If that was the last leaf node, break out of the loop */ |
| 855 if( pParent==0 ) break; |
| 856 |
| 857 /* Set $p to point to the next leaf in the tree of eType nodes */ |
| 858 for(p=pParent->pRight; p->eType==eType; p=p->pLeft); |
| 859 |
| 860 /* Remove pParent from the original tree. */ |
| 861 assert( pParent->pParent==0 || pParent->pParent->pLeft==pParent ); |
| 862 pParent->pRight->pParent = pParent->pParent; |
| 863 if( pParent->pParent ){ |
| 864 pParent->pParent->pLeft = pParent->pRight; |
| 865 }else{ |
| 866 assert( pParent==pRoot ); |
| 867 pRoot = pParent->pRight; |
| 868 } |
| 869 |
| 870 /* Link pParent into the free node list. It will be used as an |
| 871 ** internal node of the new tree. */ |
| 872 pParent->pParent = pFree; |
| 873 pFree = pParent; |
| 874 } |
| 875 |
| 876 if( rc==SQLITE_OK ){ |
| 877 p = 0; |
| 878 for(i=0; i<nMaxDepth; i++){ |
| 879 if( apLeaf[i] ){ |
| 880 if( p==0 ){ |
| 881 p = apLeaf[i]; |
| 882 p->pParent = 0; |
| 883 }else{ |
| 884 assert( pFree!=0 ); |
| 885 pFree->pRight = p; |
| 886 pFree->pLeft = apLeaf[i]; |
| 887 pFree->pLeft->pParent = pFree; |
| 888 pFree->pRight->pParent = pFree; |
| 889 |
| 890 p = pFree; |
| 891 pFree = pFree->pParent; |
| 892 p->pParent = 0; |
| 893 } |
| 894 } |
| 895 } |
| 896 pRoot = p; |
| 897 }else{ |
| 898 /* An error occurred. Delete the contents of the apLeaf[] array |
| 899 ** and pFree list. Everything else is cleaned up by the call to |
| 900 ** sqlite3Fts3ExprFree(pRoot) below. */ |
| 901 Fts3Expr *pDel; |
| 902 for(i=0; i<nMaxDepth; i++){ |
| 903 sqlite3Fts3ExprFree(apLeaf[i]); |
| 904 } |
| 905 while( (pDel=pFree)!=0 ){ |
| 906 pFree = pDel->pParent; |
| 907 sqlite3_free(pDel); |
| 908 } |
| 909 } |
| 910 |
| 911 assert( pFree==0 ); |
| 912 sqlite3_free( apLeaf ); |
| 913 } |
| 914 }else if( eType==FTSQUERY_NOT ){ |
| 915 Fts3Expr *pLeft = pRoot->pLeft; |
| 916 Fts3Expr *pRight = pRoot->pRight; |
| 917 |
| 918 pRoot->pLeft = 0; |
| 919 pRoot->pRight = 0; |
| 920 pLeft->pParent = 0; |
| 921 pRight->pParent = 0; |
| 922 |
| 923 rc = fts3ExprBalance(&pLeft, nMaxDepth-1); |
| 924 if( rc==SQLITE_OK ){ |
| 925 rc = fts3ExprBalance(&pRight, nMaxDepth-1); |
| 926 } |
| 927 |
| 928 if( rc!=SQLITE_OK ){ |
| 929 sqlite3Fts3ExprFree(pRight); |
| 930 sqlite3Fts3ExprFree(pLeft); |
| 931 }else{ |
| 932 assert( pLeft && pRight ); |
| 933 pRoot->pLeft = pLeft; |
| 934 pLeft->pParent = pRoot; |
| 935 pRoot->pRight = pRight; |
| 936 pRight->pParent = pRoot; |
| 937 } |
| 938 } |
| 939 } |
| 940 |
| 941 if( rc!=SQLITE_OK ){ |
| 942 sqlite3Fts3ExprFree(pRoot); |
| 943 pRoot = 0; |
| 944 } |
| 945 *pp = pRoot; |
| 946 return rc; |
| 947 } |
| 948 |
| 949 /* |
| 950 ** This function is similar to sqlite3Fts3ExprParse(), with the following |
| 951 ** differences: |
| 952 ** |
| 953 ** 1. It does not do expression rebalancing. |
| 954 ** 2. It does not check that the expression does not exceed the |
| 955 ** maximum allowable depth. |
| 956 ** 3. Even if it fails, *ppExpr may still be set to point to an |
| 957 ** expression tree. It should be deleted using sqlite3Fts3ExprFree() |
| 958 ** in this case. |
| 959 */ |
| 960 static int fts3ExprParseUnbalanced( |
| 961 sqlite3_tokenizer *pTokenizer, /* Tokenizer module */ |
| 962 int iLangid, /* Language id for tokenizer */ |
| 963 char **azCol, /* Array of column names for fts3 table */ |
| 964 int bFts4, /* True to allow FTS4-only syntax */ |
| 965 int nCol, /* Number of entries in azCol[] */ |
| 966 int iDefaultCol, /* Default column to query */ |
| 967 const char *z, int n, /* Text of MATCH query */ |
| 968 Fts3Expr **ppExpr /* OUT: Parsed query structure */ |
| 969 ){ |
| 970 int nParsed; |
| 971 int rc; |
| 972 ParseContext sParse; |
| 973 |
| 974 memset(&sParse, 0, sizeof(ParseContext)); |
| 975 sParse.pTokenizer = pTokenizer; |
| 976 sParse.iLangid = iLangid; |
| 977 sParse.azCol = (const char **)azCol; |
| 978 sParse.nCol = nCol; |
| 979 sParse.iDefaultCol = iDefaultCol; |
| 980 sParse.bFts4 = bFts4; |
| 981 if( z==0 ){ |
| 982 *ppExpr = 0; |
| 983 return SQLITE_OK; |
| 984 } |
| 985 if( n<0 ){ |
| 986 n = (int)strlen(z); |
| 987 } |
| 988 rc = fts3ExprParse(&sParse, z, n, ppExpr, &nParsed); |
| 989 assert( rc==SQLITE_OK || *ppExpr==0 ); |
| 990 |
| 991 /* Check for mismatched parenthesis */ |
| 992 if( rc==SQLITE_OK && sParse.nNest ){ |
| 993 rc = SQLITE_ERROR; |
| 994 } |
| 995 |
| 996 return rc; |
| 997 } |
| 998 |
| 999 /* |
| 1000 ** Parameters z and n contain a pointer to and length of a buffer containing |
| 1001 ** an fts3 query expression, respectively. This function attempts to parse the |
| 1002 ** query expression and create a tree of Fts3Expr structures representing the |
| 1003 ** parsed expression. If successful, *ppExpr is set to point to the head |
| 1004 ** of the parsed expression tree and SQLITE_OK is returned. If an error |
| 1005 ** occurs, either SQLITE_NOMEM (out-of-memory error) or SQLITE_ERROR (parse |
| 1006 ** error) is returned and *ppExpr is set to 0. |
| 1007 ** |
| 1008 ** If parameter n is a negative number, then z is assumed to point to a |
| 1009 ** nul-terminated string and the length is determined using strlen(). |
| 1010 ** |
| 1011 ** The first parameter, pTokenizer, is passed the fts3 tokenizer module to |
| 1012 ** use to normalize query tokens while parsing the expression. The azCol[] |
| 1013 ** array, which is assumed to contain nCol entries, should contain the names |
| 1014 ** of each column in the target fts3 table, in order from left to right. |
| 1015 ** Column names must be nul-terminated strings. |
| 1016 ** |
| 1017 ** The iDefaultCol parameter should be passed the index of the table column |
| 1018 ** that appears on the left-hand-side of the MATCH operator (the default |
| 1019 ** column to match against for tokens for which a column name is not explicitly |
| 1020 ** specified as part of the query string), or -1 if tokens may by default |
| 1021 ** match any table column. |
| 1022 */ |
| 1023 int sqlite3Fts3ExprParse( |
| 1024 sqlite3_tokenizer *pTokenizer, /* Tokenizer module */ |
| 1025 int iLangid, /* Language id for tokenizer */ |
| 1026 char **azCol, /* Array of column names for fts3 table */ |
| 1027 int bFts4, /* True to allow FTS4-only syntax */ |
| 1028 int nCol, /* Number of entries in azCol[] */ |
| 1029 int iDefaultCol, /* Default column to query */ |
| 1030 const char *z, int n, /* Text of MATCH query */ |
| 1031 Fts3Expr **ppExpr, /* OUT: Parsed query structure */ |
| 1032 char **pzErr /* OUT: Error message (sqlite3_malloc) */ |
| 1033 ){ |
| 1034 int rc = fts3ExprParseUnbalanced( |
| 1035 pTokenizer, iLangid, azCol, bFts4, nCol, iDefaultCol, z, n, ppExpr |
| 1036 ); |
| 1037 |
| 1038 /* Rebalance the expression. And check that its depth does not exceed |
| 1039 ** SQLITE_FTS3_MAX_EXPR_DEPTH. */ |
| 1040 if( rc==SQLITE_OK && *ppExpr ){ |
| 1041 rc = fts3ExprBalance(ppExpr, SQLITE_FTS3_MAX_EXPR_DEPTH); |
| 1042 if( rc==SQLITE_OK ){ |
| 1043 rc = fts3ExprCheckDepth(*ppExpr, SQLITE_FTS3_MAX_EXPR_DEPTH); |
| 1044 } |
| 1045 } |
| 1046 |
| 1047 if( rc!=SQLITE_OK ){ |
| 1048 sqlite3Fts3ExprFree(*ppExpr); |
| 1049 *ppExpr = 0; |
| 1050 if( rc==SQLITE_TOOBIG ){ |
| 1051 sqlite3Fts3ErrMsg(pzErr, |
| 1052 "FTS expression tree is too large (maximum depth %d)", |
| 1053 SQLITE_FTS3_MAX_EXPR_DEPTH |
| 1054 ); |
| 1055 rc = SQLITE_ERROR; |
| 1056 }else if( rc==SQLITE_ERROR ){ |
| 1057 sqlite3Fts3ErrMsg(pzErr, "malformed MATCH expression: [%s]", z); |
| 1058 } |
| 1059 } |
| 1060 |
| 1061 return rc; |
| 1062 } |
| 1063 |
| 1064 /* |
| 1065 ** Free a single node of an expression tree. |
| 1066 */ |
| 1067 static void fts3FreeExprNode(Fts3Expr *p){ |
| 1068 assert( p->eType==FTSQUERY_PHRASE || p->pPhrase==0 ); |
| 1069 sqlite3Fts3EvalPhraseCleanup(p->pPhrase); |
| 1070 sqlite3_free(p->aMI); |
| 1071 sqlite3_free(p); |
| 1072 } |
| 1073 |
| 1074 /* |
| 1075 ** Free a parsed fts3 query expression allocated by sqlite3Fts3ExprParse(). |
| 1076 ** |
| 1077 ** This function would be simpler if it recursively called itself. But |
| 1078 ** that would mean passing a sufficiently large expression to ExprParse() |
| 1079 ** could cause a stack overflow. |
| 1080 */ |
| 1081 void sqlite3Fts3ExprFree(Fts3Expr *pDel){ |
| 1082 Fts3Expr *p; |
| 1083 assert( pDel==0 || pDel->pParent==0 ); |
| 1084 for(p=pDel; p && (p->pLeft||p->pRight); p=(p->pLeft ? p->pLeft : p->pRight)){ |
| 1085 assert( p->pParent==0 || p==p->pParent->pRight || p==p->pParent->pLeft ); |
| 1086 } |
| 1087 while( p ){ |
| 1088 Fts3Expr *pParent = p->pParent; |
| 1089 fts3FreeExprNode(p); |
| 1090 if( pParent && p==pParent->pLeft && pParent->pRight ){ |
| 1091 p = pParent->pRight; |
| 1092 while( p && (p->pLeft || p->pRight) ){ |
| 1093 assert( p==p->pParent->pRight || p==p->pParent->pLeft ); |
| 1094 p = (p->pLeft ? p->pLeft : p->pRight); |
| 1095 } |
| 1096 }else{ |
| 1097 p = pParent; |
| 1098 } |
| 1099 } |
| 1100 } |
| 1101 |
| 1102 /**************************************************************************** |
| 1103 ***************************************************************************** |
| 1104 ** Everything after this point is just test code. |
| 1105 */ |
| 1106 |
| 1107 #ifdef SQLITE_TEST |
| 1108 |
| 1109 #include <stdio.h> |
| 1110 |
| 1111 /* |
| 1112 ** Function to query the hash-table of tokenizers (see README.tokenizers). |
| 1113 */ |
| 1114 static int queryTestTokenizer( |
| 1115 sqlite3 *db, |
| 1116 const char *zName, |
| 1117 const sqlite3_tokenizer_module **pp |
| 1118 ){ |
| 1119 int rc; |
| 1120 sqlite3_stmt *pStmt; |
| 1121 const char zSql[] = "SELECT fts3_tokenizer(?)"; |
| 1122 |
| 1123 *pp = 0; |
| 1124 rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); |
| 1125 if( rc!=SQLITE_OK ){ |
| 1126 return rc; |
| 1127 } |
| 1128 |
| 1129 sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC); |
| 1130 if( SQLITE_ROW==sqlite3_step(pStmt) ){ |
| 1131 if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){ |
| 1132 memcpy((void *)pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp)); |
| 1133 } |
| 1134 } |
| 1135 |
| 1136 return sqlite3_finalize(pStmt); |
| 1137 } |
| 1138 |
| 1139 /* |
| 1140 ** Return a pointer to a buffer containing a text representation of the |
| 1141 ** expression passed as the first argument. The buffer is obtained from |
| 1142 ** sqlite3_malloc(). It is the responsibility of the caller to use |
| 1143 ** sqlite3_free() to release the memory. If an OOM condition is encountered, |
| 1144 ** NULL is returned. |
| 1145 ** |
| 1146 ** If the second argument is not NULL, then its contents are prepended to |
| 1147 ** the returned expression text and then freed using sqlite3_free(). |
| 1148 */ |
| 1149 static char *exprToString(Fts3Expr *pExpr, char *zBuf){ |
| 1150 if( pExpr==0 ){ |
| 1151 return sqlite3_mprintf(""); |
| 1152 } |
| 1153 switch( pExpr->eType ){ |
| 1154 case FTSQUERY_PHRASE: { |
| 1155 Fts3Phrase *pPhrase = pExpr->pPhrase; |
| 1156 int i; |
| 1157 zBuf = sqlite3_mprintf( |
| 1158 "%zPHRASE %d 0", zBuf, pPhrase->iColumn); |
| 1159 for(i=0; zBuf && i<pPhrase->nToken; i++){ |
| 1160 zBuf = sqlite3_mprintf("%z %.*s%s", zBuf, |
| 1161 pPhrase->aToken[i].n, pPhrase->aToken[i].z, |
| 1162 (pPhrase->aToken[i].isPrefix?"+":"") |
| 1163 ); |
| 1164 } |
| 1165 return zBuf; |
| 1166 } |
| 1167 |
| 1168 case FTSQUERY_NEAR: |
| 1169 zBuf = sqlite3_mprintf("%zNEAR/%d ", zBuf, pExpr->nNear); |
| 1170 break; |
| 1171 case FTSQUERY_NOT: |
| 1172 zBuf = sqlite3_mprintf("%zNOT ", zBuf); |
| 1173 break; |
| 1174 case FTSQUERY_AND: |
| 1175 zBuf = sqlite3_mprintf("%zAND ", zBuf); |
| 1176 break; |
| 1177 case FTSQUERY_OR: |
| 1178 zBuf = sqlite3_mprintf("%zOR ", zBuf); |
| 1179 break; |
| 1180 } |
| 1181 |
| 1182 if( zBuf ) zBuf = sqlite3_mprintf("%z{", zBuf); |
| 1183 if( zBuf ) zBuf = exprToString(pExpr->pLeft, zBuf); |
| 1184 if( zBuf ) zBuf = sqlite3_mprintf("%z} {", zBuf); |
| 1185 |
| 1186 if( zBuf ) zBuf = exprToString(pExpr->pRight, zBuf); |
| 1187 if( zBuf ) zBuf = sqlite3_mprintf("%z}", zBuf); |
| 1188 |
| 1189 return zBuf; |
| 1190 } |
| 1191 |
| 1192 /* |
| 1193 ** This is the implementation of a scalar SQL function used to test the |
| 1194 ** expression parser. It should be called as follows: |
| 1195 ** |
| 1196 ** fts3_exprtest(<tokenizer>, <expr>, <column 1>, ...); |
| 1197 ** |
| 1198 ** The first argument, <tokenizer>, is the name of the fts3 tokenizer used |
| 1199 ** to parse the query expression (see README.tokenizers). The second argument |
| 1200 ** is the query expression to parse. Each subsequent argument is the name |
| 1201 ** of a column of the fts3 table that the query expression may refer to. |
| 1202 ** For example: |
| 1203 ** |
| 1204 ** SELECT fts3_exprtest('simple', 'Bill col2:Bloggs', 'col1', 'col2'); |
| 1205 */ |
| 1206 static void fts3ExprTest( |
| 1207 sqlite3_context *context, |
| 1208 int argc, |
| 1209 sqlite3_value **argv |
| 1210 ){ |
| 1211 sqlite3_tokenizer_module const *pModule = 0; |
| 1212 sqlite3_tokenizer *pTokenizer = 0; |
| 1213 int rc; |
| 1214 char **azCol = 0; |
| 1215 const char *zExpr; |
| 1216 int nExpr; |
| 1217 int nCol; |
| 1218 int ii; |
| 1219 Fts3Expr *pExpr; |
| 1220 char *zBuf = 0; |
| 1221 sqlite3 *db = sqlite3_context_db_handle(context); |
| 1222 |
| 1223 if( argc<3 ){ |
| 1224 sqlite3_result_error(context, |
| 1225 "Usage: fts3_exprtest(tokenizer, expr, col1, ...", -1 |
| 1226 ); |
| 1227 return; |
| 1228 } |
| 1229 |
| 1230 rc = queryTestTokenizer(db, |
| 1231 (const char *)sqlite3_value_text(argv[0]), &pModule); |
| 1232 if( rc==SQLITE_NOMEM ){ |
| 1233 sqlite3_result_error_nomem(context); |
| 1234 goto exprtest_out; |
| 1235 }else if( !pModule ){ |
| 1236 sqlite3_result_error(context, "No such tokenizer module", -1); |
| 1237 goto exprtest_out; |
| 1238 } |
| 1239 |
| 1240 rc = pModule->xCreate(0, 0, &pTokenizer); |
| 1241 assert( rc==SQLITE_NOMEM || rc==SQLITE_OK ); |
| 1242 if( rc==SQLITE_NOMEM ){ |
| 1243 sqlite3_result_error_nomem(context); |
| 1244 goto exprtest_out; |
| 1245 } |
| 1246 pTokenizer->pModule = pModule; |
| 1247 |
| 1248 zExpr = (const char *)sqlite3_value_text(argv[1]); |
| 1249 nExpr = sqlite3_value_bytes(argv[1]); |
| 1250 nCol = argc-2; |
| 1251 azCol = (char **)sqlite3_malloc(nCol*sizeof(char *)); |
| 1252 if( !azCol ){ |
| 1253 sqlite3_result_error_nomem(context); |
| 1254 goto exprtest_out; |
| 1255 } |
| 1256 for(ii=0; ii<nCol; ii++){ |
| 1257 azCol[ii] = (char *)sqlite3_value_text(argv[ii+2]); |
| 1258 } |
| 1259 |
| 1260 if( sqlite3_user_data(context) ){ |
| 1261 char *zDummy = 0; |
| 1262 rc = sqlite3Fts3ExprParse( |
| 1263 pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr, &zDummy |
| 1264 ); |
| 1265 assert( rc==SQLITE_OK || pExpr==0 ); |
| 1266 sqlite3_free(zDummy); |
| 1267 }else{ |
| 1268 rc = fts3ExprParseUnbalanced( |
| 1269 pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr |
| 1270 ); |
| 1271 } |
| 1272 |
| 1273 if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM ){ |
| 1274 sqlite3Fts3ExprFree(pExpr); |
| 1275 sqlite3_result_error(context, "Error parsing expression", -1); |
| 1276 }else if( rc==SQLITE_NOMEM || !(zBuf = exprToString(pExpr, 0)) ){ |
| 1277 sqlite3_result_error_nomem(context); |
| 1278 }else{ |
| 1279 sqlite3_result_text(context, zBuf, -1, SQLITE_TRANSIENT); |
| 1280 sqlite3_free(zBuf); |
| 1281 } |
| 1282 |
| 1283 sqlite3Fts3ExprFree(pExpr); |
| 1284 |
| 1285 exprtest_out: |
| 1286 if( pModule && pTokenizer ){ |
| 1287 rc = pModule->xDestroy(pTokenizer); |
| 1288 } |
| 1289 sqlite3_free(azCol); |
| 1290 } |
| 1291 |
| 1292 /* |
| 1293 ** Register the query expression parser test function fts3_exprtest() |
| 1294 ** with database connection db. |
| 1295 */ |
| 1296 int sqlite3Fts3ExprInitTestInterface(sqlite3* db){ |
| 1297 int rc = sqlite3_create_function( |
| 1298 db, "fts3_exprtest", -1, SQLITE_UTF8, 0, fts3ExprTest, 0, 0 |
| 1299 ); |
| 1300 if( rc==SQLITE_OK ){ |
| 1301 rc = sqlite3_create_function(db, "fts3_exprtest_rebalance", |
| 1302 -1, SQLITE_UTF8, (void *)1, fts3ExprTest, 0, 0 |
| 1303 ); |
| 1304 } |
| 1305 return rc; |
| 1306 } |
| 1307 |
| 1308 #endif |
| 1309 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ |
OLD | NEW |