OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ** 2014 May 31 |
| 3 ** |
| 4 ** The author disclaims copyright to this source code. In place of |
| 5 ** a legal notice, here is a blessing: |
| 6 ** |
| 7 ** May you do good and not evil. |
| 8 ** May you find forgiveness for yourself and forgive others. |
| 9 ** May you share freely, never taking more than you give. |
| 10 ** |
| 11 ****************************************************************************** |
| 12 ** |
| 13 */ |
| 14 |
| 15 |
| 16 |
| 17 #include "fts5Int.h" |
| 18 #include "fts5parse.h" |
| 19 |
| 20 /* |
| 21 ** All token types in the generated fts5parse.h file are greater than 0. |
| 22 */ |
| 23 #define FTS5_EOF 0 |
| 24 |
| 25 #define FTS5_LARGEST_INT64 (0xffffffff|(((i64)0x7fffffff)<<32)) |
| 26 |
| 27 typedef struct Fts5ExprTerm Fts5ExprTerm; |
| 28 |
| 29 /* |
| 30 ** Functions generated by lemon from fts5parse.y. |
| 31 */ |
| 32 void *sqlite3Fts5ParserAlloc(void *(*mallocProc)(u64)); |
| 33 void sqlite3Fts5ParserFree(void*, void (*freeProc)(void*)); |
| 34 void sqlite3Fts5Parser(void*, int, Fts5Token, Fts5Parse*); |
| 35 #ifndef NDEBUG |
| 36 #include <stdio.h> |
| 37 void sqlite3Fts5ParserTrace(FILE*, char*); |
| 38 #endif |
| 39 |
| 40 |
| 41 struct Fts5Expr { |
| 42 Fts5Index *pIndex; |
| 43 Fts5Config *pConfig; |
| 44 Fts5ExprNode *pRoot; |
| 45 int bDesc; /* Iterate in descending rowid order */ |
| 46 int nPhrase; /* Number of phrases in expression */ |
| 47 Fts5ExprPhrase **apExprPhrase; /* Pointers to phrase objects */ |
| 48 }; |
| 49 |
| 50 /* |
| 51 ** eType: |
| 52 ** Expression node type. Always one of: |
| 53 ** |
| 54 ** FTS5_AND (nChild, apChild valid) |
| 55 ** FTS5_OR (nChild, apChild valid) |
| 56 ** FTS5_NOT (nChild, apChild valid) |
| 57 ** FTS5_STRING (pNear valid) |
| 58 ** FTS5_TERM (pNear valid) |
| 59 */ |
| 60 struct Fts5ExprNode { |
| 61 int eType; /* Node type */ |
| 62 int bEof; /* True at EOF */ |
| 63 int bNomatch; /* True if entry is not a match */ |
| 64 |
| 65 /* Next method for this node. */ |
| 66 int (*xNext)(Fts5Expr*, Fts5ExprNode*, int, i64); |
| 67 |
| 68 i64 iRowid; /* Current rowid */ |
| 69 Fts5ExprNearset *pNear; /* For FTS5_STRING - cluster of phrases */ |
| 70 |
| 71 /* Child nodes. For a NOT node, this array always contains 2 entries. For |
| 72 ** AND or OR nodes, it contains 2 or more entries. */ |
| 73 int nChild; /* Number of child nodes */ |
| 74 Fts5ExprNode *apChild[1]; /* Array of child nodes */ |
| 75 }; |
| 76 |
| 77 #define Fts5NodeIsString(p) ((p)->eType==FTS5_TERM || (p)->eType==FTS5_STRING) |
| 78 |
| 79 /* |
| 80 ** Invoke the xNext method of an Fts5ExprNode object. This macro should be |
| 81 ** used as if it has the same signature as the xNext() methods themselves. |
| 82 */ |
| 83 #define fts5ExprNodeNext(a,b,c,d) (b)->xNext((a), (b), (c), (d)) |
| 84 |
| 85 /* |
| 86 ** An instance of the following structure represents a single search term |
| 87 ** or term prefix. |
| 88 */ |
| 89 struct Fts5ExprTerm { |
| 90 int bPrefix; /* True for a prefix term */ |
| 91 char *zTerm; /* nul-terminated term */ |
| 92 Fts5IndexIter *pIter; /* Iterator for this term */ |
| 93 Fts5ExprTerm *pSynonym; /* Pointer to first in list of synonyms */ |
| 94 }; |
| 95 |
| 96 /* |
| 97 ** A phrase. One or more terms that must appear in a contiguous sequence |
| 98 ** within a document for it to match. |
| 99 */ |
| 100 struct Fts5ExprPhrase { |
| 101 Fts5ExprNode *pNode; /* FTS5_STRING node this phrase is part of */ |
| 102 Fts5Buffer poslist; /* Current position list */ |
| 103 int nTerm; /* Number of entries in aTerm[] */ |
| 104 Fts5ExprTerm aTerm[1]; /* Terms that make up this phrase */ |
| 105 }; |
| 106 |
| 107 /* |
| 108 ** One or more phrases that must appear within a certain token distance of |
| 109 ** each other within each matching document. |
| 110 */ |
| 111 struct Fts5ExprNearset { |
| 112 int nNear; /* NEAR parameter */ |
| 113 Fts5Colset *pColset; /* Columns to search (NULL -> all columns) */ |
| 114 int nPhrase; /* Number of entries in aPhrase[] array */ |
| 115 Fts5ExprPhrase *apPhrase[1]; /* Array of phrase pointers */ |
| 116 }; |
| 117 |
| 118 |
| 119 /* |
| 120 ** Parse context. |
| 121 */ |
| 122 struct Fts5Parse { |
| 123 Fts5Config *pConfig; |
| 124 char *zErr; |
| 125 int rc; |
| 126 int nPhrase; /* Size of apPhrase array */ |
| 127 Fts5ExprPhrase **apPhrase; /* Array of all phrases */ |
| 128 Fts5ExprNode *pExpr; /* Result of a successful parse */ |
| 129 }; |
| 130 |
| 131 void sqlite3Fts5ParseError(Fts5Parse *pParse, const char *zFmt, ...){ |
| 132 va_list ap; |
| 133 va_start(ap, zFmt); |
| 134 if( pParse->rc==SQLITE_OK ){ |
| 135 pParse->zErr = sqlite3_vmprintf(zFmt, ap); |
| 136 pParse->rc = SQLITE_ERROR; |
| 137 } |
| 138 va_end(ap); |
| 139 } |
| 140 |
| 141 static int fts5ExprIsspace(char t){ |
| 142 return t==' ' || t=='\t' || t=='\n' || t=='\r'; |
| 143 } |
| 144 |
| 145 /* |
| 146 ** Read the first token from the nul-terminated string at *pz. |
| 147 */ |
| 148 static int fts5ExprGetToken( |
| 149 Fts5Parse *pParse, |
| 150 const char **pz, /* IN/OUT: Pointer into buffer */ |
| 151 Fts5Token *pToken |
| 152 ){ |
| 153 const char *z = *pz; |
| 154 int tok; |
| 155 |
| 156 /* Skip past any whitespace */ |
| 157 while( fts5ExprIsspace(*z) ) z++; |
| 158 |
| 159 pToken->p = z; |
| 160 pToken->n = 1; |
| 161 switch( *z ){ |
| 162 case '(': tok = FTS5_LP; break; |
| 163 case ')': tok = FTS5_RP; break; |
| 164 case '{': tok = FTS5_LCP; break; |
| 165 case '}': tok = FTS5_RCP; break; |
| 166 case ':': tok = FTS5_COLON; break; |
| 167 case ',': tok = FTS5_COMMA; break; |
| 168 case '+': tok = FTS5_PLUS; break; |
| 169 case '*': tok = FTS5_STAR; break; |
| 170 case '-': tok = FTS5_MINUS; break; |
| 171 case '\0': tok = FTS5_EOF; break; |
| 172 |
| 173 case '"': { |
| 174 const char *z2; |
| 175 tok = FTS5_STRING; |
| 176 |
| 177 for(z2=&z[1]; 1; z2++){ |
| 178 if( z2[0]=='"' ){ |
| 179 z2++; |
| 180 if( z2[0]!='"' ) break; |
| 181 } |
| 182 if( z2[0]=='\0' ){ |
| 183 sqlite3Fts5ParseError(pParse, "unterminated string"); |
| 184 return FTS5_EOF; |
| 185 } |
| 186 } |
| 187 pToken->n = (z2 - z); |
| 188 break; |
| 189 } |
| 190 |
| 191 default: { |
| 192 const char *z2; |
| 193 if( sqlite3Fts5IsBareword(z[0])==0 ){ |
| 194 sqlite3Fts5ParseError(pParse, "fts5: syntax error near \"%.1s\"", z); |
| 195 return FTS5_EOF; |
| 196 } |
| 197 tok = FTS5_STRING; |
| 198 for(z2=&z[1]; sqlite3Fts5IsBareword(*z2); z2++); |
| 199 pToken->n = (z2 - z); |
| 200 if( pToken->n==2 && memcmp(pToken->p, "OR", 2)==0 ) tok = FTS5_OR; |
| 201 if( pToken->n==3 && memcmp(pToken->p, "NOT", 3)==0 ) tok = FTS5_NOT; |
| 202 if( pToken->n==3 && memcmp(pToken->p, "AND", 3)==0 ) tok = FTS5_AND; |
| 203 break; |
| 204 } |
| 205 } |
| 206 |
| 207 *pz = &pToken->p[pToken->n]; |
| 208 return tok; |
| 209 } |
| 210 |
| 211 static void *fts5ParseAlloc(u64 t){ return sqlite3_malloc((int)t); } |
| 212 static void fts5ParseFree(void *p){ sqlite3_free(p); } |
| 213 |
| 214 int sqlite3Fts5ExprNew( |
| 215 Fts5Config *pConfig, /* FTS5 Configuration */ |
| 216 const char *zExpr, /* Expression text */ |
| 217 Fts5Expr **ppNew, |
| 218 char **pzErr |
| 219 ){ |
| 220 Fts5Parse sParse; |
| 221 Fts5Token token; |
| 222 const char *z = zExpr; |
| 223 int t; /* Next token type */ |
| 224 void *pEngine; |
| 225 Fts5Expr *pNew; |
| 226 |
| 227 *ppNew = 0; |
| 228 *pzErr = 0; |
| 229 memset(&sParse, 0, sizeof(sParse)); |
| 230 pEngine = sqlite3Fts5ParserAlloc(fts5ParseAlloc); |
| 231 if( pEngine==0 ){ return SQLITE_NOMEM; } |
| 232 sParse.pConfig = pConfig; |
| 233 |
| 234 do { |
| 235 t = fts5ExprGetToken(&sParse, &z, &token); |
| 236 sqlite3Fts5Parser(pEngine, t, token, &sParse); |
| 237 }while( sParse.rc==SQLITE_OK && t!=FTS5_EOF ); |
| 238 sqlite3Fts5ParserFree(pEngine, fts5ParseFree); |
| 239 |
| 240 assert( sParse.rc!=SQLITE_OK || sParse.zErr==0 ); |
| 241 if( sParse.rc==SQLITE_OK ){ |
| 242 *ppNew = pNew = sqlite3_malloc(sizeof(Fts5Expr)); |
| 243 if( pNew==0 ){ |
| 244 sParse.rc = SQLITE_NOMEM; |
| 245 sqlite3Fts5ParseNodeFree(sParse.pExpr); |
| 246 }else{ |
| 247 if( !sParse.pExpr ){ |
| 248 const int nByte = sizeof(Fts5ExprNode); |
| 249 pNew->pRoot = (Fts5ExprNode*)sqlite3Fts5MallocZero(&sParse.rc, nByte); |
| 250 if( pNew->pRoot ){ |
| 251 pNew->pRoot->bEof = 1; |
| 252 } |
| 253 }else{ |
| 254 pNew->pRoot = sParse.pExpr; |
| 255 } |
| 256 pNew->pIndex = 0; |
| 257 pNew->pConfig = pConfig; |
| 258 pNew->apExprPhrase = sParse.apPhrase; |
| 259 pNew->nPhrase = sParse.nPhrase; |
| 260 sParse.apPhrase = 0; |
| 261 } |
| 262 }else{ |
| 263 sqlite3Fts5ParseNodeFree(sParse.pExpr); |
| 264 } |
| 265 |
| 266 sqlite3_free(sParse.apPhrase); |
| 267 *pzErr = sParse.zErr; |
| 268 return sParse.rc; |
| 269 } |
| 270 |
| 271 /* |
| 272 ** Free the expression node object passed as the only argument. |
| 273 */ |
| 274 void sqlite3Fts5ParseNodeFree(Fts5ExprNode *p){ |
| 275 if( p ){ |
| 276 int i; |
| 277 for(i=0; i<p->nChild; i++){ |
| 278 sqlite3Fts5ParseNodeFree(p->apChild[i]); |
| 279 } |
| 280 sqlite3Fts5ParseNearsetFree(p->pNear); |
| 281 sqlite3_free(p); |
| 282 } |
| 283 } |
| 284 |
| 285 /* |
| 286 ** Free the expression object passed as the only argument. |
| 287 */ |
| 288 void sqlite3Fts5ExprFree(Fts5Expr *p){ |
| 289 if( p ){ |
| 290 sqlite3Fts5ParseNodeFree(p->pRoot); |
| 291 sqlite3_free(p->apExprPhrase); |
| 292 sqlite3_free(p); |
| 293 } |
| 294 } |
| 295 |
| 296 /* |
| 297 ** Argument pTerm must be a synonym iterator. Return the current rowid |
| 298 ** that it points to. |
| 299 */ |
| 300 static i64 fts5ExprSynonymRowid(Fts5ExprTerm *pTerm, int bDesc, int *pbEof){ |
| 301 i64 iRet = 0; |
| 302 int bRetValid = 0; |
| 303 Fts5ExprTerm *p; |
| 304 |
| 305 assert( pTerm->pSynonym ); |
| 306 assert( bDesc==0 || bDesc==1 ); |
| 307 for(p=pTerm; p; p=p->pSynonym){ |
| 308 if( 0==sqlite3Fts5IterEof(p->pIter) ){ |
| 309 i64 iRowid = p->pIter->iRowid; |
| 310 if( bRetValid==0 || (bDesc!=(iRowid<iRet)) ){ |
| 311 iRet = iRowid; |
| 312 bRetValid = 1; |
| 313 } |
| 314 } |
| 315 } |
| 316 |
| 317 if( pbEof && bRetValid==0 ) *pbEof = 1; |
| 318 return iRet; |
| 319 } |
| 320 |
| 321 /* |
| 322 ** Argument pTerm must be a synonym iterator. |
| 323 */ |
| 324 static int fts5ExprSynonymList( |
| 325 Fts5ExprTerm *pTerm, |
| 326 i64 iRowid, |
| 327 Fts5Buffer *pBuf, /* Use this buffer for space if required */ |
| 328 u8 **pa, int *pn |
| 329 ){ |
| 330 Fts5PoslistReader aStatic[4]; |
| 331 Fts5PoslistReader *aIter = aStatic; |
| 332 int nIter = 0; |
| 333 int nAlloc = 4; |
| 334 int rc = SQLITE_OK; |
| 335 Fts5ExprTerm *p; |
| 336 |
| 337 assert( pTerm->pSynonym ); |
| 338 for(p=pTerm; p; p=p->pSynonym){ |
| 339 Fts5IndexIter *pIter = p->pIter; |
| 340 if( sqlite3Fts5IterEof(pIter)==0 && pIter->iRowid==iRowid ){ |
| 341 if( pIter->nData==0 ) continue; |
| 342 if( nIter==nAlloc ){ |
| 343 int nByte = sizeof(Fts5PoslistReader) * nAlloc * 2; |
| 344 Fts5PoslistReader *aNew = (Fts5PoslistReader*)sqlite3_malloc(nByte); |
| 345 if( aNew==0 ){ |
| 346 rc = SQLITE_NOMEM; |
| 347 goto synonym_poslist_out; |
| 348 } |
| 349 memcpy(aNew, aIter, sizeof(Fts5PoslistReader) * nIter); |
| 350 nAlloc = nAlloc*2; |
| 351 if( aIter!=aStatic ) sqlite3_free(aIter); |
| 352 aIter = aNew; |
| 353 } |
| 354 sqlite3Fts5PoslistReaderInit(pIter->pData, pIter->nData, &aIter[nIter]); |
| 355 assert( aIter[nIter].bEof==0 ); |
| 356 nIter++; |
| 357 } |
| 358 } |
| 359 |
| 360 if( nIter==1 ){ |
| 361 *pa = (u8*)aIter[0].a; |
| 362 *pn = aIter[0].n; |
| 363 }else{ |
| 364 Fts5PoslistWriter writer = {0}; |
| 365 i64 iPrev = -1; |
| 366 fts5BufferZero(pBuf); |
| 367 while( 1 ){ |
| 368 int i; |
| 369 i64 iMin = FTS5_LARGEST_INT64; |
| 370 for(i=0; i<nIter; i++){ |
| 371 if( aIter[i].bEof==0 ){ |
| 372 if( aIter[i].iPos==iPrev ){ |
| 373 if( sqlite3Fts5PoslistReaderNext(&aIter[i]) ) continue; |
| 374 } |
| 375 if( aIter[i].iPos<iMin ){ |
| 376 iMin = aIter[i].iPos; |
| 377 } |
| 378 } |
| 379 } |
| 380 if( iMin==FTS5_LARGEST_INT64 || rc!=SQLITE_OK ) break; |
| 381 rc = sqlite3Fts5PoslistWriterAppend(pBuf, &writer, iMin); |
| 382 iPrev = iMin; |
| 383 } |
| 384 if( rc==SQLITE_OK ){ |
| 385 *pa = pBuf->p; |
| 386 *pn = pBuf->n; |
| 387 } |
| 388 } |
| 389 |
| 390 synonym_poslist_out: |
| 391 if( aIter!=aStatic ) sqlite3_free(aIter); |
| 392 return rc; |
| 393 } |
| 394 |
| 395 |
| 396 /* |
| 397 ** All individual term iterators in pPhrase are guaranteed to be valid and |
| 398 ** pointing to the same rowid when this function is called. This function |
| 399 ** checks if the current rowid really is a match, and if so populates |
| 400 ** the pPhrase->poslist buffer accordingly. Output parameter *pbMatch |
| 401 ** is set to true if this is really a match, or false otherwise. |
| 402 ** |
| 403 ** SQLITE_OK is returned if an error occurs, or an SQLite error code |
| 404 ** otherwise. It is not considered an error code if the current rowid is |
| 405 ** not a match. |
| 406 */ |
| 407 static int fts5ExprPhraseIsMatch( |
| 408 Fts5ExprNode *pNode, /* Node pPhrase belongs to */ |
| 409 Fts5ExprPhrase *pPhrase, /* Phrase object to initialize */ |
| 410 int *pbMatch /* OUT: Set to true if really a match */ |
| 411 ){ |
| 412 Fts5PoslistWriter writer = {0}; |
| 413 Fts5PoslistReader aStatic[4]; |
| 414 Fts5PoslistReader *aIter = aStatic; |
| 415 int i; |
| 416 int rc = SQLITE_OK; |
| 417 |
| 418 fts5BufferZero(&pPhrase->poslist); |
| 419 |
| 420 /* If the aStatic[] array is not large enough, allocate a large array |
| 421 ** using sqlite3_malloc(). This approach could be improved upon. */ |
| 422 if( pPhrase->nTerm>ArraySize(aStatic) ){ |
| 423 int nByte = sizeof(Fts5PoslistReader) * pPhrase->nTerm; |
| 424 aIter = (Fts5PoslistReader*)sqlite3_malloc(nByte); |
| 425 if( !aIter ) return SQLITE_NOMEM; |
| 426 } |
| 427 memset(aIter, 0, sizeof(Fts5PoslistReader) * pPhrase->nTerm); |
| 428 |
| 429 /* Initialize a term iterator for each term in the phrase */ |
| 430 for(i=0; i<pPhrase->nTerm; i++){ |
| 431 Fts5ExprTerm *pTerm = &pPhrase->aTerm[i]; |
| 432 int n = 0; |
| 433 int bFlag = 0; |
| 434 u8 *a = 0; |
| 435 if( pTerm->pSynonym ){ |
| 436 Fts5Buffer buf = {0, 0, 0}; |
| 437 rc = fts5ExprSynonymList(pTerm, pNode->iRowid, &buf, &a, &n); |
| 438 if( rc ){ |
| 439 sqlite3_free(a); |
| 440 goto ismatch_out; |
| 441 } |
| 442 if( a==buf.p ) bFlag = 1; |
| 443 }else{ |
| 444 a = (u8*)pTerm->pIter->pData; |
| 445 n = pTerm->pIter->nData; |
| 446 } |
| 447 sqlite3Fts5PoslistReaderInit(a, n, &aIter[i]); |
| 448 aIter[i].bFlag = (u8)bFlag; |
| 449 if( aIter[i].bEof ) goto ismatch_out; |
| 450 } |
| 451 |
| 452 while( 1 ){ |
| 453 int bMatch; |
| 454 i64 iPos = aIter[0].iPos; |
| 455 do { |
| 456 bMatch = 1; |
| 457 for(i=0; i<pPhrase->nTerm; i++){ |
| 458 Fts5PoslistReader *pPos = &aIter[i]; |
| 459 i64 iAdj = iPos + i; |
| 460 if( pPos->iPos!=iAdj ){ |
| 461 bMatch = 0; |
| 462 while( pPos->iPos<iAdj ){ |
| 463 if( sqlite3Fts5PoslistReaderNext(pPos) ) goto ismatch_out; |
| 464 } |
| 465 if( pPos->iPos>iAdj ) iPos = pPos->iPos-i; |
| 466 } |
| 467 } |
| 468 }while( bMatch==0 ); |
| 469 |
| 470 /* Append position iPos to the output */ |
| 471 rc = sqlite3Fts5PoslistWriterAppend(&pPhrase->poslist, &writer, iPos); |
| 472 if( rc!=SQLITE_OK ) goto ismatch_out; |
| 473 |
| 474 for(i=0; i<pPhrase->nTerm; i++){ |
| 475 if( sqlite3Fts5PoslistReaderNext(&aIter[i]) ) goto ismatch_out; |
| 476 } |
| 477 } |
| 478 |
| 479 ismatch_out: |
| 480 *pbMatch = (pPhrase->poslist.n>0); |
| 481 for(i=0; i<pPhrase->nTerm; i++){ |
| 482 if( aIter[i].bFlag ) sqlite3_free((u8*)aIter[i].a); |
| 483 } |
| 484 if( aIter!=aStatic ) sqlite3_free(aIter); |
| 485 return rc; |
| 486 } |
| 487 |
| 488 typedef struct Fts5LookaheadReader Fts5LookaheadReader; |
| 489 struct Fts5LookaheadReader { |
| 490 const u8 *a; /* Buffer containing position list */ |
| 491 int n; /* Size of buffer a[] in bytes */ |
| 492 int i; /* Current offset in position list */ |
| 493 i64 iPos; /* Current position */ |
| 494 i64 iLookahead; /* Next position */ |
| 495 }; |
| 496 |
| 497 #define FTS5_LOOKAHEAD_EOF (((i64)1) << 62) |
| 498 |
| 499 static int fts5LookaheadReaderNext(Fts5LookaheadReader *p){ |
| 500 p->iPos = p->iLookahead; |
| 501 if( sqlite3Fts5PoslistNext64(p->a, p->n, &p->i, &p->iLookahead) ){ |
| 502 p->iLookahead = FTS5_LOOKAHEAD_EOF; |
| 503 } |
| 504 return (p->iPos==FTS5_LOOKAHEAD_EOF); |
| 505 } |
| 506 |
| 507 static int fts5LookaheadReaderInit( |
| 508 const u8 *a, int n, /* Buffer to read position list from */ |
| 509 Fts5LookaheadReader *p /* Iterator object to initialize */ |
| 510 ){ |
| 511 memset(p, 0, sizeof(Fts5LookaheadReader)); |
| 512 p->a = a; |
| 513 p->n = n; |
| 514 fts5LookaheadReaderNext(p); |
| 515 return fts5LookaheadReaderNext(p); |
| 516 } |
| 517 |
| 518 typedef struct Fts5NearTrimmer Fts5NearTrimmer; |
| 519 struct Fts5NearTrimmer { |
| 520 Fts5LookaheadReader reader; /* Input iterator */ |
| 521 Fts5PoslistWriter writer; /* Writer context */ |
| 522 Fts5Buffer *pOut; /* Output poslist */ |
| 523 }; |
| 524 |
| 525 /* |
| 526 ** The near-set object passed as the first argument contains more than |
| 527 ** one phrase. All phrases currently point to the same row. The |
| 528 ** Fts5ExprPhrase.poslist buffers are populated accordingly. This function |
| 529 ** tests if the current row contains instances of each phrase sufficiently |
| 530 ** close together to meet the NEAR constraint. Non-zero is returned if it |
| 531 ** does, or zero otherwise. |
| 532 ** |
| 533 ** If in/out parameter (*pRc) is set to other than SQLITE_OK when this |
| 534 ** function is called, it is a no-op. Or, if an error (e.g. SQLITE_NOMEM) |
| 535 ** occurs within this function (*pRc) is set accordingly before returning. |
| 536 ** The return value is undefined in both these cases. |
| 537 ** |
| 538 ** If no error occurs and non-zero (a match) is returned, the position-list |
| 539 ** of each phrase object is edited to contain only those entries that |
| 540 ** meet the constraint before returning. |
| 541 */ |
| 542 static int fts5ExprNearIsMatch(int *pRc, Fts5ExprNearset *pNear){ |
| 543 Fts5NearTrimmer aStatic[4]; |
| 544 Fts5NearTrimmer *a = aStatic; |
| 545 Fts5ExprPhrase **apPhrase = pNear->apPhrase; |
| 546 |
| 547 int i; |
| 548 int rc = *pRc; |
| 549 int bMatch; |
| 550 |
| 551 assert( pNear->nPhrase>1 ); |
| 552 |
| 553 /* If the aStatic[] array is not large enough, allocate a large array |
| 554 ** using sqlite3_malloc(). This approach could be improved upon. */ |
| 555 if( pNear->nPhrase>ArraySize(aStatic) ){ |
| 556 int nByte = sizeof(Fts5NearTrimmer) * pNear->nPhrase; |
| 557 a = (Fts5NearTrimmer*)sqlite3Fts5MallocZero(&rc, nByte); |
| 558 }else{ |
| 559 memset(aStatic, 0, sizeof(aStatic)); |
| 560 } |
| 561 if( rc!=SQLITE_OK ){ |
| 562 *pRc = rc; |
| 563 return 0; |
| 564 } |
| 565 |
| 566 /* Initialize a lookahead iterator for each phrase. After passing the |
| 567 ** buffer and buffer size to the lookaside-reader init function, zero |
| 568 ** the phrase poslist buffer. The new poslist for the phrase (containing |
| 569 ** the same entries as the original with some entries removed on account |
| 570 ** of the NEAR constraint) is written over the original even as it is |
| 571 ** being read. This is safe as the entries for the new poslist are a |
| 572 ** subset of the old, so it is not possible for data yet to be read to |
| 573 ** be overwritten. */ |
| 574 for(i=0; i<pNear->nPhrase; i++){ |
| 575 Fts5Buffer *pPoslist = &apPhrase[i]->poslist; |
| 576 fts5LookaheadReaderInit(pPoslist->p, pPoslist->n, &a[i].reader); |
| 577 pPoslist->n = 0; |
| 578 a[i].pOut = pPoslist; |
| 579 } |
| 580 |
| 581 while( 1 ){ |
| 582 int iAdv; |
| 583 i64 iMin; |
| 584 i64 iMax; |
| 585 |
| 586 /* This block advances the phrase iterators until they point to a set of |
| 587 ** entries that together comprise a match. */ |
| 588 iMax = a[0].reader.iPos; |
| 589 do { |
| 590 bMatch = 1; |
| 591 for(i=0; i<pNear->nPhrase; i++){ |
| 592 Fts5LookaheadReader *pPos = &a[i].reader; |
| 593 iMin = iMax - pNear->apPhrase[i]->nTerm - pNear->nNear; |
| 594 if( pPos->iPos<iMin || pPos->iPos>iMax ){ |
| 595 bMatch = 0; |
| 596 while( pPos->iPos<iMin ){ |
| 597 if( fts5LookaheadReaderNext(pPos) ) goto ismatch_out; |
| 598 } |
| 599 if( pPos->iPos>iMax ) iMax = pPos->iPos; |
| 600 } |
| 601 } |
| 602 }while( bMatch==0 ); |
| 603 |
| 604 /* Add an entry to each output position list */ |
| 605 for(i=0; i<pNear->nPhrase; i++){ |
| 606 i64 iPos = a[i].reader.iPos; |
| 607 Fts5PoslistWriter *pWriter = &a[i].writer; |
| 608 if( a[i].pOut->n==0 || iPos!=pWriter->iPrev ){ |
| 609 sqlite3Fts5PoslistWriterAppend(a[i].pOut, pWriter, iPos); |
| 610 } |
| 611 } |
| 612 |
| 613 iAdv = 0; |
| 614 iMin = a[0].reader.iLookahead; |
| 615 for(i=0; i<pNear->nPhrase; i++){ |
| 616 if( a[i].reader.iLookahead < iMin ){ |
| 617 iMin = a[i].reader.iLookahead; |
| 618 iAdv = i; |
| 619 } |
| 620 } |
| 621 if( fts5LookaheadReaderNext(&a[iAdv].reader) ) goto ismatch_out; |
| 622 } |
| 623 |
| 624 ismatch_out: { |
| 625 int bRet = a[0].pOut->n>0; |
| 626 *pRc = rc; |
| 627 if( a!=aStatic ) sqlite3_free(a); |
| 628 return bRet; |
| 629 } |
| 630 } |
| 631 |
| 632 /* |
| 633 ** Advance iterator pIter until it points to a value equal to or laster |
| 634 ** than the initial value of *piLast. If this means the iterator points |
| 635 ** to a value laster than *piLast, update *piLast to the new lastest value. |
| 636 ** |
| 637 ** If the iterator reaches EOF, set *pbEof to true before returning. If |
| 638 ** an error occurs, set *pRc to an error code. If either *pbEof or *pRc |
| 639 ** are set, return a non-zero value. Otherwise, return zero. |
| 640 */ |
| 641 static int fts5ExprAdvanceto( |
| 642 Fts5IndexIter *pIter, /* Iterator to advance */ |
| 643 int bDesc, /* True if iterator is "rowid DESC" */ |
| 644 i64 *piLast, /* IN/OUT: Lastest rowid seen so far */ |
| 645 int *pRc, /* OUT: Error code */ |
| 646 int *pbEof /* OUT: Set to true if EOF */ |
| 647 ){ |
| 648 i64 iLast = *piLast; |
| 649 i64 iRowid; |
| 650 |
| 651 iRowid = pIter->iRowid; |
| 652 if( (bDesc==0 && iLast>iRowid) || (bDesc && iLast<iRowid) ){ |
| 653 int rc = sqlite3Fts5IterNextFrom(pIter, iLast); |
| 654 if( rc || sqlite3Fts5IterEof(pIter) ){ |
| 655 *pRc = rc; |
| 656 *pbEof = 1; |
| 657 return 1; |
| 658 } |
| 659 iRowid = pIter->iRowid; |
| 660 assert( (bDesc==0 && iRowid>=iLast) || (bDesc==1 && iRowid<=iLast) ); |
| 661 } |
| 662 *piLast = iRowid; |
| 663 |
| 664 return 0; |
| 665 } |
| 666 |
| 667 static int fts5ExprSynonymAdvanceto( |
| 668 Fts5ExprTerm *pTerm, /* Term iterator to advance */ |
| 669 int bDesc, /* True if iterator is "rowid DESC" */ |
| 670 i64 *piLast, /* IN/OUT: Lastest rowid seen so far */ |
| 671 int *pRc /* OUT: Error code */ |
| 672 ){ |
| 673 int rc = SQLITE_OK; |
| 674 i64 iLast = *piLast; |
| 675 Fts5ExprTerm *p; |
| 676 int bEof = 0; |
| 677 |
| 678 for(p=pTerm; rc==SQLITE_OK && p; p=p->pSynonym){ |
| 679 if( sqlite3Fts5IterEof(p->pIter)==0 ){ |
| 680 i64 iRowid = p->pIter->iRowid; |
| 681 if( (bDesc==0 && iLast>iRowid) || (bDesc && iLast<iRowid) ){ |
| 682 rc = sqlite3Fts5IterNextFrom(p->pIter, iLast); |
| 683 } |
| 684 } |
| 685 } |
| 686 |
| 687 if( rc!=SQLITE_OK ){ |
| 688 *pRc = rc; |
| 689 bEof = 1; |
| 690 }else{ |
| 691 *piLast = fts5ExprSynonymRowid(pTerm, bDesc, &bEof); |
| 692 } |
| 693 return bEof; |
| 694 } |
| 695 |
| 696 |
| 697 static int fts5ExprNearTest( |
| 698 int *pRc, |
| 699 Fts5Expr *pExpr, /* Expression that pNear is a part of */ |
| 700 Fts5ExprNode *pNode /* The "NEAR" node (FTS5_STRING) */ |
| 701 ){ |
| 702 Fts5ExprNearset *pNear = pNode->pNear; |
| 703 int rc = *pRc; |
| 704 |
| 705 if( pExpr->pConfig->eDetail!=FTS5_DETAIL_FULL ){ |
| 706 Fts5ExprTerm *pTerm; |
| 707 Fts5ExprPhrase *pPhrase = pNear->apPhrase[0]; |
| 708 pPhrase->poslist.n = 0; |
| 709 for(pTerm=&pPhrase->aTerm[0]; pTerm; pTerm=pTerm->pSynonym){ |
| 710 Fts5IndexIter *pIter = pTerm->pIter; |
| 711 if( sqlite3Fts5IterEof(pIter)==0 ){ |
| 712 if( pIter->iRowid==pNode->iRowid && pIter->nData>0 ){ |
| 713 pPhrase->poslist.n = 1; |
| 714 } |
| 715 } |
| 716 } |
| 717 return pPhrase->poslist.n; |
| 718 }else{ |
| 719 int i; |
| 720 |
| 721 /* Check that each phrase in the nearset matches the current row. |
| 722 ** Populate the pPhrase->poslist buffers at the same time. If any |
| 723 ** phrase is not a match, break out of the loop early. */ |
| 724 for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){ |
| 725 Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; |
| 726 if( pPhrase->nTerm>1 || pPhrase->aTerm[0].pSynonym || pNear->pColset ){ |
| 727 int bMatch = 0; |
| 728 rc = fts5ExprPhraseIsMatch(pNode, pPhrase, &bMatch); |
| 729 if( bMatch==0 ) break; |
| 730 }else{ |
| 731 Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter; |
| 732 fts5BufferSet(&rc, &pPhrase->poslist, pIter->nData, pIter->pData); |
| 733 } |
| 734 } |
| 735 |
| 736 *pRc = rc; |
| 737 if( i==pNear->nPhrase && (i==1 || fts5ExprNearIsMatch(pRc, pNear)) ){ |
| 738 return 1; |
| 739 } |
| 740 return 0; |
| 741 } |
| 742 } |
| 743 |
| 744 |
| 745 /* |
| 746 ** Initialize all term iterators in the pNear object. If any term is found |
| 747 ** to match no documents at all, return immediately without initializing any |
| 748 ** further iterators. |
| 749 ** |
| 750 ** If an error occurs, return an SQLite error code. Otherwise, return |
| 751 ** SQLITE_OK. It is not considered an error if some term matches zero |
| 752 ** documents. |
| 753 */ |
| 754 static int fts5ExprNearInitAll( |
| 755 Fts5Expr *pExpr, |
| 756 Fts5ExprNode *pNode |
| 757 ){ |
| 758 Fts5ExprNearset *pNear = pNode->pNear; |
| 759 int i; |
| 760 |
| 761 assert( pNode->bNomatch==0 ); |
| 762 for(i=0; i<pNear->nPhrase; i++){ |
| 763 Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; |
| 764 if( pPhrase->nTerm==0 ){ |
| 765 pNode->bEof = 1; |
| 766 return SQLITE_OK; |
| 767 }else{ |
| 768 int j; |
| 769 for(j=0; j<pPhrase->nTerm; j++){ |
| 770 Fts5ExprTerm *pTerm = &pPhrase->aTerm[j]; |
| 771 Fts5ExprTerm *p; |
| 772 int bHit = 0; |
| 773 |
| 774 for(p=pTerm; p; p=p->pSynonym){ |
| 775 int rc; |
| 776 if( p->pIter ){ |
| 777 sqlite3Fts5IterClose(p->pIter); |
| 778 p->pIter = 0; |
| 779 } |
| 780 rc = sqlite3Fts5IndexQuery( |
| 781 pExpr->pIndex, p->zTerm, (int)strlen(p->zTerm), |
| 782 (pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX : 0) | |
| 783 (pExpr->bDesc ? FTS5INDEX_QUERY_DESC : 0), |
| 784 pNear->pColset, |
| 785 &p->pIter |
| 786 ); |
| 787 assert( (rc==SQLITE_OK)==(p->pIter!=0) ); |
| 788 if( rc!=SQLITE_OK ) return rc; |
| 789 if( 0==sqlite3Fts5IterEof(p->pIter) ){ |
| 790 bHit = 1; |
| 791 } |
| 792 } |
| 793 |
| 794 if( bHit==0 ){ |
| 795 pNode->bEof = 1; |
| 796 return SQLITE_OK; |
| 797 } |
| 798 } |
| 799 } |
| 800 } |
| 801 |
| 802 pNode->bEof = 0; |
| 803 return SQLITE_OK; |
| 804 } |
| 805 |
| 806 /* |
| 807 ** If pExpr is an ASC iterator, this function returns a value with the |
| 808 ** same sign as: |
| 809 ** |
| 810 ** (iLhs - iRhs) |
| 811 ** |
| 812 ** Otherwise, if this is a DESC iterator, the opposite is returned: |
| 813 ** |
| 814 ** (iRhs - iLhs) |
| 815 */ |
| 816 static int fts5RowidCmp( |
| 817 Fts5Expr *pExpr, |
| 818 i64 iLhs, |
| 819 i64 iRhs |
| 820 ){ |
| 821 assert( pExpr->bDesc==0 || pExpr->bDesc==1 ); |
| 822 if( pExpr->bDesc==0 ){ |
| 823 if( iLhs<iRhs ) return -1; |
| 824 return (iLhs > iRhs); |
| 825 }else{ |
| 826 if( iLhs>iRhs ) return -1; |
| 827 return (iLhs < iRhs); |
| 828 } |
| 829 } |
| 830 |
| 831 static void fts5ExprSetEof(Fts5ExprNode *pNode){ |
| 832 int i; |
| 833 pNode->bEof = 1; |
| 834 pNode->bNomatch = 0; |
| 835 for(i=0; i<pNode->nChild; i++){ |
| 836 fts5ExprSetEof(pNode->apChild[i]); |
| 837 } |
| 838 } |
| 839 |
| 840 static void fts5ExprNodeZeroPoslist(Fts5ExprNode *pNode){ |
| 841 if( pNode->eType==FTS5_STRING || pNode->eType==FTS5_TERM ){ |
| 842 Fts5ExprNearset *pNear = pNode->pNear; |
| 843 int i; |
| 844 for(i=0; i<pNear->nPhrase; i++){ |
| 845 Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; |
| 846 pPhrase->poslist.n = 0; |
| 847 } |
| 848 }else{ |
| 849 int i; |
| 850 for(i=0; i<pNode->nChild; i++){ |
| 851 fts5ExprNodeZeroPoslist(pNode->apChild[i]); |
| 852 } |
| 853 } |
| 854 } |
| 855 |
| 856 |
| 857 |
| 858 /* |
| 859 ** Compare the values currently indicated by the two nodes as follows: |
| 860 ** |
| 861 ** res = (*p1) - (*p2) |
| 862 ** |
| 863 ** Nodes that point to values that come later in the iteration order are |
| 864 ** considered to be larger. Nodes at EOF are the largest of all. |
| 865 ** |
| 866 ** This means that if the iteration order is ASC, then numerically larger |
| 867 ** rowids are considered larger. Or if it is the default DESC, numerically |
| 868 ** smaller rowids are larger. |
| 869 */ |
| 870 static int fts5NodeCompare( |
| 871 Fts5Expr *pExpr, |
| 872 Fts5ExprNode *p1, |
| 873 Fts5ExprNode *p2 |
| 874 ){ |
| 875 if( p2->bEof ) return -1; |
| 876 if( p1->bEof ) return +1; |
| 877 return fts5RowidCmp(pExpr, p1->iRowid, p2->iRowid); |
| 878 } |
| 879 |
| 880 /* |
| 881 ** All individual term iterators in pNear are guaranteed to be valid when |
| 882 ** this function is called. This function checks if all term iterators |
| 883 ** point to the same rowid, and if not, advances them until they do. |
| 884 ** If an EOF is reached before this happens, *pbEof is set to true before |
| 885 ** returning. |
| 886 ** |
| 887 ** SQLITE_OK is returned if an error occurs, or an SQLite error code |
| 888 ** otherwise. It is not considered an error code if an iterator reaches |
| 889 ** EOF. |
| 890 */ |
| 891 static int fts5ExprNodeTest_STRING( |
| 892 Fts5Expr *pExpr, /* Expression pPhrase belongs to */ |
| 893 Fts5ExprNode *pNode |
| 894 ){ |
| 895 Fts5ExprNearset *pNear = pNode->pNear; |
| 896 Fts5ExprPhrase *pLeft = pNear->apPhrase[0]; |
| 897 int rc = SQLITE_OK; |
| 898 i64 iLast; /* Lastest rowid any iterator points to */ |
| 899 int i, j; /* Phrase and token index, respectively */ |
| 900 int bMatch; /* True if all terms are at the same rowid */ |
| 901 const int bDesc = pExpr->bDesc; |
| 902 |
| 903 /* Check that this node should not be FTS5_TERM */ |
| 904 assert( pNear->nPhrase>1 |
| 905 || pNear->apPhrase[0]->nTerm>1 |
| 906 || pNear->apPhrase[0]->aTerm[0].pSynonym |
| 907 ); |
| 908 |
| 909 /* Initialize iLast, the "lastest" rowid any iterator points to. If the |
| 910 ** iterator skips through rowids in the default ascending order, this means |
| 911 ** the maximum rowid. Or, if the iterator is "ORDER BY rowid DESC", then it |
| 912 ** means the minimum rowid. */ |
| 913 if( pLeft->aTerm[0].pSynonym ){ |
| 914 iLast = fts5ExprSynonymRowid(&pLeft->aTerm[0], bDesc, 0); |
| 915 }else{ |
| 916 iLast = pLeft->aTerm[0].pIter->iRowid; |
| 917 } |
| 918 |
| 919 do { |
| 920 bMatch = 1; |
| 921 for(i=0; i<pNear->nPhrase; i++){ |
| 922 Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; |
| 923 for(j=0; j<pPhrase->nTerm; j++){ |
| 924 Fts5ExprTerm *pTerm = &pPhrase->aTerm[j]; |
| 925 if( pTerm->pSynonym ){ |
| 926 i64 iRowid = fts5ExprSynonymRowid(pTerm, bDesc, 0); |
| 927 if( iRowid==iLast ) continue; |
| 928 bMatch = 0; |
| 929 if( fts5ExprSynonymAdvanceto(pTerm, bDesc, &iLast, &rc) ){ |
| 930 pNode->bNomatch = 0; |
| 931 pNode->bEof = 1; |
| 932 return rc; |
| 933 } |
| 934 }else{ |
| 935 Fts5IndexIter *pIter = pPhrase->aTerm[j].pIter; |
| 936 if( pIter->iRowid==iLast || pIter->bEof ) continue; |
| 937 bMatch = 0; |
| 938 if( fts5ExprAdvanceto(pIter, bDesc, &iLast, &rc, &pNode->bEof) ){ |
| 939 return rc; |
| 940 } |
| 941 } |
| 942 } |
| 943 } |
| 944 }while( bMatch==0 ); |
| 945 |
| 946 pNode->iRowid = iLast; |
| 947 pNode->bNomatch = ((0==fts5ExprNearTest(&rc, pExpr, pNode)) && rc==SQLITE_OK); |
| 948 assert( pNode->bEof==0 || pNode->bNomatch==0 ); |
| 949 |
| 950 return rc; |
| 951 } |
| 952 |
| 953 /* |
| 954 ** Advance the first term iterator in the first phrase of pNear. Set output |
| 955 ** variable *pbEof to true if it reaches EOF or if an error occurs. |
| 956 ** |
| 957 ** Return SQLITE_OK if successful, or an SQLite error code if an error |
| 958 ** occurs. |
| 959 */ |
| 960 static int fts5ExprNodeNext_STRING( |
| 961 Fts5Expr *pExpr, /* Expression pPhrase belongs to */ |
| 962 Fts5ExprNode *pNode, /* FTS5_STRING or FTS5_TERM node */ |
| 963 int bFromValid, |
| 964 i64 iFrom |
| 965 ){ |
| 966 Fts5ExprTerm *pTerm = &pNode->pNear->apPhrase[0]->aTerm[0]; |
| 967 int rc = SQLITE_OK; |
| 968 |
| 969 pNode->bNomatch = 0; |
| 970 if( pTerm->pSynonym ){ |
| 971 int bEof = 1; |
| 972 Fts5ExprTerm *p; |
| 973 |
| 974 /* Find the firstest rowid any synonym points to. */ |
| 975 i64 iRowid = fts5ExprSynonymRowid(pTerm, pExpr->bDesc, 0); |
| 976 |
| 977 /* Advance each iterator that currently points to iRowid. Or, if iFrom |
| 978 ** is valid - each iterator that points to a rowid before iFrom. */ |
| 979 for(p=pTerm; p; p=p->pSynonym){ |
| 980 if( sqlite3Fts5IterEof(p->pIter)==0 ){ |
| 981 i64 ii = p->pIter->iRowid; |
| 982 if( ii==iRowid |
| 983 || (bFromValid && ii!=iFrom && (ii>iFrom)==pExpr->bDesc) |
| 984 ){ |
| 985 if( bFromValid ){ |
| 986 rc = sqlite3Fts5IterNextFrom(p->pIter, iFrom); |
| 987 }else{ |
| 988 rc = sqlite3Fts5IterNext(p->pIter); |
| 989 } |
| 990 if( rc!=SQLITE_OK ) break; |
| 991 if( sqlite3Fts5IterEof(p->pIter)==0 ){ |
| 992 bEof = 0; |
| 993 } |
| 994 }else{ |
| 995 bEof = 0; |
| 996 } |
| 997 } |
| 998 } |
| 999 |
| 1000 /* Set the EOF flag if either all synonym iterators are at EOF or an |
| 1001 ** error has occurred. */ |
| 1002 pNode->bEof = (rc || bEof); |
| 1003 }else{ |
| 1004 Fts5IndexIter *pIter = pTerm->pIter; |
| 1005 |
| 1006 assert( Fts5NodeIsString(pNode) ); |
| 1007 if( bFromValid ){ |
| 1008 rc = sqlite3Fts5IterNextFrom(pIter, iFrom); |
| 1009 }else{ |
| 1010 rc = sqlite3Fts5IterNext(pIter); |
| 1011 } |
| 1012 |
| 1013 pNode->bEof = (rc || sqlite3Fts5IterEof(pIter)); |
| 1014 } |
| 1015 |
| 1016 if( pNode->bEof==0 ){ |
| 1017 assert( rc==SQLITE_OK ); |
| 1018 rc = fts5ExprNodeTest_STRING(pExpr, pNode); |
| 1019 } |
| 1020 |
| 1021 return rc; |
| 1022 } |
| 1023 |
| 1024 |
| 1025 static int fts5ExprNodeTest_TERM( |
| 1026 Fts5Expr *pExpr, /* Expression that pNear is a part of */ |
| 1027 Fts5ExprNode *pNode /* The "NEAR" node (FTS5_TERM) */ |
| 1028 ){ |
| 1029 /* As this "NEAR" object is actually a single phrase that consists |
| 1030 ** of a single term only, grab pointers into the poslist managed by the |
| 1031 ** fts5_index.c iterator object. This is much faster than synthesizing |
| 1032 ** a new poslist the way we have to for more complicated phrase or NEAR |
| 1033 ** expressions. */ |
| 1034 Fts5ExprPhrase *pPhrase = pNode->pNear->apPhrase[0]; |
| 1035 Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter; |
| 1036 |
| 1037 assert( pNode->eType==FTS5_TERM ); |
| 1038 assert( pNode->pNear->nPhrase==1 && pPhrase->nTerm==1 ); |
| 1039 assert( pPhrase->aTerm[0].pSynonym==0 ); |
| 1040 |
| 1041 pPhrase->poslist.n = pIter->nData; |
| 1042 if( pExpr->pConfig->eDetail==FTS5_DETAIL_FULL ){ |
| 1043 pPhrase->poslist.p = (u8*)pIter->pData; |
| 1044 } |
| 1045 pNode->iRowid = pIter->iRowid; |
| 1046 pNode->bNomatch = (pPhrase->poslist.n==0); |
| 1047 return SQLITE_OK; |
| 1048 } |
| 1049 |
| 1050 /* |
| 1051 ** xNext() method for a node of type FTS5_TERM. |
| 1052 */ |
| 1053 static int fts5ExprNodeNext_TERM( |
| 1054 Fts5Expr *pExpr, |
| 1055 Fts5ExprNode *pNode, |
| 1056 int bFromValid, |
| 1057 i64 iFrom |
| 1058 ){ |
| 1059 int rc; |
| 1060 Fts5IndexIter *pIter = pNode->pNear->apPhrase[0]->aTerm[0].pIter; |
| 1061 |
| 1062 assert( pNode->bEof==0 ); |
| 1063 if( bFromValid ){ |
| 1064 rc = sqlite3Fts5IterNextFrom(pIter, iFrom); |
| 1065 }else{ |
| 1066 rc = sqlite3Fts5IterNext(pIter); |
| 1067 } |
| 1068 if( rc==SQLITE_OK && sqlite3Fts5IterEof(pIter)==0 ){ |
| 1069 rc = fts5ExprNodeTest_TERM(pExpr, pNode); |
| 1070 }else{ |
| 1071 pNode->bEof = 1; |
| 1072 pNode->bNomatch = 0; |
| 1073 } |
| 1074 return rc; |
| 1075 } |
| 1076 |
| 1077 static void fts5ExprNodeTest_OR( |
| 1078 Fts5Expr *pExpr, /* Expression of which pNode is a part */ |
| 1079 Fts5ExprNode *pNode /* Expression node to test */ |
| 1080 ){ |
| 1081 Fts5ExprNode *pNext = pNode->apChild[0]; |
| 1082 int i; |
| 1083 |
| 1084 for(i=1; i<pNode->nChild; i++){ |
| 1085 Fts5ExprNode *pChild = pNode->apChild[i]; |
| 1086 int cmp = fts5NodeCompare(pExpr, pNext, pChild); |
| 1087 if( cmp>0 || (cmp==0 && pChild->bNomatch==0) ){ |
| 1088 pNext = pChild; |
| 1089 } |
| 1090 } |
| 1091 pNode->iRowid = pNext->iRowid; |
| 1092 pNode->bEof = pNext->bEof; |
| 1093 pNode->bNomatch = pNext->bNomatch; |
| 1094 } |
| 1095 |
| 1096 static int fts5ExprNodeNext_OR( |
| 1097 Fts5Expr *pExpr, |
| 1098 Fts5ExprNode *pNode, |
| 1099 int bFromValid, |
| 1100 i64 iFrom |
| 1101 ){ |
| 1102 int i; |
| 1103 i64 iLast = pNode->iRowid; |
| 1104 |
| 1105 for(i=0; i<pNode->nChild; i++){ |
| 1106 Fts5ExprNode *p1 = pNode->apChild[i]; |
| 1107 assert( p1->bEof || fts5RowidCmp(pExpr, p1->iRowid, iLast)>=0 ); |
| 1108 if( p1->bEof==0 ){ |
| 1109 if( (p1->iRowid==iLast) |
| 1110 || (bFromValid && fts5RowidCmp(pExpr, p1->iRowid, iFrom)<0) |
| 1111 ){ |
| 1112 int rc = fts5ExprNodeNext(pExpr, p1, bFromValid, iFrom); |
| 1113 if( rc!=SQLITE_OK ) return rc; |
| 1114 } |
| 1115 } |
| 1116 } |
| 1117 |
| 1118 fts5ExprNodeTest_OR(pExpr, pNode); |
| 1119 return SQLITE_OK; |
| 1120 } |
| 1121 |
| 1122 /* |
| 1123 ** Argument pNode is an FTS5_AND node. |
| 1124 */ |
| 1125 static int fts5ExprNodeTest_AND( |
| 1126 Fts5Expr *pExpr, /* Expression pPhrase belongs to */ |
| 1127 Fts5ExprNode *pAnd /* FTS5_AND node to advance */ |
| 1128 ){ |
| 1129 int iChild; |
| 1130 i64 iLast = pAnd->iRowid; |
| 1131 int rc = SQLITE_OK; |
| 1132 int bMatch; |
| 1133 |
| 1134 assert( pAnd->bEof==0 ); |
| 1135 do { |
| 1136 pAnd->bNomatch = 0; |
| 1137 bMatch = 1; |
| 1138 for(iChild=0; iChild<pAnd->nChild; iChild++){ |
| 1139 Fts5ExprNode *pChild = pAnd->apChild[iChild]; |
| 1140 int cmp = fts5RowidCmp(pExpr, iLast, pChild->iRowid); |
| 1141 if( cmp>0 ){ |
| 1142 /* Advance pChild until it points to iLast or laster */ |
| 1143 rc = fts5ExprNodeNext(pExpr, pChild, 1, iLast); |
| 1144 if( rc!=SQLITE_OK ) return rc; |
| 1145 } |
| 1146 |
| 1147 /* If the child node is now at EOF, so is the parent AND node. Otherwise, |
| 1148 ** the child node is guaranteed to have advanced at least as far as |
| 1149 ** rowid iLast. So if it is not at exactly iLast, pChild->iRowid is the |
| 1150 ** new lastest rowid seen so far. */ |
| 1151 assert( pChild->bEof || fts5RowidCmp(pExpr, iLast, pChild->iRowid)<=0 ); |
| 1152 if( pChild->bEof ){ |
| 1153 fts5ExprSetEof(pAnd); |
| 1154 bMatch = 1; |
| 1155 break; |
| 1156 }else if( iLast!=pChild->iRowid ){ |
| 1157 bMatch = 0; |
| 1158 iLast = pChild->iRowid; |
| 1159 } |
| 1160 |
| 1161 if( pChild->bNomatch ){ |
| 1162 pAnd->bNomatch = 1; |
| 1163 } |
| 1164 } |
| 1165 }while( bMatch==0 ); |
| 1166 |
| 1167 if( pAnd->bNomatch && pAnd!=pExpr->pRoot ){ |
| 1168 fts5ExprNodeZeroPoslist(pAnd); |
| 1169 } |
| 1170 pAnd->iRowid = iLast; |
| 1171 return SQLITE_OK; |
| 1172 } |
| 1173 |
| 1174 static int fts5ExprNodeNext_AND( |
| 1175 Fts5Expr *pExpr, |
| 1176 Fts5ExprNode *pNode, |
| 1177 int bFromValid, |
| 1178 i64 iFrom |
| 1179 ){ |
| 1180 int rc = fts5ExprNodeNext(pExpr, pNode->apChild[0], bFromValid, iFrom); |
| 1181 if( rc==SQLITE_OK ){ |
| 1182 rc = fts5ExprNodeTest_AND(pExpr, pNode); |
| 1183 } |
| 1184 return rc; |
| 1185 } |
| 1186 |
| 1187 static int fts5ExprNodeTest_NOT( |
| 1188 Fts5Expr *pExpr, /* Expression pPhrase belongs to */ |
| 1189 Fts5ExprNode *pNode /* FTS5_NOT node to advance */ |
| 1190 ){ |
| 1191 int rc = SQLITE_OK; |
| 1192 Fts5ExprNode *p1 = pNode->apChild[0]; |
| 1193 Fts5ExprNode *p2 = pNode->apChild[1]; |
| 1194 assert( pNode->nChild==2 ); |
| 1195 |
| 1196 while( rc==SQLITE_OK && p1->bEof==0 ){ |
| 1197 int cmp = fts5NodeCompare(pExpr, p1, p2); |
| 1198 if( cmp>0 ){ |
| 1199 rc = fts5ExprNodeNext(pExpr, p2, 1, p1->iRowid); |
| 1200 cmp = fts5NodeCompare(pExpr, p1, p2); |
| 1201 } |
| 1202 assert( rc!=SQLITE_OK || cmp<=0 ); |
| 1203 if( cmp || p2->bNomatch ) break; |
| 1204 rc = fts5ExprNodeNext(pExpr, p1, 0, 0); |
| 1205 } |
| 1206 pNode->bEof = p1->bEof; |
| 1207 pNode->bNomatch = p1->bNomatch; |
| 1208 pNode->iRowid = p1->iRowid; |
| 1209 if( p1->bEof ){ |
| 1210 fts5ExprNodeZeroPoslist(p2); |
| 1211 } |
| 1212 return rc; |
| 1213 } |
| 1214 |
| 1215 static int fts5ExprNodeNext_NOT( |
| 1216 Fts5Expr *pExpr, |
| 1217 Fts5ExprNode *pNode, |
| 1218 int bFromValid, |
| 1219 i64 iFrom |
| 1220 ){ |
| 1221 int rc = fts5ExprNodeNext(pExpr, pNode->apChild[0], bFromValid, iFrom); |
| 1222 if( rc==SQLITE_OK ){ |
| 1223 rc = fts5ExprNodeTest_NOT(pExpr, pNode); |
| 1224 } |
| 1225 return rc; |
| 1226 } |
| 1227 |
| 1228 /* |
| 1229 ** If pNode currently points to a match, this function returns SQLITE_OK |
| 1230 ** without modifying it. Otherwise, pNode is advanced until it does point |
| 1231 ** to a match or EOF is reached. |
| 1232 */ |
| 1233 static int fts5ExprNodeTest( |
| 1234 Fts5Expr *pExpr, /* Expression of which pNode is a part */ |
| 1235 Fts5ExprNode *pNode /* Expression node to test */ |
| 1236 ){ |
| 1237 int rc = SQLITE_OK; |
| 1238 if( pNode->bEof==0 ){ |
| 1239 switch( pNode->eType ){ |
| 1240 |
| 1241 case FTS5_STRING: { |
| 1242 rc = fts5ExprNodeTest_STRING(pExpr, pNode); |
| 1243 break; |
| 1244 } |
| 1245 |
| 1246 case FTS5_TERM: { |
| 1247 rc = fts5ExprNodeTest_TERM(pExpr, pNode); |
| 1248 break; |
| 1249 } |
| 1250 |
| 1251 case FTS5_AND: { |
| 1252 rc = fts5ExprNodeTest_AND(pExpr, pNode); |
| 1253 break; |
| 1254 } |
| 1255 |
| 1256 case FTS5_OR: { |
| 1257 fts5ExprNodeTest_OR(pExpr, pNode); |
| 1258 break; |
| 1259 } |
| 1260 |
| 1261 default: assert( pNode->eType==FTS5_NOT ); { |
| 1262 rc = fts5ExprNodeTest_NOT(pExpr, pNode); |
| 1263 break; |
| 1264 } |
| 1265 } |
| 1266 } |
| 1267 return rc; |
| 1268 } |
| 1269 |
| 1270 |
| 1271 /* |
| 1272 ** Set node pNode, which is part of expression pExpr, to point to the first |
| 1273 ** match. If there are no matches, set the Node.bEof flag to indicate EOF. |
| 1274 ** |
| 1275 ** Return an SQLite error code if an error occurs, or SQLITE_OK otherwise. |
| 1276 ** It is not an error if there are no matches. |
| 1277 */ |
| 1278 static int fts5ExprNodeFirst(Fts5Expr *pExpr, Fts5ExprNode *pNode){ |
| 1279 int rc = SQLITE_OK; |
| 1280 pNode->bEof = 0; |
| 1281 pNode->bNomatch = 0; |
| 1282 |
| 1283 if( Fts5NodeIsString(pNode) ){ |
| 1284 /* Initialize all term iterators in the NEAR object. */ |
| 1285 rc = fts5ExprNearInitAll(pExpr, pNode); |
| 1286 }else if( pNode->xNext==0 ){ |
| 1287 pNode->bEof = 1; |
| 1288 }else{ |
| 1289 int i; |
| 1290 int nEof = 0; |
| 1291 for(i=0; i<pNode->nChild && rc==SQLITE_OK; i++){ |
| 1292 Fts5ExprNode *pChild = pNode->apChild[i]; |
| 1293 rc = fts5ExprNodeFirst(pExpr, pNode->apChild[i]); |
| 1294 assert( pChild->bEof==0 || pChild->bEof==1 ); |
| 1295 nEof += pChild->bEof; |
| 1296 } |
| 1297 pNode->iRowid = pNode->apChild[0]->iRowid; |
| 1298 |
| 1299 switch( pNode->eType ){ |
| 1300 case FTS5_AND: |
| 1301 if( nEof>0 ) fts5ExprSetEof(pNode); |
| 1302 break; |
| 1303 |
| 1304 case FTS5_OR: |
| 1305 if( pNode->nChild==nEof ) fts5ExprSetEof(pNode); |
| 1306 break; |
| 1307 |
| 1308 default: |
| 1309 assert( pNode->eType==FTS5_NOT ); |
| 1310 pNode->bEof = pNode->apChild[0]->bEof; |
| 1311 break; |
| 1312 } |
| 1313 } |
| 1314 |
| 1315 if( rc==SQLITE_OK ){ |
| 1316 rc = fts5ExprNodeTest(pExpr, pNode); |
| 1317 } |
| 1318 return rc; |
| 1319 } |
| 1320 |
| 1321 |
| 1322 /* |
| 1323 ** Begin iterating through the set of documents in index pIdx matched by |
| 1324 ** the MATCH expression passed as the first argument. If the "bDesc" |
| 1325 ** parameter is passed a non-zero value, iteration is in descending rowid |
| 1326 ** order. Or, if it is zero, in ascending order. |
| 1327 ** |
| 1328 ** If iterating in ascending rowid order (bDesc==0), the first document |
| 1329 ** visited is that with the smallest rowid that is larger than or equal |
| 1330 ** to parameter iFirst. Or, if iterating in ascending order (bDesc==1), |
| 1331 ** then the first document visited must have a rowid smaller than or |
| 1332 ** equal to iFirst. |
| 1333 ** |
| 1334 ** Return SQLITE_OK if successful, or an SQLite error code otherwise. It |
| 1335 ** is not considered an error if the query does not match any documents. |
| 1336 */ |
| 1337 int sqlite3Fts5ExprFirst(Fts5Expr *p, Fts5Index *pIdx, i64 iFirst, int bDesc){ |
| 1338 Fts5ExprNode *pRoot = p->pRoot; |
| 1339 int rc; /* Return code */ |
| 1340 |
| 1341 p->pIndex = pIdx; |
| 1342 p->bDesc = bDesc; |
| 1343 rc = fts5ExprNodeFirst(p, pRoot); |
| 1344 |
| 1345 /* If not at EOF but the current rowid occurs earlier than iFirst in |
| 1346 ** the iteration order, move to document iFirst or later. */ |
| 1347 if( rc==SQLITE_OK |
| 1348 && 0==pRoot->bEof |
| 1349 && fts5RowidCmp(p, pRoot->iRowid, iFirst)<0 |
| 1350 ){ |
| 1351 rc = fts5ExprNodeNext(p, pRoot, 1, iFirst); |
| 1352 } |
| 1353 |
| 1354 /* If the iterator is not at a real match, skip forward until it is. */ |
| 1355 while( pRoot->bNomatch ){ |
| 1356 assert( pRoot->bEof==0 && rc==SQLITE_OK ); |
| 1357 rc = fts5ExprNodeNext(p, pRoot, 0, 0); |
| 1358 } |
| 1359 return rc; |
| 1360 } |
| 1361 |
| 1362 /* |
| 1363 ** Move to the next document |
| 1364 ** |
| 1365 ** Return SQLITE_OK if successful, or an SQLite error code otherwise. It |
| 1366 ** is not considered an error if the query does not match any documents. |
| 1367 */ |
| 1368 int sqlite3Fts5ExprNext(Fts5Expr *p, i64 iLast){ |
| 1369 int rc; |
| 1370 Fts5ExprNode *pRoot = p->pRoot; |
| 1371 assert( pRoot->bEof==0 && pRoot->bNomatch==0 ); |
| 1372 do { |
| 1373 rc = fts5ExprNodeNext(p, pRoot, 0, 0); |
| 1374 assert( pRoot->bNomatch==0 || (rc==SQLITE_OK && pRoot->bEof==0) ); |
| 1375 }while( pRoot->bNomatch ); |
| 1376 if( fts5RowidCmp(p, pRoot->iRowid, iLast)>0 ){ |
| 1377 pRoot->bEof = 1; |
| 1378 } |
| 1379 return rc; |
| 1380 } |
| 1381 |
| 1382 int sqlite3Fts5ExprEof(Fts5Expr *p){ |
| 1383 return p->pRoot->bEof; |
| 1384 } |
| 1385 |
| 1386 i64 sqlite3Fts5ExprRowid(Fts5Expr *p){ |
| 1387 return p->pRoot->iRowid; |
| 1388 } |
| 1389 |
| 1390 static int fts5ParseStringFromToken(Fts5Token *pToken, char **pz){ |
| 1391 int rc = SQLITE_OK; |
| 1392 *pz = sqlite3Fts5Strndup(&rc, pToken->p, pToken->n); |
| 1393 return rc; |
| 1394 } |
| 1395 |
| 1396 /* |
| 1397 ** Free the phrase object passed as the only argument. |
| 1398 */ |
| 1399 static void fts5ExprPhraseFree(Fts5ExprPhrase *pPhrase){ |
| 1400 if( pPhrase ){ |
| 1401 int i; |
| 1402 for(i=0; i<pPhrase->nTerm; i++){ |
| 1403 Fts5ExprTerm *pSyn; |
| 1404 Fts5ExprTerm *pNext; |
| 1405 Fts5ExprTerm *pTerm = &pPhrase->aTerm[i]; |
| 1406 sqlite3_free(pTerm->zTerm); |
| 1407 sqlite3Fts5IterClose(pTerm->pIter); |
| 1408 for(pSyn=pTerm->pSynonym; pSyn; pSyn=pNext){ |
| 1409 pNext = pSyn->pSynonym; |
| 1410 sqlite3Fts5IterClose(pSyn->pIter); |
| 1411 fts5BufferFree((Fts5Buffer*)&pSyn[1]); |
| 1412 sqlite3_free(pSyn); |
| 1413 } |
| 1414 } |
| 1415 if( pPhrase->poslist.nSpace>0 ) fts5BufferFree(&pPhrase->poslist); |
| 1416 sqlite3_free(pPhrase); |
| 1417 } |
| 1418 } |
| 1419 |
| 1420 /* |
| 1421 ** If argument pNear is NULL, then a new Fts5ExprNearset object is allocated |
| 1422 ** and populated with pPhrase. Or, if pNear is not NULL, phrase pPhrase is |
| 1423 ** appended to it and the results returned. |
| 1424 ** |
| 1425 ** If an OOM error occurs, both the pNear and pPhrase objects are freed and |
| 1426 ** NULL returned. |
| 1427 */ |
| 1428 Fts5ExprNearset *sqlite3Fts5ParseNearset( |
| 1429 Fts5Parse *pParse, /* Parse context */ |
| 1430 Fts5ExprNearset *pNear, /* Existing nearset, or NULL */ |
| 1431 Fts5ExprPhrase *pPhrase /* Recently parsed phrase */ |
| 1432 ){ |
| 1433 const int SZALLOC = 8; |
| 1434 Fts5ExprNearset *pRet = 0; |
| 1435 |
| 1436 if( pParse->rc==SQLITE_OK ){ |
| 1437 if( pPhrase==0 ){ |
| 1438 return pNear; |
| 1439 } |
| 1440 if( pNear==0 ){ |
| 1441 int nByte = sizeof(Fts5ExprNearset) + SZALLOC * sizeof(Fts5ExprPhrase*); |
| 1442 pRet = sqlite3_malloc(nByte); |
| 1443 if( pRet==0 ){ |
| 1444 pParse->rc = SQLITE_NOMEM; |
| 1445 }else{ |
| 1446 memset(pRet, 0, nByte); |
| 1447 } |
| 1448 }else if( (pNear->nPhrase % SZALLOC)==0 ){ |
| 1449 int nNew = pNear->nPhrase + SZALLOC; |
| 1450 int nByte = sizeof(Fts5ExprNearset) + nNew * sizeof(Fts5ExprPhrase*); |
| 1451 |
| 1452 pRet = (Fts5ExprNearset*)sqlite3_realloc(pNear, nByte); |
| 1453 if( pRet==0 ){ |
| 1454 pParse->rc = SQLITE_NOMEM; |
| 1455 } |
| 1456 }else{ |
| 1457 pRet = pNear; |
| 1458 } |
| 1459 } |
| 1460 |
| 1461 if( pRet==0 ){ |
| 1462 assert( pParse->rc!=SQLITE_OK ); |
| 1463 sqlite3Fts5ParseNearsetFree(pNear); |
| 1464 sqlite3Fts5ParsePhraseFree(pPhrase); |
| 1465 }else{ |
| 1466 if( pRet->nPhrase>0 ){ |
| 1467 Fts5ExprPhrase *pLast = pRet->apPhrase[pRet->nPhrase-1]; |
| 1468 assert( pLast==pParse->apPhrase[pParse->nPhrase-2] ); |
| 1469 if( pPhrase->nTerm==0 ){ |
| 1470 fts5ExprPhraseFree(pPhrase); |
| 1471 pRet->nPhrase--; |
| 1472 pParse->nPhrase--; |
| 1473 pPhrase = pLast; |
| 1474 }else if( pLast->nTerm==0 ){ |
| 1475 fts5ExprPhraseFree(pLast); |
| 1476 pParse->apPhrase[pParse->nPhrase-2] = pPhrase; |
| 1477 pParse->nPhrase--; |
| 1478 pRet->nPhrase--; |
| 1479 } |
| 1480 } |
| 1481 pRet->apPhrase[pRet->nPhrase++] = pPhrase; |
| 1482 } |
| 1483 return pRet; |
| 1484 } |
| 1485 |
| 1486 typedef struct TokenCtx TokenCtx; |
| 1487 struct TokenCtx { |
| 1488 Fts5ExprPhrase *pPhrase; |
| 1489 int rc; |
| 1490 }; |
| 1491 |
| 1492 /* |
| 1493 ** Callback for tokenizing terms used by ParseTerm(). |
| 1494 */ |
| 1495 static int fts5ParseTokenize( |
| 1496 void *pContext, /* Pointer to Fts5InsertCtx object */ |
| 1497 int tflags, /* Mask of FTS5_TOKEN_* flags */ |
| 1498 const char *pToken, /* Buffer containing token */ |
| 1499 int nToken, /* Size of token in bytes */ |
| 1500 int iUnused1, /* Start offset of token */ |
| 1501 int iUnused2 /* End offset of token */ |
| 1502 ){ |
| 1503 int rc = SQLITE_OK; |
| 1504 const int SZALLOC = 8; |
| 1505 TokenCtx *pCtx = (TokenCtx*)pContext; |
| 1506 Fts5ExprPhrase *pPhrase = pCtx->pPhrase; |
| 1507 |
| 1508 UNUSED_PARAM2(iUnused1, iUnused2); |
| 1509 |
| 1510 /* If an error has already occurred, this is a no-op */ |
| 1511 if( pCtx->rc!=SQLITE_OK ) return pCtx->rc; |
| 1512 if( nToken>FTS5_MAX_TOKEN_SIZE ) nToken = FTS5_MAX_TOKEN_SIZE; |
| 1513 |
| 1514 if( pPhrase && pPhrase->nTerm>0 && (tflags & FTS5_TOKEN_COLOCATED) ){ |
| 1515 Fts5ExprTerm *pSyn; |
| 1516 int nByte = sizeof(Fts5ExprTerm) + sizeof(Fts5Buffer) + nToken+1; |
| 1517 pSyn = (Fts5ExprTerm*)sqlite3_malloc(nByte); |
| 1518 if( pSyn==0 ){ |
| 1519 rc = SQLITE_NOMEM; |
| 1520 }else{ |
| 1521 memset(pSyn, 0, nByte); |
| 1522 pSyn->zTerm = ((char*)pSyn) + sizeof(Fts5ExprTerm) + sizeof(Fts5Buffer); |
| 1523 memcpy(pSyn->zTerm, pToken, nToken); |
| 1524 pSyn->pSynonym = pPhrase->aTerm[pPhrase->nTerm-1].pSynonym; |
| 1525 pPhrase->aTerm[pPhrase->nTerm-1].pSynonym = pSyn; |
| 1526 } |
| 1527 }else{ |
| 1528 Fts5ExprTerm *pTerm; |
| 1529 if( pPhrase==0 || (pPhrase->nTerm % SZALLOC)==0 ){ |
| 1530 Fts5ExprPhrase *pNew; |
| 1531 int nNew = SZALLOC + (pPhrase ? pPhrase->nTerm : 0); |
| 1532 |
| 1533 pNew = (Fts5ExprPhrase*)sqlite3_realloc(pPhrase, |
| 1534 sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * nNew |
| 1535 ); |
| 1536 if( pNew==0 ){ |
| 1537 rc = SQLITE_NOMEM; |
| 1538 }else{ |
| 1539 if( pPhrase==0 ) memset(pNew, 0, sizeof(Fts5ExprPhrase)); |
| 1540 pCtx->pPhrase = pPhrase = pNew; |
| 1541 pNew->nTerm = nNew - SZALLOC; |
| 1542 } |
| 1543 } |
| 1544 |
| 1545 if( rc==SQLITE_OK ){ |
| 1546 pTerm = &pPhrase->aTerm[pPhrase->nTerm++]; |
| 1547 memset(pTerm, 0, sizeof(Fts5ExprTerm)); |
| 1548 pTerm->zTerm = sqlite3Fts5Strndup(&rc, pToken, nToken); |
| 1549 } |
| 1550 } |
| 1551 |
| 1552 pCtx->rc = rc; |
| 1553 return rc; |
| 1554 } |
| 1555 |
| 1556 |
| 1557 /* |
| 1558 ** Free the phrase object passed as the only argument. |
| 1559 */ |
| 1560 void sqlite3Fts5ParsePhraseFree(Fts5ExprPhrase *pPhrase){ |
| 1561 fts5ExprPhraseFree(pPhrase); |
| 1562 } |
| 1563 |
| 1564 /* |
| 1565 ** Free the phrase object passed as the second argument. |
| 1566 */ |
| 1567 void sqlite3Fts5ParseNearsetFree(Fts5ExprNearset *pNear){ |
| 1568 if( pNear ){ |
| 1569 int i; |
| 1570 for(i=0; i<pNear->nPhrase; i++){ |
| 1571 fts5ExprPhraseFree(pNear->apPhrase[i]); |
| 1572 } |
| 1573 sqlite3_free(pNear->pColset); |
| 1574 sqlite3_free(pNear); |
| 1575 } |
| 1576 } |
| 1577 |
| 1578 void sqlite3Fts5ParseFinished(Fts5Parse *pParse, Fts5ExprNode *p){ |
| 1579 assert( pParse->pExpr==0 ); |
| 1580 pParse->pExpr = p; |
| 1581 } |
| 1582 |
| 1583 /* |
| 1584 ** This function is called by the parser to process a string token. The |
| 1585 ** string may or may not be quoted. In any case it is tokenized and a |
| 1586 ** phrase object consisting of all tokens returned. |
| 1587 */ |
| 1588 Fts5ExprPhrase *sqlite3Fts5ParseTerm( |
| 1589 Fts5Parse *pParse, /* Parse context */ |
| 1590 Fts5ExprPhrase *pAppend, /* Phrase to append to */ |
| 1591 Fts5Token *pToken, /* String to tokenize */ |
| 1592 int bPrefix /* True if there is a trailing "*" */ |
| 1593 ){ |
| 1594 Fts5Config *pConfig = pParse->pConfig; |
| 1595 TokenCtx sCtx; /* Context object passed to callback */ |
| 1596 int rc; /* Tokenize return code */ |
| 1597 char *z = 0; |
| 1598 |
| 1599 memset(&sCtx, 0, sizeof(TokenCtx)); |
| 1600 sCtx.pPhrase = pAppend; |
| 1601 |
| 1602 rc = fts5ParseStringFromToken(pToken, &z); |
| 1603 if( rc==SQLITE_OK ){ |
| 1604 int flags = FTS5_TOKENIZE_QUERY | (bPrefix ? FTS5_TOKENIZE_PREFIX : 0); |
| 1605 int n; |
| 1606 sqlite3Fts5Dequote(z); |
| 1607 n = (int)strlen(z); |
| 1608 rc = sqlite3Fts5Tokenize(pConfig, flags, z, n, &sCtx, fts5ParseTokenize); |
| 1609 } |
| 1610 sqlite3_free(z); |
| 1611 if( rc || (rc = sCtx.rc) ){ |
| 1612 pParse->rc = rc; |
| 1613 fts5ExprPhraseFree(sCtx.pPhrase); |
| 1614 sCtx.pPhrase = 0; |
| 1615 }else{ |
| 1616 |
| 1617 if( pAppend==0 ){ |
| 1618 if( (pParse->nPhrase % 8)==0 ){ |
| 1619 int nByte = sizeof(Fts5ExprPhrase*) * (pParse->nPhrase + 8); |
| 1620 Fts5ExprPhrase **apNew; |
| 1621 apNew = (Fts5ExprPhrase**)sqlite3_realloc(pParse->apPhrase, nByte); |
| 1622 if( apNew==0 ){ |
| 1623 pParse->rc = SQLITE_NOMEM; |
| 1624 fts5ExprPhraseFree(sCtx.pPhrase); |
| 1625 return 0; |
| 1626 } |
| 1627 pParse->apPhrase = apNew; |
| 1628 } |
| 1629 pParse->nPhrase++; |
| 1630 } |
| 1631 |
| 1632 if( sCtx.pPhrase==0 ){ |
| 1633 /* This happens when parsing a token or quoted phrase that contains |
| 1634 ** no token characters at all. (e.g ... MATCH '""'). */ |
| 1635 sCtx.pPhrase = sqlite3Fts5MallocZero(&pParse->rc, sizeof(Fts5ExprPhrase)); |
| 1636 }else if( sCtx.pPhrase->nTerm ){ |
| 1637 sCtx.pPhrase->aTerm[sCtx.pPhrase->nTerm-1].bPrefix = bPrefix; |
| 1638 } |
| 1639 pParse->apPhrase[pParse->nPhrase-1] = sCtx.pPhrase; |
| 1640 } |
| 1641 |
| 1642 return sCtx.pPhrase; |
| 1643 } |
| 1644 |
| 1645 /* |
| 1646 ** Create a new FTS5 expression by cloning phrase iPhrase of the |
| 1647 ** expression passed as the second argument. |
| 1648 */ |
| 1649 int sqlite3Fts5ExprClonePhrase( |
| 1650 Fts5Expr *pExpr, |
| 1651 int iPhrase, |
| 1652 Fts5Expr **ppNew |
| 1653 ){ |
| 1654 int rc = SQLITE_OK; /* Return code */ |
| 1655 Fts5ExprPhrase *pOrig; /* The phrase extracted from pExpr */ |
| 1656 Fts5Expr *pNew = 0; /* Expression to return via *ppNew */ |
| 1657 TokenCtx sCtx = {0,0}; /* Context object for fts5ParseTokenize */ |
| 1658 |
| 1659 pOrig = pExpr->apExprPhrase[iPhrase]; |
| 1660 pNew = (Fts5Expr*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Expr)); |
| 1661 if( rc==SQLITE_OK ){ |
| 1662 pNew->apExprPhrase = (Fts5ExprPhrase**)sqlite3Fts5MallocZero(&rc, |
| 1663 sizeof(Fts5ExprPhrase*)); |
| 1664 } |
| 1665 if( rc==SQLITE_OK ){ |
| 1666 pNew->pRoot = (Fts5ExprNode*)sqlite3Fts5MallocZero(&rc, |
| 1667 sizeof(Fts5ExprNode)); |
| 1668 } |
| 1669 if( rc==SQLITE_OK ){ |
| 1670 pNew->pRoot->pNear = (Fts5ExprNearset*)sqlite3Fts5MallocZero(&rc, |
| 1671 sizeof(Fts5ExprNearset) + sizeof(Fts5ExprPhrase*)); |
| 1672 } |
| 1673 if( rc==SQLITE_OK ){ |
| 1674 Fts5Colset *pColsetOrig = pOrig->pNode->pNear->pColset; |
| 1675 if( pColsetOrig ){ |
| 1676 int nByte = sizeof(Fts5Colset) + (pColsetOrig->nCol-1) * sizeof(int); |
| 1677 Fts5Colset *pColset = (Fts5Colset*)sqlite3Fts5MallocZero(&rc, nByte); |
| 1678 if( pColset ){ |
| 1679 memcpy(pColset, pColsetOrig, nByte); |
| 1680 } |
| 1681 pNew->pRoot->pNear->pColset = pColset; |
| 1682 } |
| 1683 } |
| 1684 |
| 1685 if( pOrig->nTerm ){ |
| 1686 int i; /* Used to iterate through phrase terms */ |
| 1687 for(i=0; rc==SQLITE_OK && i<pOrig->nTerm; i++){ |
| 1688 int tflags = 0; |
| 1689 Fts5ExprTerm *p; |
| 1690 for(p=&pOrig->aTerm[i]; p && rc==SQLITE_OK; p=p->pSynonym){ |
| 1691 const char *zTerm = p->zTerm; |
| 1692 rc = fts5ParseTokenize((void*)&sCtx, tflags, zTerm, (int)strlen(zTerm), |
| 1693 0, 0); |
| 1694 tflags = FTS5_TOKEN_COLOCATED; |
| 1695 } |
| 1696 if( rc==SQLITE_OK ){ |
| 1697 sCtx.pPhrase->aTerm[i].bPrefix = pOrig->aTerm[i].bPrefix; |
| 1698 } |
| 1699 } |
| 1700 }else{ |
| 1701 /* This happens when parsing a token or quoted phrase that contains |
| 1702 ** no token characters at all. (e.g ... MATCH '""'). */ |
| 1703 sCtx.pPhrase = sqlite3Fts5MallocZero(&rc, sizeof(Fts5ExprPhrase)); |
| 1704 } |
| 1705 |
| 1706 if( rc==SQLITE_OK ){ |
| 1707 /* All the allocations succeeded. Put the expression object together. */ |
| 1708 pNew->pIndex = pExpr->pIndex; |
| 1709 pNew->pConfig = pExpr->pConfig; |
| 1710 pNew->nPhrase = 1; |
| 1711 pNew->apExprPhrase[0] = sCtx.pPhrase; |
| 1712 pNew->pRoot->pNear->apPhrase[0] = sCtx.pPhrase; |
| 1713 pNew->pRoot->pNear->nPhrase = 1; |
| 1714 sCtx.pPhrase->pNode = pNew->pRoot; |
| 1715 |
| 1716 if( pOrig->nTerm==1 && pOrig->aTerm[0].pSynonym==0 ){ |
| 1717 pNew->pRoot->eType = FTS5_TERM; |
| 1718 pNew->pRoot->xNext = fts5ExprNodeNext_TERM; |
| 1719 }else{ |
| 1720 pNew->pRoot->eType = FTS5_STRING; |
| 1721 pNew->pRoot->xNext = fts5ExprNodeNext_STRING; |
| 1722 } |
| 1723 }else{ |
| 1724 sqlite3Fts5ExprFree(pNew); |
| 1725 fts5ExprPhraseFree(sCtx.pPhrase); |
| 1726 pNew = 0; |
| 1727 } |
| 1728 |
| 1729 *ppNew = pNew; |
| 1730 return rc; |
| 1731 } |
| 1732 |
| 1733 |
| 1734 /* |
| 1735 ** Token pTok has appeared in a MATCH expression where the NEAR operator |
| 1736 ** is expected. If token pTok does not contain "NEAR", store an error |
| 1737 ** in the pParse object. |
| 1738 */ |
| 1739 void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token *pTok){ |
| 1740 if( pTok->n!=4 || memcmp("NEAR", pTok->p, 4) ){ |
| 1741 sqlite3Fts5ParseError( |
| 1742 pParse, "fts5: syntax error near \"%.*s\"", pTok->n, pTok->p |
| 1743 ); |
| 1744 } |
| 1745 } |
| 1746 |
| 1747 void sqlite3Fts5ParseSetDistance( |
| 1748 Fts5Parse *pParse, |
| 1749 Fts5ExprNearset *pNear, |
| 1750 Fts5Token *p |
| 1751 ){ |
| 1752 if( pNear ){ |
| 1753 int nNear = 0; |
| 1754 int i; |
| 1755 if( p->n ){ |
| 1756 for(i=0; i<p->n; i++){ |
| 1757 char c = (char)p->p[i]; |
| 1758 if( c<'0' || c>'9' ){ |
| 1759 sqlite3Fts5ParseError( |
| 1760 pParse, "expected integer, got \"%.*s\"", p->n, p->p |
| 1761 ); |
| 1762 return; |
| 1763 } |
| 1764 nNear = nNear * 10 + (p->p[i] - '0'); |
| 1765 } |
| 1766 }else{ |
| 1767 nNear = FTS5_DEFAULT_NEARDIST; |
| 1768 } |
| 1769 pNear->nNear = nNear; |
| 1770 } |
| 1771 } |
| 1772 |
| 1773 /* |
| 1774 ** The second argument passed to this function may be NULL, or it may be |
| 1775 ** an existing Fts5Colset object. This function returns a pointer to |
| 1776 ** a new colset object containing the contents of (p) with new value column |
| 1777 ** number iCol appended. |
| 1778 ** |
| 1779 ** If an OOM error occurs, store an error code in pParse and return NULL. |
| 1780 ** The old colset object (if any) is not freed in this case. |
| 1781 */ |
| 1782 static Fts5Colset *fts5ParseColset( |
| 1783 Fts5Parse *pParse, /* Store SQLITE_NOMEM here if required */ |
| 1784 Fts5Colset *p, /* Existing colset object */ |
| 1785 int iCol /* New column to add to colset object */ |
| 1786 ){ |
| 1787 int nCol = p ? p->nCol : 0; /* Num. columns already in colset object */ |
| 1788 Fts5Colset *pNew; /* New colset object to return */ |
| 1789 |
| 1790 assert( pParse->rc==SQLITE_OK ); |
| 1791 assert( iCol>=0 && iCol<pParse->pConfig->nCol ); |
| 1792 |
| 1793 pNew = sqlite3_realloc(p, sizeof(Fts5Colset) + sizeof(int)*nCol); |
| 1794 if( pNew==0 ){ |
| 1795 pParse->rc = SQLITE_NOMEM; |
| 1796 }else{ |
| 1797 int *aiCol = pNew->aiCol; |
| 1798 int i, j; |
| 1799 for(i=0; i<nCol; i++){ |
| 1800 if( aiCol[i]==iCol ) return pNew; |
| 1801 if( aiCol[i]>iCol ) break; |
| 1802 } |
| 1803 for(j=nCol; j>i; j--){ |
| 1804 aiCol[j] = aiCol[j-1]; |
| 1805 } |
| 1806 aiCol[i] = iCol; |
| 1807 pNew->nCol = nCol+1; |
| 1808 |
| 1809 #ifndef NDEBUG |
| 1810 /* Check that the array is in order and contains no duplicate entries. */ |
| 1811 for(i=1; i<pNew->nCol; i++) assert( pNew->aiCol[i]>pNew->aiCol[i-1] ); |
| 1812 #endif |
| 1813 } |
| 1814 |
| 1815 return pNew; |
| 1816 } |
| 1817 |
| 1818 /* |
| 1819 ** Allocate and return an Fts5Colset object specifying the inverse of |
| 1820 ** the colset passed as the second argument. Free the colset passed |
| 1821 ** as the second argument before returning. |
| 1822 */ |
| 1823 Fts5Colset *sqlite3Fts5ParseColsetInvert(Fts5Parse *pParse, Fts5Colset *p){ |
| 1824 Fts5Colset *pRet; |
| 1825 int nCol = pParse->pConfig->nCol; |
| 1826 |
| 1827 pRet = (Fts5Colset*)sqlite3Fts5MallocZero(&pParse->rc, |
| 1828 sizeof(Fts5Colset) + sizeof(int)*nCol |
| 1829 ); |
| 1830 if( pRet ){ |
| 1831 int i; |
| 1832 int iOld = 0; |
| 1833 for(i=0; i<nCol; i++){ |
| 1834 if( iOld>=p->nCol || p->aiCol[iOld]!=i ){ |
| 1835 pRet->aiCol[pRet->nCol++] = i; |
| 1836 }else{ |
| 1837 iOld++; |
| 1838 } |
| 1839 } |
| 1840 } |
| 1841 |
| 1842 sqlite3_free(p); |
| 1843 return pRet; |
| 1844 } |
| 1845 |
| 1846 Fts5Colset *sqlite3Fts5ParseColset( |
| 1847 Fts5Parse *pParse, /* Store SQLITE_NOMEM here if required */ |
| 1848 Fts5Colset *pColset, /* Existing colset object */ |
| 1849 Fts5Token *p |
| 1850 ){ |
| 1851 Fts5Colset *pRet = 0; |
| 1852 int iCol; |
| 1853 char *z; /* Dequoted copy of token p */ |
| 1854 |
| 1855 z = sqlite3Fts5Strndup(&pParse->rc, p->p, p->n); |
| 1856 if( pParse->rc==SQLITE_OK ){ |
| 1857 Fts5Config *pConfig = pParse->pConfig; |
| 1858 sqlite3Fts5Dequote(z); |
| 1859 for(iCol=0; iCol<pConfig->nCol; iCol++){ |
| 1860 if( 0==sqlite3_stricmp(pConfig->azCol[iCol], z) ) break; |
| 1861 } |
| 1862 if( iCol==pConfig->nCol ){ |
| 1863 sqlite3Fts5ParseError(pParse, "no such column: %s", z); |
| 1864 }else{ |
| 1865 pRet = fts5ParseColset(pParse, pColset, iCol); |
| 1866 } |
| 1867 sqlite3_free(z); |
| 1868 } |
| 1869 |
| 1870 if( pRet==0 ){ |
| 1871 assert( pParse->rc!=SQLITE_OK ); |
| 1872 sqlite3_free(pColset); |
| 1873 } |
| 1874 |
| 1875 return pRet; |
| 1876 } |
| 1877 |
| 1878 void sqlite3Fts5ParseSetColset( |
| 1879 Fts5Parse *pParse, |
| 1880 Fts5ExprNearset *pNear, |
| 1881 Fts5Colset *pColset |
| 1882 ){ |
| 1883 if( pParse->pConfig->eDetail==FTS5_DETAIL_NONE ){ |
| 1884 pParse->rc = SQLITE_ERROR; |
| 1885 pParse->zErr = sqlite3_mprintf( |
| 1886 "fts5: column queries are not supported (detail=none)" |
| 1887 ); |
| 1888 sqlite3_free(pColset); |
| 1889 return; |
| 1890 } |
| 1891 |
| 1892 if( pNear ){ |
| 1893 pNear->pColset = pColset; |
| 1894 }else{ |
| 1895 sqlite3_free(pColset); |
| 1896 } |
| 1897 } |
| 1898 |
| 1899 static void fts5ExprAssignXNext(Fts5ExprNode *pNode){ |
| 1900 switch( pNode->eType ){ |
| 1901 case FTS5_STRING: { |
| 1902 Fts5ExprNearset *pNear = pNode->pNear; |
| 1903 if( pNear->nPhrase==1 && pNear->apPhrase[0]->nTerm==1 |
| 1904 && pNear->apPhrase[0]->aTerm[0].pSynonym==0 |
| 1905 ){ |
| 1906 pNode->eType = FTS5_TERM; |
| 1907 pNode->xNext = fts5ExprNodeNext_TERM; |
| 1908 }else{ |
| 1909 pNode->xNext = fts5ExprNodeNext_STRING; |
| 1910 } |
| 1911 break; |
| 1912 }; |
| 1913 |
| 1914 case FTS5_OR: { |
| 1915 pNode->xNext = fts5ExprNodeNext_OR; |
| 1916 break; |
| 1917 }; |
| 1918 |
| 1919 case FTS5_AND: { |
| 1920 pNode->xNext = fts5ExprNodeNext_AND; |
| 1921 break; |
| 1922 }; |
| 1923 |
| 1924 default: assert( pNode->eType==FTS5_NOT ); { |
| 1925 pNode->xNext = fts5ExprNodeNext_NOT; |
| 1926 break; |
| 1927 }; |
| 1928 } |
| 1929 } |
| 1930 |
| 1931 static void fts5ExprAddChildren(Fts5ExprNode *p, Fts5ExprNode *pSub){ |
| 1932 if( p->eType!=FTS5_NOT && pSub->eType==p->eType ){ |
| 1933 int nByte = sizeof(Fts5ExprNode*) * pSub->nChild; |
| 1934 memcpy(&p->apChild[p->nChild], pSub->apChild, nByte); |
| 1935 p->nChild += pSub->nChild; |
| 1936 sqlite3_free(pSub); |
| 1937 }else{ |
| 1938 p->apChild[p->nChild++] = pSub; |
| 1939 } |
| 1940 } |
| 1941 |
| 1942 /* |
| 1943 ** Allocate and return a new expression object. If anything goes wrong (i.e. |
| 1944 ** OOM error), leave an error code in pParse and return NULL. |
| 1945 */ |
| 1946 Fts5ExprNode *sqlite3Fts5ParseNode( |
| 1947 Fts5Parse *pParse, /* Parse context */ |
| 1948 int eType, /* FTS5_STRING, AND, OR or NOT */ |
| 1949 Fts5ExprNode *pLeft, /* Left hand child expression */ |
| 1950 Fts5ExprNode *pRight, /* Right hand child expression */ |
| 1951 Fts5ExprNearset *pNear /* For STRING expressions, the near cluster */ |
| 1952 ){ |
| 1953 Fts5ExprNode *pRet = 0; |
| 1954 |
| 1955 if( pParse->rc==SQLITE_OK ){ |
| 1956 int nChild = 0; /* Number of children of returned node */ |
| 1957 int nByte; /* Bytes of space to allocate for this node */ |
| 1958 |
| 1959 assert( (eType!=FTS5_STRING && !pNear) |
| 1960 || (eType==FTS5_STRING && !pLeft && !pRight) |
| 1961 ); |
| 1962 if( eType==FTS5_STRING && pNear==0 ) return 0; |
| 1963 if( eType!=FTS5_STRING && pLeft==0 ) return pRight; |
| 1964 if( eType!=FTS5_STRING && pRight==0 ) return pLeft; |
| 1965 |
| 1966 if( eType==FTS5_NOT ){ |
| 1967 nChild = 2; |
| 1968 }else if( eType==FTS5_AND || eType==FTS5_OR ){ |
| 1969 nChild = 2; |
| 1970 if( pLeft->eType==eType ) nChild += pLeft->nChild-1; |
| 1971 if( pRight->eType==eType ) nChild += pRight->nChild-1; |
| 1972 } |
| 1973 |
| 1974 nByte = sizeof(Fts5ExprNode) + sizeof(Fts5ExprNode*)*(nChild-1); |
| 1975 pRet = (Fts5ExprNode*)sqlite3Fts5MallocZero(&pParse->rc, nByte); |
| 1976 |
| 1977 if( pRet ){ |
| 1978 pRet->eType = eType; |
| 1979 pRet->pNear = pNear; |
| 1980 fts5ExprAssignXNext(pRet); |
| 1981 if( eType==FTS5_STRING ){ |
| 1982 int iPhrase; |
| 1983 for(iPhrase=0; iPhrase<pNear->nPhrase; iPhrase++){ |
| 1984 pNear->apPhrase[iPhrase]->pNode = pRet; |
| 1985 if( pNear->apPhrase[iPhrase]->nTerm==0 ){ |
| 1986 pRet->xNext = 0; |
| 1987 pRet->eType = FTS5_EOF; |
| 1988 } |
| 1989 } |
| 1990 |
| 1991 if( pParse->pConfig->eDetail!=FTS5_DETAIL_FULL |
| 1992 && (pNear->nPhrase!=1 || pNear->apPhrase[0]->nTerm>1) |
| 1993 ){ |
| 1994 assert( pParse->rc==SQLITE_OK ); |
| 1995 pParse->rc = SQLITE_ERROR; |
| 1996 assert( pParse->zErr==0 ); |
| 1997 pParse->zErr = sqlite3_mprintf( |
| 1998 "fts5: %s queries are not supported (detail!=full)", |
| 1999 pNear->nPhrase==1 ? "phrase": "NEAR" |
| 2000 ); |
| 2001 sqlite3_free(pRet); |
| 2002 pRet = 0; |
| 2003 } |
| 2004 |
| 2005 }else{ |
| 2006 fts5ExprAddChildren(pRet, pLeft); |
| 2007 fts5ExprAddChildren(pRet, pRight); |
| 2008 } |
| 2009 } |
| 2010 } |
| 2011 |
| 2012 if( pRet==0 ){ |
| 2013 assert( pParse->rc!=SQLITE_OK ); |
| 2014 sqlite3Fts5ParseNodeFree(pLeft); |
| 2015 sqlite3Fts5ParseNodeFree(pRight); |
| 2016 sqlite3Fts5ParseNearsetFree(pNear); |
| 2017 } |
| 2018 return pRet; |
| 2019 } |
| 2020 |
| 2021 Fts5ExprNode *sqlite3Fts5ParseImplicitAnd( |
| 2022 Fts5Parse *pParse, /* Parse context */ |
| 2023 Fts5ExprNode *pLeft, /* Left hand child expression */ |
| 2024 Fts5ExprNode *pRight /* Right hand child expression */ |
| 2025 ){ |
| 2026 Fts5ExprNode *pRet = 0; |
| 2027 Fts5ExprNode *pPrev; |
| 2028 |
| 2029 if( pParse->rc ){ |
| 2030 sqlite3Fts5ParseNodeFree(pLeft); |
| 2031 sqlite3Fts5ParseNodeFree(pRight); |
| 2032 }else{ |
| 2033 |
| 2034 assert( pLeft->eType==FTS5_STRING |
| 2035 || pLeft->eType==FTS5_TERM |
| 2036 || pLeft->eType==FTS5_EOF |
| 2037 || pLeft->eType==FTS5_AND |
| 2038 ); |
| 2039 assert( pRight->eType==FTS5_STRING |
| 2040 || pRight->eType==FTS5_TERM |
| 2041 || pRight->eType==FTS5_EOF |
| 2042 ); |
| 2043 |
| 2044 if( pLeft->eType==FTS5_AND ){ |
| 2045 pPrev = pLeft->apChild[pLeft->nChild-1]; |
| 2046 }else{ |
| 2047 pPrev = pLeft; |
| 2048 } |
| 2049 assert( pPrev->eType==FTS5_STRING |
| 2050 || pPrev->eType==FTS5_TERM |
| 2051 || pPrev->eType==FTS5_EOF |
| 2052 ); |
| 2053 |
| 2054 if( pRight->eType==FTS5_EOF ){ |
| 2055 assert( pParse->apPhrase[pParse->nPhrase-1]==pRight->pNear->apPhrase[0] ); |
| 2056 sqlite3Fts5ParseNodeFree(pRight); |
| 2057 pRet = pLeft; |
| 2058 pParse->nPhrase--; |
| 2059 } |
| 2060 else if( pPrev->eType==FTS5_EOF ){ |
| 2061 Fts5ExprPhrase **ap; |
| 2062 |
| 2063 if( pPrev==pLeft ){ |
| 2064 pRet = pRight; |
| 2065 }else{ |
| 2066 pLeft->apChild[pLeft->nChild-1] = pRight; |
| 2067 pRet = pLeft; |
| 2068 } |
| 2069 |
| 2070 ap = &pParse->apPhrase[pParse->nPhrase-1-pRight->pNear->nPhrase]; |
| 2071 assert( ap[0]==pPrev->pNear->apPhrase[0] ); |
| 2072 memmove(ap, &ap[1], sizeof(Fts5ExprPhrase*)*pRight->pNear->nPhrase); |
| 2073 pParse->nPhrase--; |
| 2074 |
| 2075 sqlite3Fts5ParseNodeFree(pPrev); |
| 2076 } |
| 2077 else{ |
| 2078 pRet = sqlite3Fts5ParseNode(pParse, FTS5_AND, pLeft, pRight, 0); |
| 2079 } |
| 2080 } |
| 2081 |
| 2082 return pRet; |
| 2083 } |
| 2084 |
| 2085 static char *fts5ExprTermPrint(Fts5ExprTerm *pTerm){ |
| 2086 int nByte = 0; |
| 2087 Fts5ExprTerm *p; |
| 2088 char *zQuoted; |
| 2089 |
| 2090 /* Determine the maximum amount of space required. */ |
| 2091 for(p=pTerm; p; p=p->pSynonym){ |
| 2092 nByte += (int)strlen(pTerm->zTerm) * 2 + 3 + 2; |
| 2093 } |
| 2094 zQuoted = sqlite3_malloc(nByte); |
| 2095 |
| 2096 if( zQuoted ){ |
| 2097 int i = 0; |
| 2098 for(p=pTerm; p; p=p->pSynonym){ |
| 2099 char *zIn = p->zTerm; |
| 2100 zQuoted[i++] = '"'; |
| 2101 while( *zIn ){ |
| 2102 if( *zIn=='"' ) zQuoted[i++] = '"'; |
| 2103 zQuoted[i++] = *zIn++; |
| 2104 } |
| 2105 zQuoted[i++] = '"'; |
| 2106 if( p->pSynonym ) zQuoted[i++] = '|'; |
| 2107 } |
| 2108 if( pTerm->bPrefix ){ |
| 2109 zQuoted[i++] = ' '; |
| 2110 zQuoted[i++] = '*'; |
| 2111 } |
| 2112 zQuoted[i++] = '\0'; |
| 2113 } |
| 2114 return zQuoted; |
| 2115 } |
| 2116 |
| 2117 static char *fts5PrintfAppend(char *zApp, const char *zFmt, ...){ |
| 2118 char *zNew; |
| 2119 va_list ap; |
| 2120 va_start(ap, zFmt); |
| 2121 zNew = sqlite3_vmprintf(zFmt, ap); |
| 2122 va_end(ap); |
| 2123 if( zApp && zNew ){ |
| 2124 char *zNew2 = sqlite3_mprintf("%s%s", zApp, zNew); |
| 2125 sqlite3_free(zNew); |
| 2126 zNew = zNew2; |
| 2127 } |
| 2128 sqlite3_free(zApp); |
| 2129 return zNew; |
| 2130 } |
| 2131 |
| 2132 /* |
| 2133 ** Compose a tcl-readable representation of expression pExpr. Return a |
| 2134 ** pointer to a buffer containing that representation. It is the |
| 2135 ** responsibility of the caller to at some point free the buffer using |
| 2136 ** sqlite3_free(). |
| 2137 */ |
| 2138 static char *fts5ExprPrintTcl( |
| 2139 Fts5Config *pConfig, |
| 2140 const char *zNearsetCmd, |
| 2141 Fts5ExprNode *pExpr |
| 2142 ){ |
| 2143 char *zRet = 0; |
| 2144 if( pExpr->eType==FTS5_STRING || pExpr->eType==FTS5_TERM ){ |
| 2145 Fts5ExprNearset *pNear = pExpr->pNear; |
| 2146 int i; |
| 2147 int iTerm; |
| 2148 |
| 2149 zRet = fts5PrintfAppend(zRet, "%s ", zNearsetCmd); |
| 2150 if( zRet==0 ) return 0; |
| 2151 if( pNear->pColset ){ |
| 2152 int *aiCol = pNear->pColset->aiCol; |
| 2153 int nCol = pNear->pColset->nCol; |
| 2154 if( nCol==1 ){ |
| 2155 zRet = fts5PrintfAppend(zRet, "-col %d ", aiCol[0]); |
| 2156 }else{ |
| 2157 zRet = fts5PrintfAppend(zRet, "-col {%d", aiCol[0]); |
| 2158 for(i=1; i<pNear->pColset->nCol; i++){ |
| 2159 zRet = fts5PrintfAppend(zRet, " %d", aiCol[i]); |
| 2160 } |
| 2161 zRet = fts5PrintfAppend(zRet, "} "); |
| 2162 } |
| 2163 if( zRet==0 ) return 0; |
| 2164 } |
| 2165 |
| 2166 if( pNear->nPhrase>1 ){ |
| 2167 zRet = fts5PrintfAppend(zRet, "-near %d ", pNear->nNear); |
| 2168 if( zRet==0 ) return 0; |
| 2169 } |
| 2170 |
| 2171 zRet = fts5PrintfAppend(zRet, "--"); |
| 2172 if( zRet==0 ) return 0; |
| 2173 |
| 2174 for(i=0; i<pNear->nPhrase; i++){ |
| 2175 Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; |
| 2176 |
| 2177 zRet = fts5PrintfAppend(zRet, " {"); |
| 2178 for(iTerm=0; zRet && iTerm<pPhrase->nTerm; iTerm++){ |
| 2179 char *zTerm = pPhrase->aTerm[iTerm].zTerm; |
| 2180 zRet = fts5PrintfAppend(zRet, "%s%s", iTerm==0?"":" ", zTerm); |
| 2181 if( pPhrase->aTerm[iTerm].bPrefix ){ |
| 2182 zRet = fts5PrintfAppend(zRet, "*"); |
| 2183 } |
| 2184 } |
| 2185 |
| 2186 if( zRet ) zRet = fts5PrintfAppend(zRet, "}"); |
| 2187 if( zRet==0 ) return 0; |
| 2188 } |
| 2189 |
| 2190 }else{ |
| 2191 char const *zOp = 0; |
| 2192 int i; |
| 2193 switch( pExpr->eType ){ |
| 2194 case FTS5_AND: zOp = "AND"; break; |
| 2195 case FTS5_NOT: zOp = "NOT"; break; |
| 2196 default: |
| 2197 assert( pExpr->eType==FTS5_OR ); |
| 2198 zOp = "OR"; |
| 2199 break; |
| 2200 } |
| 2201 |
| 2202 zRet = sqlite3_mprintf("%s", zOp); |
| 2203 for(i=0; zRet && i<pExpr->nChild; i++){ |
| 2204 char *z = fts5ExprPrintTcl(pConfig, zNearsetCmd, pExpr->apChild[i]); |
| 2205 if( !z ){ |
| 2206 sqlite3_free(zRet); |
| 2207 zRet = 0; |
| 2208 }else{ |
| 2209 zRet = fts5PrintfAppend(zRet, " [%z]", z); |
| 2210 } |
| 2211 } |
| 2212 } |
| 2213 |
| 2214 return zRet; |
| 2215 } |
| 2216 |
| 2217 static char *fts5ExprPrint(Fts5Config *pConfig, Fts5ExprNode *pExpr){ |
| 2218 char *zRet = 0; |
| 2219 if( pExpr->eType==0 ){ |
| 2220 return sqlite3_mprintf("\"\""); |
| 2221 }else |
| 2222 if( pExpr->eType==FTS5_STRING || pExpr->eType==FTS5_TERM ){ |
| 2223 Fts5ExprNearset *pNear = pExpr->pNear; |
| 2224 int i; |
| 2225 int iTerm; |
| 2226 |
| 2227 if( pNear->pColset ){ |
| 2228 int iCol = pNear->pColset->aiCol[0]; |
| 2229 zRet = fts5PrintfAppend(zRet, "%s : ", pConfig->azCol[iCol]); |
| 2230 if( zRet==0 ) return 0; |
| 2231 } |
| 2232 |
| 2233 if( pNear->nPhrase>1 ){ |
| 2234 zRet = fts5PrintfAppend(zRet, "NEAR("); |
| 2235 if( zRet==0 ) return 0; |
| 2236 } |
| 2237 |
| 2238 for(i=0; i<pNear->nPhrase; i++){ |
| 2239 Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; |
| 2240 if( i!=0 ){ |
| 2241 zRet = fts5PrintfAppend(zRet, " "); |
| 2242 if( zRet==0 ) return 0; |
| 2243 } |
| 2244 for(iTerm=0; iTerm<pPhrase->nTerm; iTerm++){ |
| 2245 char *zTerm = fts5ExprTermPrint(&pPhrase->aTerm[iTerm]); |
| 2246 if( zTerm ){ |
| 2247 zRet = fts5PrintfAppend(zRet, "%s%s", iTerm==0?"":" + ", zTerm); |
| 2248 sqlite3_free(zTerm); |
| 2249 } |
| 2250 if( zTerm==0 || zRet==0 ){ |
| 2251 sqlite3_free(zRet); |
| 2252 return 0; |
| 2253 } |
| 2254 } |
| 2255 } |
| 2256 |
| 2257 if( pNear->nPhrase>1 ){ |
| 2258 zRet = fts5PrintfAppend(zRet, ", %d)", pNear->nNear); |
| 2259 if( zRet==0 ) return 0; |
| 2260 } |
| 2261 |
| 2262 }else{ |
| 2263 char const *zOp = 0; |
| 2264 int i; |
| 2265 |
| 2266 switch( pExpr->eType ){ |
| 2267 case FTS5_AND: zOp = " AND "; break; |
| 2268 case FTS5_NOT: zOp = " NOT "; break; |
| 2269 default: |
| 2270 assert( pExpr->eType==FTS5_OR ); |
| 2271 zOp = " OR "; |
| 2272 break; |
| 2273 } |
| 2274 |
| 2275 for(i=0; i<pExpr->nChild; i++){ |
| 2276 char *z = fts5ExprPrint(pConfig, pExpr->apChild[i]); |
| 2277 if( z==0 ){ |
| 2278 sqlite3_free(zRet); |
| 2279 zRet = 0; |
| 2280 }else{ |
| 2281 int e = pExpr->apChild[i]->eType; |
| 2282 int b = (e!=FTS5_STRING && e!=FTS5_TERM && e!=FTS5_EOF); |
| 2283 zRet = fts5PrintfAppend(zRet, "%s%s%z%s", |
| 2284 (i==0 ? "" : zOp), |
| 2285 (b?"(":""), z, (b?")":"") |
| 2286 ); |
| 2287 } |
| 2288 if( zRet==0 ) break; |
| 2289 } |
| 2290 } |
| 2291 |
| 2292 return zRet; |
| 2293 } |
| 2294 |
| 2295 /* |
| 2296 ** The implementation of user-defined scalar functions fts5_expr() (bTcl==0) |
| 2297 ** and fts5_expr_tcl() (bTcl!=0). |
| 2298 */ |
| 2299 static void fts5ExprFunction( |
| 2300 sqlite3_context *pCtx, /* Function call context */ |
| 2301 int nArg, /* Number of args */ |
| 2302 sqlite3_value **apVal, /* Function arguments */ |
| 2303 int bTcl |
| 2304 ){ |
| 2305 Fts5Global *pGlobal = (Fts5Global*)sqlite3_user_data(pCtx); |
| 2306 sqlite3 *db = sqlite3_context_db_handle(pCtx); |
| 2307 const char *zExpr = 0; |
| 2308 char *zErr = 0; |
| 2309 Fts5Expr *pExpr = 0; |
| 2310 int rc; |
| 2311 int i; |
| 2312 |
| 2313 const char **azConfig; /* Array of arguments for Fts5Config */ |
| 2314 const char *zNearsetCmd = "nearset"; |
| 2315 int nConfig; /* Size of azConfig[] */ |
| 2316 Fts5Config *pConfig = 0; |
| 2317 int iArg = 1; |
| 2318 |
| 2319 if( nArg<1 ){ |
| 2320 zErr = sqlite3_mprintf("wrong number of arguments to function %s", |
| 2321 bTcl ? "fts5_expr_tcl" : "fts5_expr" |
| 2322 ); |
| 2323 sqlite3_result_error(pCtx, zErr, -1); |
| 2324 sqlite3_free(zErr); |
| 2325 return; |
| 2326 } |
| 2327 |
| 2328 if( bTcl && nArg>1 ){ |
| 2329 zNearsetCmd = (const char*)sqlite3_value_text(apVal[1]); |
| 2330 iArg = 2; |
| 2331 } |
| 2332 |
| 2333 nConfig = 3 + (nArg-iArg); |
| 2334 azConfig = (const char**)sqlite3_malloc(sizeof(char*) * nConfig); |
| 2335 if( azConfig==0 ){ |
| 2336 sqlite3_result_error_nomem(pCtx); |
| 2337 return; |
| 2338 } |
| 2339 azConfig[0] = 0; |
| 2340 azConfig[1] = "main"; |
| 2341 azConfig[2] = "tbl"; |
| 2342 for(i=3; iArg<nArg; iArg++){ |
| 2343 azConfig[i++] = (const char*)sqlite3_value_text(apVal[iArg]); |
| 2344 } |
| 2345 |
| 2346 zExpr = (const char*)sqlite3_value_text(apVal[0]); |
| 2347 |
| 2348 rc = sqlite3Fts5ConfigParse(pGlobal, db, nConfig, azConfig, &pConfig, &zErr); |
| 2349 if( rc==SQLITE_OK ){ |
| 2350 rc = sqlite3Fts5ExprNew(pConfig, zExpr, &pExpr, &zErr); |
| 2351 } |
| 2352 if( rc==SQLITE_OK ){ |
| 2353 char *zText; |
| 2354 if( pExpr->pRoot->xNext==0 ){ |
| 2355 zText = sqlite3_mprintf(""); |
| 2356 }else if( bTcl ){ |
| 2357 zText = fts5ExprPrintTcl(pConfig, zNearsetCmd, pExpr->pRoot); |
| 2358 }else{ |
| 2359 zText = fts5ExprPrint(pConfig, pExpr->pRoot); |
| 2360 } |
| 2361 if( zText==0 ){ |
| 2362 rc = SQLITE_NOMEM; |
| 2363 }else{ |
| 2364 sqlite3_result_text(pCtx, zText, -1, SQLITE_TRANSIENT); |
| 2365 sqlite3_free(zText); |
| 2366 } |
| 2367 } |
| 2368 |
| 2369 if( rc!=SQLITE_OK ){ |
| 2370 if( zErr ){ |
| 2371 sqlite3_result_error(pCtx, zErr, -1); |
| 2372 sqlite3_free(zErr); |
| 2373 }else{ |
| 2374 sqlite3_result_error_code(pCtx, rc); |
| 2375 } |
| 2376 } |
| 2377 sqlite3_free((void *)azConfig); |
| 2378 sqlite3Fts5ConfigFree(pConfig); |
| 2379 sqlite3Fts5ExprFree(pExpr); |
| 2380 } |
| 2381 |
| 2382 static void fts5ExprFunctionHr( |
| 2383 sqlite3_context *pCtx, /* Function call context */ |
| 2384 int nArg, /* Number of args */ |
| 2385 sqlite3_value **apVal /* Function arguments */ |
| 2386 ){ |
| 2387 fts5ExprFunction(pCtx, nArg, apVal, 0); |
| 2388 } |
| 2389 static void fts5ExprFunctionTcl( |
| 2390 sqlite3_context *pCtx, /* Function call context */ |
| 2391 int nArg, /* Number of args */ |
| 2392 sqlite3_value **apVal /* Function arguments */ |
| 2393 ){ |
| 2394 fts5ExprFunction(pCtx, nArg, apVal, 1); |
| 2395 } |
| 2396 |
| 2397 /* |
| 2398 ** The implementation of an SQLite user-defined-function that accepts a |
| 2399 ** single integer as an argument. If the integer is an alpha-numeric |
| 2400 ** unicode code point, 1 is returned. Otherwise 0. |
| 2401 */ |
| 2402 static void fts5ExprIsAlnum( |
| 2403 sqlite3_context *pCtx, /* Function call context */ |
| 2404 int nArg, /* Number of args */ |
| 2405 sqlite3_value **apVal /* Function arguments */ |
| 2406 ){ |
| 2407 int iCode; |
| 2408 if( nArg!=1 ){ |
| 2409 sqlite3_result_error(pCtx, |
| 2410 "wrong number of arguments to function fts5_isalnum", -1 |
| 2411 ); |
| 2412 return; |
| 2413 } |
| 2414 iCode = sqlite3_value_int(apVal[0]); |
| 2415 sqlite3_result_int(pCtx, sqlite3Fts5UnicodeIsalnum(iCode)); |
| 2416 } |
| 2417 |
| 2418 static void fts5ExprFold( |
| 2419 sqlite3_context *pCtx, /* Function call context */ |
| 2420 int nArg, /* Number of args */ |
| 2421 sqlite3_value **apVal /* Function arguments */ |
| 2422 ){ |
| 2423 if( nArg!=1 && nArg!=2 ){ |
| 2424 sqlite3_result_error(pCtx, |
| 2425 "wrong number of arguments to function fts5_fold", -1 |
| 2426 ); |
| 2427 }else{ |
| 2428 int iCode; |
| 2429 int bRemoveDiacritics = 0; |
| 2430 iCode = sqlite3_value_int(apVal[0]); |
| 2431 if( nArg==2 ) bRemoveDiacritics = sqlite3_value_int(apVal[1]); |
| 2432 sqlite3_result_int(pCtx, sqlite3Fts5UnicodeFold(iCode, bRemoveDiacritics)); |
| 2433 } |
| 2434 } |
| 2435 |
| 2436 /* |
| 2437 ** This is called during initialization to register the fts5_expr() scalar |
| 2438 ** UDF with the SQLite handle passed as the only argument. |
| 2439 */ |
| 2440 int sqlite3Fts5ExprInit(Fts5Global *pGlobal, sqlite3 *db){ |
| 2441 struct Fts5ExprFunc { |
| 2442 const char *z; |
| 2443 void (*x)(sqlite3_context*,int,sqlite3_value**); |
| 2444 } aFunc[] = { |
| 2445 { "fts5_expr", fts5ExprFunctionHr }, |
| 2446 { "fts5_expr_tcl", fts5ExprFunctionTcl }, |
| 2447 { "fts5_isalnum", fts5ExprIsAlnum }, |
| 2448 { "fts5_fold", fts5ExprFold }, |
| 2449 }; |
| 2450 int i; |
| 2451 int rc = SQLITE_OK; |
| 2452 void *pCtx = (void*)pGlobal; |
| 2453 |
| 2454 for(i=0; rc==SQLITE_OK && i<ArraySize(aFunc); i++){ |
| 2455 struct Fts5ExprFunc *p = &aFunc[i]; |
| 2456 rc = sqlite3_create_function(db, p->z, -1, SQLITE_UTF8, pCtx, p->x, 0, 0); |
| 2457 } |
| 2458 |
| 2459 /* Avoid a warning indicating that sqlite3Fts5ParserTrace() is unused */ |
| 2460 #ifndef NDEBUG |
| 2461 (void)sqlite3Fts5ParserTrace; |
| 2462 #endif |
| 2463 |
| 2464 return rc; |
| 2465 } |
| 2466 |
| 2467 /* |
| 2468 ** Return the number of phrases in expression pExpr. |
| 2469 */ |
| 2470 int sqlite3Fts5ExprPhraseCount(Fts5Expr *pExpr){ |
| 2471 return (pExpr ? pExpr->nPhrase : 0); |
| 2472 } |
| 2473 |
| 2474 /* |
| 2475 ** Return the number of terms in the iPhrase'th phrase in pExpr. |
| 2476 */ |
| 2477 int sqlite3Fts5ExprPhraseSize(Fts5Expr *pExpr, int iPhrase){ |
| 2478 if( iPhrase<0 || iPhrase>=pExpr->nPhrase ) return 0; |
| 2479 return pExpr->apExprPhrase[iPhrase]->nTerm; |
| 2480 } |
| 2481 |
| 2482 /* |
| 2483 ** This function is used to access the current position list for phrase |
| 2484 ** iPhrase. |
| 2485 */ |
| 2486 int sqlite3Fts5ExprPoslist(Fts5Expr *pExpr, int iPhrase, const u8 **pa){ |
| 2487 int nRet; |
| 2488 Fts5ExprPhrase *pPhrase = pExpr->apExprPhrase[iPhrase]; |
| 2489 Fts5ExprNode *pNode = pPhrase->pNode; |
| 2490 if( pNode->bEof==0 && pNode->iRowid==pExpr->pRoot->iRowid ){ |
| 2491 *pa = pPhrase->poslist.p; |
| 2492 nRet = pPhrase->poslist.n; |
| 2493 }else{ |
| 2494 *pa = 0; |
| 2495 nRet = 0; |
| 2496 } |
| 2497 return nRet; |
| 2498 } |
| 2499 |
| 2500 struct Fts5PoslistPopulator { |
| 2501 Fts5PoslistWriter writer; |
| 2502 int bOk; /* True if ok to populate */ |
| 2503 int bMiss; |
| 2504 }; |
| 2505 |
| 2506 Fts5PoslistPopulator *sqlite3Fts5ExprClearPoslists(Fts5Expr *pExpr, int bLive){ |
| 2507 Fts5PoslistPopulator *pRet; |
| 2508 pRet = sqlite3_malloc(sizeof(Fts5PoslistPopulator)*pExpr->nPhrase); |
| 2509 if( pRet ){ |
| 2510 int i; |
| 2511 memset(pRet, 0, sizeof(Fts5PoslistPopulator)*pExpr->nPhrase); |
| 2512 for(i=0; i<pExpr->nPhrase; i++){ |
| 2513 Fts5Buffer *pBuf = &pExpr->apExprPhrase[i]->poslist; |
| 2514 Fts5ExprNode *pNode = pExpr->apExprPhrase[i]->pNode; |
| 2515 assert( pExpr->apExprPhrase[i]->nTerm==1 ); |
| 2516 if( bLive && |
| 2517 (pBuf->n==0 || pNode->iRowid!=pExpr->pRoot->iRowid || pNode->bEof) |
| 2518 ){ |
| 2519 pRet[i].bMiss = 1; |
| 2520 }else{ |
| 2521 pBuf->n = 0; |
| 2522 } |
| 2523 } |
| 2524 } |
| 2525 return pRet; |
| 2526 } |
| 2527 |
| 2528 struct Fts5ExprCtx { |
| 2529 Fts5Expr *pExpr; |
| 2530 Fts5PoslistPopulator *aPopulator; |
| 2531 i64 iOff; |
| 2532 }; |
| 2533 typedef struct Fts5ExprCtx Fts5ExprCtx; |
| 2534 |
| 2535 /* |
| 2536 ** TODO: Make this more efficient! |
| 2537 */ |
| 2538 static int fts5ExprColsetTest(Fts5Colset *pColset, int iCol){ |
| 2539 int i; |
| 2540 for(i=0; i<pColset->nCol; i++){ |
| 2541 if( pColset->aiCol[i]==iCol ) return 1; |
| 2542 } |
| 2543 return 0; |
| 2544 } |
| 2545 |
| 2546 static int fts5ExprPopulatePoslistsCb( |
| 2547 void *pCtx, /* Copy of 2nd argument to xTokenize() */ |
| 2548 int tflags, /* Mask of FTS5_TOKEN_* flags */ |
| 2549 const char *pToken, /* Pointer to buffer containing token */ |
| 2550 int nToken, /* Size of token in bytes */ |
| 2551 int iUnused1, /* Byte offset of token within input text */ |
| 2552 int iUnused2 /* Byte offset of end of token within input text */ |
| 2553 ){ |
| 2554 Fts5ExprCtx *p = (Fts5ExprCtx*)pCtx; |
| 2555 Fts5Expr *pExpr = p->pExpr; |
| 2556 int i; |
| 2557 |
| 2558 UNUSED_PARAM2(iUnused1, iUnused2); |
| 2559 |
| 2560 if( nToken>FTS5_MAX_TOKEN_SIZE ) nToken = FTS5_MAX_TOKEN_SIZE; |
| 2561 if( (tflags & FTS5_TOKEN_COLOCATED)==0 ) p->iOff++; |
| 2562 for(i=0; i<pExpr->nPhrase; i++){ |
| 2563 Fts5ExprTerm *pTerm; |
| 2564 if( p->aPopulator[i].bOk==0 ) continue; |
| 2565 for(pTerm=&pExpr->apExprPhrase[i]->aTerm[0]; pTerm; pTerm=pTerm->pSynonym){ |
| 2566 int nTerm = (int)strlen(pTerm->zTerm); |
| 2567 if( (nTerm==nToken || (nTerm<nToken && pTerm->bPrefix)) |
| 2568 && memcmp(pTerm->zTerm, pToken, nTerm)==0 |
| 2569 ){ |
| 2570 int rc = sqlite3Fts5PoslistWriterAppend( |
| 2571 &pExpr->apExprPhrase[i]->poslist, &p->aPopulator[i].writer, p->iOff |
| 2572 ); |
| 2573 if( rc ) return rc; |
| 2574 break; |
| 2575 } |
| 2576 } |
| 2577 } |
| 2578 return SQLITE_OK; |
| 2579 } |
| 2580 |
| 2581 int sqlite3Fts5ExprPopulatePoslists( |
| 2582 Fts5Config *pConfig, |
| 2583 Fts5Expr *pExpr, |
| 2584 Fts5PoslistPopulator *aPopulator, |
| 2585 int iCol, |
| 2586 const char *z, int n |
| 2587 ){ |
| 2588 int i; |
| 2589 Fts5ExprCtx sCtx; |
| 2590 sCtx.pExpr = pExpr; |
| 2591 sCtx.aPopulator = aPopulator; |
| 2592 sCtx.iOff = (((i64)iCol) << 32) - 1; |
| 2593 |
| 2594 for(i=0; i<pExpr->nPhrase; i++){ |
| 2595 Fts5ExprNode *pNode = pExpr->apExprPhrase[i]->pNode; |
| 2596 Fts5Colset *pColset = pNode->pNear->pColset; |
| 2597 if( (pColset && 0==fts5ExprColsetTest(pColset, iCol)) |
| 2598 || aPopulator[i].bMiss |
| 2599 ){ |
| 2600 aPopulator[i].bOk = 0; |
| 2601 }else{ |
| 2602 aPopulator[i].bOk = 1; |
| 2603 } |
| 2604 } |
| 2605 |
| 2606 return sqlite3Fts5Tokenize(pConfig, |
| 2607 FTS5_TOKENIZE_DOCUMENT, z, n, (void*)&sCtx, fts5ExprPopulatePoslistsCb |
| 2608 ); |
| 2609 } |
| 2610 |
| 2611 static void fts5ExprClearPoslists(Fts5ExprNode *pNode){ |
| 2612 if( pNode->eType==FTS5_TERM || pNode->eType==FTS5_STRING ){ |
| 2613 pNode->pNear->apPhrase[0]->poslist.n = 0; |
| 2614 }else{ |
| 2615 int i; |
| 2616 for(i=0; i<pNode->nChild; i++){ |
| 2617 fts5ExprClearPoslists(pNode->apChild[i]); |
| 2618 } |
| 2619 } |
| 2620 } |
| 2621 |
| 2622 static int fts5ExprCheckPoslists(Fts5ExprNode *pNode, i64 iRowid){ |
| 2623 pNode->iRowid = iRowid; |
| 2624 pNode->bEof = 0; |
| 2625 switch( pNode->eType ){ |
| 2626 case FTS5_TERM: |
| 2627 case FTS5_STRING: |
| 2628 return (pNode->pNear->apPhrase[0]->poslist.n>0); |
| 2629 |
| 2630 case FTS5_AND: { |
| 2631 int i; |
| 2632 for(i=0; i<pNode->nChild; i++){ |
| 2633 if( fts5ExprCheckPoslists(pNode->apChild[i], iRowid)==0 ){ |
| 2634 fts5ExprClearPoslists(pNode); |
| 2635 return 0; |
| 2636 } |
| 2637 } |
| 2638 break; |
| 2639 } |
| 2640 |
| 2641 case FTS5_OR: { |
| 2642 int i; |
| 2643 int bRet = 0; |
| 2644 for(i=0; i<pNode->nChild; i++){ |
| 2645 if( fts5ExprCheckPoslists(pNode->apChild[i], iRowid) ){ |
| 2646 bRet = 1; |
| 2647 } |
| 2648 } |
| 2649 return bRet; |
| 2650 } |
| 2651 |
| 2652 default: { |
| 2653 assert( pNode->eType==FTS5_NOT ); |
| 2654 if( 0==fts5ExprCheckPoslists(pNode->apChild[0], iRowid) |
| 2655 || 0!=fts5ExprCheckPoslists(pNode->apChild[1], iRowid) |
| 2656 ){ |
| 2657 fts5ExprClearPoslists(pNode); |
| 2658 return 0; |
| 2659 } |
| 2660 break; |
| 2661 } |
| 2662 } |
| 2663 return 1; |
| 2664 } |
| 2665 |
| 2666 void sqlite3Fts5ExprCheckPoslists(Fts5Expr *pExpr, i64 iRowid){ |
| 2667 fts5ExprCheckPoslists(pExpr->pRoot, iRowid); |
| 2668 } |
| 2669 |
| 2670 /* |
| 2671 ** This function is only called for detail=columns tables. |
| 2672 */ |
| 2673 int sqlite3Fts5ExprPhraseCollist( |
| 2674 Fts5Expr *pExpr, |
| 2675 int iPhrase, |
| 2676 const u8 **ppCollist, |
| 2677 int *pnCollist |
| 2678 ){ |
| 2679 Fts5ExprPhrase *pPhrase = pExpr->apExprPhrase[iPhrase]; |
| 2680 Fts5ExprNode *pNode = pPhrase->pNode; |
| 2681 int rc = SQLITE_OK; |
| 2682 |
| 2683 assert( iPhrase>=0 && iPhrase<pExpr->nPhrase ); |
| 2684 assert( pExpr->pConfig->eDetail==FTS5_DETAIL_COLUMNS ); |
| 2685 |
| 2686 if( pNode->bEof==0 |
| 2687 && pNode->iRowid==pExpr->pRoot->iRowid |
| 2688 && pPhrase->poslist.n>0 |
| 2689 ){ |
| 2690 Fts5ExprTerm *pTerm = &pPhrase->aTerm[0]; |
| 2691 if( pTerm->pSynonym ){ |
| 2692 Fts5Buffer *pBuf = (Fts5Buffer*)&pTerm->pSynonym[1]; |
| 2693 rc = fts5ExprSynonymList( |
| 2694 pTerm, pNode->iRowid, pBuf, (u8**)ppCollist, pnCollist |
| 2695 ); |
| 2696 }else{ |
| 2697 *ppCollist = pPhrase->aTerm[0].pIter->pData; |
| 2698 *pnCollist = pPhrase->aTerm[0].pIter->nData; |
| 2699 } |
| 2700 }else{ |
| 2701 *ppCollist = 0; |
| 2702 *pnCollist = 0; |
| 2703 } |
| 2704 |
| 2705 return rc; |
| 2706 } |
| 2707 |
OLD | NEW |