| OLD | NEW |
| 1 /* | 1 /* |
| 2 ** 2009 Oct 23 | 2 ** 2009 Oct 23 |
| 3 ** | 3 ** |
| 4 ** The author disclaims copyright to this source code. In place of | 4 ** The author disclaims copyright to this source code. In place of |
| 5 ** a legal notice, here is a blessing: | 5 ** a legal notice, here is a blessing: |
| 6 ** | 6 ** |
| 7 ** May you do good and not evil. | 7 ** May you do good and not evil. |
| 8 ** May you find forgiveness for yourself and forgive others. | 8 ** May you find forgiveness for yourself and forgive others. |
| 9 ** May you share freely, never taking more than you give. | 9 ** May you share freely, never taking more than you give. |
| 10 ** | 10 ** |
| 11 ****************************************************************************** | 11 ****************************************************************************** |
| 12 */ | 12 */ |
| 13 | 13 |
| 14 #include "fts3Int.h" |
| 14 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) | 15 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) |
| 15 | 16 |
| 16 #include "fts3Int.h" | |
| 17 #include <string.h> | 17 #include <string.h> |
| 18 #include <assert.h> | 18 #include <assert.h> |
| 19 | 19 |
| 20 /* | 20 /* |
| 21 ** Characters that may appear in the second argument to matchinfo(). | 21 ** Characters that may appear in the second argument to matchinfo(). |
| 22 */ | 22 */ |
| 23 #define FTS3_MATCHINFO_NPHRASE 'p' /* 1 value */ | 23 #define FTS3_MATCHINFO_NPHRASE 'p' /* 1 value */ |
| 24 #define FTS3_MATCHINFO_NCOL 'c' /* 1 value */ | 24 #define FTS3_MATCHINFO_NCOL 'c' /* 1 value */ |
| 25 #define FTS3_MATCHINFO_NDOC 'n' /* 1 value */ | 25 #define FTS3_MATCHINFO_NDOC 'n' /* 1 value */ |
| 26 #define FTS3_MATCHINFO_AVGLENGTH 'a' /* nCol values */ | 26 #define FTS3_MATCHINFO_AVGLENGTH 'a' /* nCol values */ |
| (...skipping 94 matching lines...) | (...skipping 94 matching lines...) |
| 121 ** | 121 ** |
| 122 ** are encoded. | 122 ** are encoded. |
| 123 ** | 123 ** |
| 124 ** When this function is called, *pp points to the start of an element of | 124 ** When this function is called, *pp points to the start of an element of |
| 125 ** the list. *piPos contains the value of the previous entry in the list. | 125 ** the list. *piPos contains the value of the previous entry in the list. |
| 126 ** After it returns, *piPos contains the value of the next element of the | 126 ** After it returns, *piPos contains the value of the next element of the |
| 127 ** list and *pp is advanced to the following varint. | 127 ** list and *pp is advanced to the following varint. |
| 128 */ | 128 */ |
| 129 static void fts3GetDeltaPosition(char **pp, int *piPos){ | 129 static void fts3GetDeltaPosition(char **pp, int *piPos){ |
| 130 int iVal; | 130 int iVal; |
| 131 *pp += sqlite3Fts3GetVarint32(*pp, &iVal); | 131 *pp += fts3GetVarint32(*pp, &iVal); |
| 132 *piPos += (iVal-2); | 132 *piPos += (iVal-2); |
| 133 } | 133 } |
| 134 | 134 |
| 135 /* | 135 /* |
| 136 ** Helper function for fts3ExprIterate() (see below). | 136 ** Helper function for fts3ExprIterate() (see below). |
| 137 */ | 137 */ |
| 138 static int fts3ExprIterate2( | 138 static int fts3ExprIterate2( |
| 139 Fts3Expr *pExpr, /* Expression to iterate phrases of */ | 139 Fts3Expr *pExpr, /* Expression to iterate phrases of */ |
| 140 int *piPhrase, /* Pointer to phrase counter */ | 140 int *piPhrase, /* Pointer to phrase counter */ |
| 141 int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */ | 141 int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */ |
| (...skipping 28 matching lines...) | (...skipping 28 matching lines...) |
| 170 static int fts3ExprIterate( | 170 static int fts3ExprIterate( |
| 171 Fts3Expr *pExpr, /* Expression to iterate phrases of */ | 171 Fts3Expr *pExpr, /* Expression to iterate phrases of */ |
| 172 int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */ | 172 int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */ |
| 173 void *pCtx /* Second argument to pass to callback */ | 173 void *pCtx /* Second argument to pass to callback */ |
| 174 ){ | 174 ){ |
| 175 int iPhrase = 0; /* Variable used as the phrase counter */ | 175 int iPhrase = 0; /* Variable used as the phrase counter */ |
| 176 return fts3ExprIterate2(pExpr, &iPhrase, x, pCtx); | 176 return fts3ExprIterate2(pExpr, &iPhrase, x, pCtx); |
| 177 } | 177 } |
| 178 | 178 |
| 179 /* | 179 /* |
| 180 ** The argument to this function is always a phrase node. Its doclist | |
| 181 ** (Fts3Expr.aDoclist[]) and the doclists associated with all phrase nodes | |
| 182 ** to the left of this one in the query tree have already been loaded. | |
| 183 ** | |
| 184 ** If this phrase node is part of a series of phrase nodes joined by | |
| 185 ** NEAR operators (and is not the left-most of said series), then elements are | |
| 186 ** removed from the phrases doclist consistent with the NEAR restriction. If | |
| 187 ** required, elements may be removed from the doclists of phrases to the | |
| 188 ** left of this one that are part of the same series of NEAR operator | |
| 189 ** connected phrases. | |
| 190 ** | |
| 191 ** If an OOM error occurs, SQLITE_NOMEM is returned. Otherwise, SQLITE_OK. | |
| 192 */ | |
| 193 static int fts3ExprNearTrim(Fts3Expr *pExpr){ | |
| 194 int rc = SQLITE_OK; | |
| 195 Fts3Expr *pParent = pExpr->pParent; | |
| 196 | |
| 197 assert( pExpr->eType==FTSQUERY_PHRASE ); | |
| 198 while( rc==SQLITE_OK | |
| 199 && pParent | |
| 200 && pParent->eType==FTSQUERY_NEAR | |
| 201 && pParent->pRight==pExpr | |
| 202 ){ | |
| 203 /* This expression (pExpr) is the right-hand-side of a NEAR operator. | |
| 204 ** Find the expression to the left of the same operator. | |
| 205 */ | |
| 206 int nNear = pParent->nNear; | |
| 207 Fts3Expr *pLeft = pParent->pLeft; | |
| 208 | |
| 209 if( pLeft->eType!=FTSQUERY_PHRASE ){ | |
| 210 assert( pLeft->eType==FTSQUERY_NEAR ); | |
| 211 assert( pLeft->pRight->eType==FTSQUERY_PHRASE ); | |
| 212 pLeft = pLeft->pRight; | |
| 213 } | |
| 214 | |
| 215 rc = sqlite3Fts3ExprNearTrim(pLeft, pExpr, nNear); | |
| 216 | |
| 217 pExpr = pLeft; | |
| 218 pParent = pExpr->pParent; | |
| 219 } | |
| 220 | |
| 221 return rc; | |
| 222 } | |
| 223 | |
| 224 /* | |
| 225 ** This is an fts3ExprIterate() callback used while loading the doclists | 180 ** This is an fts3ExprIterate() callback used while loading the doclists |
| 226 ** for each phrase into Fts3Expr.aDoclist[]/nDoclist. See also | 181 ** for each phrase into Fts3Expr.aDoclist[]/nDoclist. See also |
| 227 ** fts3ExprLoadDoclists(). | 182 ** fts3ExprLoadDoclists(). |
| 228 */ | 183 */ |
| 229 static int fts3ExprLoadDoclistsCb(Fts3Expr *pExpr, int iPhrase, void *ctx){ | 184 static int fts3ExprLoadDoclistsCb(Fts3Expr *pExpr, int iPhrase, void *ctx){ |
| 230 int rc = SQLITE_OK; | 185 int rc = SQLITE_OK; |
| 186 Fts3Phrase *pPhrase = pExpr->pPhrase; |
| 231 LoadDoclistCtx *p = (LoadDoclistCtx *)ctx; | 187 LoadDoclistCtx *p = (LoadDoclistCtx *)ctx; |
| 232 | 188 |
| 233 UNUSED_PARAMETER(iPhrase); | 189 UNUSED_PARAMETER(iPhrase); |
| 234 | 190 |
| 235 p->nPhrase++; | 191 p->nPhrase++; |
| 236 p->nToken += pExpr->pPhrase->nToken; | 192 p->nToken += pPhrase->nToken; |
| 237 | |
| 238 if( pExpr->isLoaded==0 ){ | |
| 239 rc = sqlite3Fts3ExprLoadDoclist(p->pCsr, pExpr); | |
| 240 pExpr->isLoaded = 1; | |
| 241 if( rc==SQLITE_OK ){ | |
| 242 rc = fts3ExprNearTrim(pExpr); | |
| 243 } | |
| 244 } | |
| 245 | 193 |
| 246 return rc; | 194 return rc; |
| 247 } | 195 } |
| 248 | 196 |
| 249 /* | 197 /* |
| 250 ** Load the doclists for each phrase in the query associated with FTS3 cursor | 198 ** Load the doclists for each phrase in the query associated with FTS3 cursor |
| 251 ** pCsr. | 199 ** pCsr. |
| 252 ** | 200 ** |
| 253 ** If pnPhrase is not NULL, then *pnPhrase is set to the number of matchable | 201 ** If pnPhrase is not NULL, then *pnPhrase is set to the number of matchable |
| 254 ** phrases in the expression (all phrases except those directly or | 202 ** phrases in the expression (all phrases except those directly or |
| (...skipping 150 matching lines...) | (...skipping 150 matching lines...) |
| 405 } | 353 } |
| 406 | 354 |
| 407 /* | 355 /* |
| 408 ** This function is an fts3ExprIterate() callback used by fts3BestSnippet(). | 356 ** This function is an fts3ExprIterate() callback used by fts3BestSnippet(). |
| 409 ** Each invocation populates an element of the SnippetIter.aPhrase[] array. | 357 ** Each invocation populates an element of the SnippetIter.aPhrase[] array. |
| 410 */ | 358 */ |
| 411 static int fts3SnippetFindPositions(Fts3Expr *pExpr, int iPhrase, void *ctx){ | 359 static int fts3SnippetFindPositions(Fts3Expr *pExpr, int iPhrase, void *ctx){ |
| 412 SnippetIter *p = (SnippetIter *)ctx; | 360 SnippetIter *p = (SnippetIter *)ctx; |
| 413 SnippetPhrase *pPhrase = &p->aPhrase[iPhrase]; | 361 SnippetPhrase *pPhrase = &p->aPhrase[iPhrase]; |
| 414 char *pCsr; | 362 char *pCsr; |
| 363 int rc; |
| 415 | 364 |
| 416 pPhrase->nToken = pExpr->pPhrase->nToken; | 365 pPhrase->nToken = pExpr->pPhrase->nToken; |
| 417 | 366 rc = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->iCol, &pCsr); |
| 418 pCsr = sqlite3Fts3FindPositions(pExpr, p->pCsr->iPrevId, p->iCol); | 367 assert( rc==SQLITE_OK || pCsr==0 ); |
| 419 if( pCsr ){ | 368 if( pCsr ){ |
| 420 int iFirst = 0; | 369 int iFirst = 0; |
| 421 pPhrase->pList = pCsr; | 370 pPhrase->pList = pCsr; |
| 422 fts3GetDeltaPosition(&pCsr, &iFirst); | 371 fts3GetDeltaPosition(&pCsr, &iFirst); |
| 372 assert( iFirst>=0 ); |
| 423 pPhrase->pHead = pCsr; | 373 pPhrase->pHead = pCsr; |
| 424 pPhrase->pTail = pCsr; | 374 pPhrase->pTail = pCsr; |
| 425 pPhrase->iHead = iFirst; | 375 pPhrase->iHead = iFirst; |
| 426 pPhrase->iTail = iFirst; | 376 pPhrase->iTail = iFirst; |
| 427 }else{ | 377 }else{ |
| 428 assert( pPhrase->pList==0 && pPhrase->pHead==0 && pPhrase->pTail==0 ); | 378 assert( rc!=SQLITE_OK || ( |
| 379 pPhrase->pList==0 && pPhrase->pHead==0 && pPhrase->pTail==0 |
| 380 )); |
| 429 } | 381 } |
| 430 | 382 |
| 431 return SQLITE_OK; | 383 return rc; |
| 432 } | 384 } |
| 433 | 385 |
| 434 /* | 386 /* |
| 435 ** Select the fragment of text consisting of nFragment contiguous tokens | 387 ** Select the fragment of text consisting of nFragment contiguous tokens |
| 436 ** from column iCol that represent the "best" snippet. The best snippet | 388 ** from column iCol that represent the "best" snippet. The best snippet |
| 437 ** is the snippet with the highest score, where scores are calculated | 389 ** is the snippet with the highest score, where scores are calculated |
| 438 ** by adding: | 390 ** by adding: |
| 439 ** | 391 ** |
| 440 ** (a) +1 point for each occurence of a matchable phrase in the snippet. | 392 ** (a) +1 point for each occurrence of a matchable phrase in the snippet. |
| 441 ** | 393 ** |
| 442 ** (b) +1000 points for the first occurence of each matchable phrase in | 394 ** (b) +1000 points for the first occurrence of each matchable phrase in |
| 443 ** the snippet for which the corresponding mCovered bit is not set. | 395 ** the snippet for which the corresponding mCovered bit is not set. |
| 444 ** | 396 ** |
| 445 ** The selected snippet parameters are stored in structure *pFragment before | 397 ** The selected snippet parameters are stored in structure *pFragment before |
| 446 ** returning. The score of the selected snippet is stored in *piScore | 398 ** returning. The score of the selected snippet is stored in *piScore |
| 447 ** before returning. | 399 ** before returning. |
| 448 */ | 400 */ |
| 449 static int fts3BestSnippet( | 401 static int fts3BestSnippet( |
| 450 int nSnippet, /* Desired snippet length */ | 402 int nSnippet, /* Desired snippet length */ |
| 451 Fts3Cursor *pCsr, /* Cursor to create snippet for */ | 403 Fts3Cursor *pCsr, /* Cursor to create snippet for */ |
| 452 int iCol, /* Index of column to create snippet from */ | 404 int iCol, /* Index of column to create snippet from */ |
| (...skipping 92 matching lines...) | (...skipping 92 matching lines...) |
| 545 */ | 497 */ |
| 546 if( pStr->n+nAppend+1>=pStr->nAlloc ){ | 498 if( pStr->n+nAppend+1>=pStr->nAlloc ){ |
| 547 int nAlloc = pStr->nAlloc+nAppend+100; | 499 int nAlloc = pStr->nAlloc+nAppend+100; |
| 548 char *zNew = sqlite3_realloc(pStr->z, nAlloc); | 500 char *zNew = sqlite3_realloc(pStr->z, nAlloc); |
| 549 if( !zNew ){ | 501 if( !zNew ){ |
| 550 return SQLITE_NOMEM; | 502 return SQLITE_NOMEM; |
| 551 } | 503 } |
| 552 pStr->z = zNew; | 504 pStr->z = zNew; |
| 553 pStr->nAlloc = nAlloc; | 505 pStr->nAlloc = nAlloc; |
| 554 } | 506 } |
| 507 assert( pStr->z!=0 && (pStr->nAlloc >= pStr->n+nAppend+1) ); |
| 555 | 508 |
| 556 /* Append the data to the string buffer. */ | 509 /* Append the data to the string buffer. */ |
| 557 memcpy(&pStr->z[pStr->n], zAppend, nAppend); | 510 memcpy(&pStr->z[pStr->n], zAppend, nAppend); |
| 558 pStr->n += nAppend; | 511 pStr->n += nAppend; |
| 559 pStr->z[pStr->n] = '\0'; | 512 pStr->z[pStr->n] = '\0'; |
| 560 | 513 |
| 561 return SQLITE_OK; | 514 return SQLITE_OK; |
| 562 } | 515 } |
| 563 | 516 |
| 564 /* | 517 /* |
| (...skipping 11 matching lines...) | (...skipping 11 matching lines...) |
| 576 ** | 529 ** |
| 577 ** ....X.....X.... | 530 ** ....X.....X.... |
| 578 ** | 531 ** |
| 579 ** This is done as part of extracting the snippet text, not when selecting | 532 ** This is done as part of extracting the snippet text, not when selecting |
| 580 ** the snippet. Snippet selection is done based on doclists only, so there | 533 ** the snippet. Snippet selection is done based on doclists only, so there |
| 581 ** is no way for fts3BestSnippet() to know whether or not the document | 534 ** is no way for fts3BestSnippet() to know whether or not the document |
| 582 ** actually contains terms that follow the final highlighted term. | 535 ** actually contains terms that follow the final highlighted term. |
| 583 */ | 536 */ |
| 584 static int fts3SnippetShift( | 537 static int fts3SnippetShift( |
| 585 Fts3Table *pTab, /* FTS3 table snippet comes from */ | 538 Fts3Table *pTab, /* FTS3 table snippet comes from */ |
| 539 int iLangid, /* Language id to use in tokenizing */ |
| 586 int nSnippet, /* Number of tokens desired for snippet */ | 540 int nSnippet, /* Number of tokens desired for snippet */ |
| 587 const char *zDoc, /* Document text to extract snippet from */ | 541 const char *zDoc, /* Document text to extract snippet from */ |
| 588 int nDoc, /* Size of buffer zDoc in bytes */ | 542 int nDoc, /* Size of buffer zDoc in bytes */ |
| 589 int *piPos, /* IN/OUT: First token of snippet */ | 543 int *piPos, /* IN/OUT: First token of snippet */ |
| 590 u64 *pHlmask /* IN/OUT: Mask of tokens to highlight */ | 544 u64 *pHlmask /* IN/OUT: Mask of tokens to highlight */ |
| 591 ){ | 545 ){ |
| 592 u64 hlmask = *pHlmask; /* Local copy of initial highlight-mask */ | 546 u64 hlmask = *pHlmask; /* Local copy of initial highlight-mask */ |
| 593 | 547 |
| 594 if( hlmask ){ | 548 if( hlmask ){ |
| 595 int nLeft; /* Tokens to the left of first highlight */ | 549 int nLeft; /* Tokens to the left of first highlight */ |
| (...skipping 15 matching lines...) | (...skipping 15 matching lines...) |
| 611 int nShift; /* Number of tokens to shift snippet by */ | 565 int nShift; /* Number of tokens to shift snippet by */ |
| 612 int iCurrent = 0; /* Token counter */ | 566 int iCurrent = 0; /* Token counter */ |
| 613 int rc; /* Return Code */ | 567 int rc; /* Return Code */ |
| 614 sqlite3_tokenizer_module *pMod; | 568 sqlite3_tokenizer_module *pMod; |
| 615 sqlite3_tokenizer_cursor *pC; | 569 sqlite3_tokenizer_cursor *pC; |
| 616 pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule; | 570 pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule; |
| 617 | 571 |
| 618 /* Open a cursor on zDoc/nDoc. Check if there are (nSnippet+nDesired) | 572 /* Open a cursor on zDoc/nDoc. Check if there are (nSnippet+nDesired) |
| 619 ** or more tokens in zDoc/nDoc. | 573 ** or more tokens in zDoc/nDoc. |
| 620 */ | 574 */ |
| 621 rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC); | 575 rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, iLangid, zDoc, nDoc, &pC); |
| 622 if( rc!=SQLITE_OK ){ | 576 if( rc!=SQLITE_OK ){ |
| 623 return rc; | 577 return rc; |
| 624 } | 578 } |
| 625 pC->pTokenizer = pTab->pTokenizer; | |
| 626 while( rc==SQLITE_OK && iCurrent<(nSnippet+nDesired) ){ | 579 while( rc==SQLITE_OK && iCurrent<(nSnippet+nDesired) ){ |
| 627 const char *ZDUMMY; int DUMMY1, DUMMY2, DUMMY3; | 580 const char *ZDUMMY; int DUMMY1 = 0, DUMMY2 = 0, DUMMY3 = 0; |
| 628 rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent); | 581 rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent); |
| 629 } | 582 } |
| 630 pMod->xClose(pC); | 583 pMod->xClose(pC); |
| 631 if( rc!=SQLITE_OK && rc!=SQLITE_DONE ){ return rc; } | 584 if( rc!=SQLITE_OK && rc!=SQLITE_DONE ){ return rc; } |
| 632 | 585 |
| 633 nShift = (rc==SQLITE_DONE)+iCurrent-nSnippet; | 586 nShift = (rc==SQLITE_DONE)+iCurrent-nSnippet; |
| 634 assert( nShift<=nDesired ); | 587 assert( nShift<=nDesired ); |
| 635 if( nShift>0 ){ | 588 if( nShift>0 ){ |
| 636 *piPos += nShift; | 589 *piPos += nShift; |
| 637 *pHlmask = hlmask >> nShift; | 590 *pHlmask = hlmask >> nShift; |
| (...skipping 23 matching lines...) | (...skipping 23 matching lines...) |
| 661 const char *zDoc; /* Document text to extract snippet from */ | 614 const char *zDoc; /* Document text to extract snippet from */ |
| 662 int nDoc; /* Size of zDoc in bytes */ | 615 int nDoc; /* Size of zDoc in bytes */ |
| 663 int iCurrent = 0; /* Current token number of document */ | 616 int iCurrent = 0; /* Current token number of document */ |
| 664 int iEnd = 0; /* Byte offset of end of current token */ | 617 int iEnd = 0; /* Byte offset of end of current token */ |
| 665 int isShiftDone = 0; /* True after snippet is shifted */ | 618 int isShiftDone = 0; /* True after snippet is shifted */ |
| 666 int iPos = pFragment->iPos; /* First token of snippet */ | 619 int iPos = pFragment->iPos; /* First token of snippet */ |
| 667 u64 hlmask = pFragment->hlmask; /* Highlight-mask for snippet */ | 620 u64 hlmask = pFragment->hlmask; /* Highlight-mask for snippet */ |
| 668 int iCol = pFragment->iCol+1; /* Query column to extract text from */ | 621 int iCol = pFragment->iCol+1; /* Query column to extract text from */ |
| 669 sqlite3_tokenizer_module *pMod; /* Tokenizer module methods object */ | 622 sqlite3_tokenizer_module *pMod; /* Tokenizer module methods object */ |
| 670 sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor open on zDoc/nDoc */ | 623 sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor open on zDoc/nDoc */ |
| 671 const char *ZDUMMY; /* Dummy argument used with tokenizer */ | |
| 672 int DUMMY1; /* Dummy argument used with tokenizer */ | |
| 673 | 624 |
| 674 zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol); | 625 zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol); |
| 675 if( zDoc==0 ){ | 626 if( zDoc==0 ){ |
| 676 if( sqlite3_column_type(pCsr->pStmt, iCol)!=SQLITE_NULL ){ | 627 if( sqlite3_column_type(pCsr->pStmt, iCol)!=SQLITE_NULL ){ |
| 677 return SQLITE_NOMEM; | 628 return SQLITE_NOMEM; |
| 678 } | 629 } |
| 679 return SQLITE_OK; | 630 return SQLITE_OK; |
| 680 } | 631 } |
| 681 nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol); | 632 nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol); |
| 682 | 633 |
| 683 /* Open a token cursor on the document. */ | 634 /* Open a token cursor on the document. */ |
| 684 pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule; | 635 pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule; |
| 685 rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC); | 636 rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid, zDoc,nDoc,&pC); |
| 686 if( rc!=SQLITE_OK ){ | 637 if( rc!=SQLITE_OK ){ |
| 687 return rc; | 638 return rc; |
| 688 } | 639 } |
| 689 pC->pTokenizer = pTab->pTokenizer; | |
| 690 | 640 |
| 691 while( rc==SQLITE_OK ){ | 641 while( rc==SQLITE_OK ){ |
| 692 int iBegin; /* Offset in zDoc of start of token */ | 642 const char *ZDUMMY; /* Dummy argument used with tokenizer */ |
| 693 int iFin; /* Offset in zDoc of end of token */ | 643 int DUMMY1 = -1; /* Dummy argument used with tokenizer */ |
| 694 int isHighlight; /* True for highlighted terms */ | 644 int iBegin = 0; /* Offset in zDoc of start of token */ |
| 645 int iFin = 0; /* Offset in zDoc of end of token */ |
| 646 int isHighlight = 0; /* True for highlighted terms */ |
| 695 | 647 |
| 648 /* Variable DUMMY1 is initialized to a negative value above. Elsewhere |
| 649 ** in the FTS code the variable that the third argument to xNext points to |
| 650 ** is initialized to zero before the first (*but not necessarily |
| 651 ** subsequent*) call to xNext(). This is done for a particular application |
| 652 ** that needs to know whether or not the tokenizer is being used for |
| 653 ** snippet generation or for some other purpose. |
| 654 ** |
| 655 ** Extreme care is required when writing code to depend on this |
| 656 ** initialization. It is not a documented part of the tokenizer interface. |
| 657 ** If a tokenizer is used directly by any code outside of FTS, this |
| 658 ** convention might not be respected. */ |
| 696 rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &iBegin, &iFin, &iCurrent); | 659 rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &iBegin, &iFin, &iCurrent); |
| 697 if( rc!=SQLITE_OK ){ | 660 if( rc!=SQLITE_OK ){ |
| 698 if( rc==SQLITE_DONE ){ | 661 if( rc==SQLITE_DONE ){ |
| 699 /* Special case - the last token of the snippet is also the last token | 662 /* Special case - the last token of the snippet is also the last token |
| 700 ** of the column. Append any punctuation that occurred between the end | 663 ** of the column. Append any punctuation that occurred between the end |
| 701 ** of the previous token and the end of the document to the output. | 664 ** of the previous token and the end of the document to the output. |
| 702 ** Then break out of the loop. */ | 665 ** Then break out of the loop. */ |
| 703 rc = fts3StringAppend(pOut, &zDoc[iEnd], -1); | 666 rc = fts3StringAppend(pOut, &zDoc[iEnd], -1); |
| 704 } | 667 } |
| 705 break; | 668 break; |
| 706 } | 669 } |
| 707 if( iCurrent<iPos ){ continue; } | 670 if( iCurrent<iPos ){ continue; } |
| 708 | 671 |
| 709 if( !isShiftDone ){ | 672 if( !isShiftDone ){ |
| 710 int n = nDoc - iBegin; | 673 int n = nDoc - iBegin; |
| 711 rc = fts3SnippetShift(pTab, nSnippet, &zDoc[iBegin], n, &iPos, &hlmask); | 674 rc = fts3SnippetShift( |
| 675 pTab, pCsr->iLangid, nSnippet, &zDoc[iBegin], n, &iPos, &hlmask |
| 676 ); |
| 712 isShiftDone = 1; | 677 isShiftDone = 1; |
| 713 | 678 |
| 714 /* Now that the shift has been done, check if the initial "..." are | 679 /* Now that the shift has been done, check if the initial "..." are |
| 715 ** required. They are required if (a) this is not the first fragment, | 680 ** required. They are required if (a) this is not the first fragment, |
| 716 ** or (b) this fragment does not begin at position 0 of its column. | 681 ** or (b) this fragment does not begin at position 0 of its column. |
| 717 */ | 682 */ |
| 718 if( rc==SQLITE_OK && (iPos>0 || iFragment>0) ){ | 683 if( rc==SQLITE_OK && (iPos>0 || iFragment>0) ){ |
| 719 rc = fts3StringAppend(pOut, zEllipsis, -1); | 684 rc = fts3StringAppend(pOut, zEllipsis, -1); |
| 720 } | 685 } |
| 721 if( rc!=SQLITE_OK || iCurrent<iPos ) continue; | 686 if( rc!=SQLITE_OK || iCurrent<iPos ) continue; |
| (...skipping 43 matching lines...) | (...skipping 43 matching lines...) |
| 765 /* A column-list is terminated by either a 0x01 or 0x00. */ | 730 /* A column-list is terminated by either a 0x01 or 0x00. */ |
| 766 while( 0xFE & (*pEnd | c) ){ | 731 while( 0xFE & (*pEnd | c) ){ |
| 767 c = *pEnd++ & 0x80; | 732 c = *pEnd++ & 0x80; |
| 768 if( !c ) nEntry++; | 733 if( !c ) nEntry++; |
| 769 } | 734 } |
| 770 | 735 |
| 771 *ppCollist = pEnd; | 736 *ppCollist = pEnd; |
| 772 return nEntry; | 737 return nEntry; |
| 773 } | 738 } |
| 774 | 739 |
| 775 static void fts3LoadColumnlistCounts(char **pp, u32 *aOut, int isGlobal){ | |
| 776 char *pCsr = *pp; | |
| 777 while( *pCsr ){ | |
| 778 int nHit; | |
| 779 sqlite3_int64 iCol = 0; | |
| 780 if( *pCsr==0x01 ){ | |
| 781 pCsr++; | |
| 782 pCsr += sqlite3Fts3GetVarint(pCsr, &iCol); | |
| 783 } | |
| 784 nHit = fts3ColumnlistCount(&pCsr); | |
| 785 assert( nHit>0 ); | |
| 786 if( isGlobal ){ | |
| 787 aOut[iCol*3+1]++; | |
| 788 } | |
| 789 aOut[iCol*3] += nHit; | |
| 790 } | |
| 791 pCsr++; | |
| 792 *pp = pCsr; | |
| 793 } | |
| 794 | |
| 795 /* | 740 /* |
| 796 ** fts3ExprIterate() callback used to collect the "global" matchinfo stats | 741 ** fts3ExprIterate() callback used to collect the "global" matchinfo stats |
| 797 ** for a single query. | 742 ** for a single query. |
| 798 ** | 743 ** |
| 799 ** fts3ExprIterate() callback to load the 'global' elements of a | 744 ** fts3ExprIterate() callback to load the 'global' elements of a |
| 800 ** FTS3_MATCHINFO_HITS matchinfo array. The global stats are those elements | 745 ** FTS3_MATCHINFO_HITS matchinfo array. The global stats are those elements |
| 801 ** of the matchinfo array that are constant for all rows returned by the | 746 ** of the matchinfo array that are constant for all rows returned by the |
| 802 ** current query. | 747 ** current query. |
| 803 ** | 748 ** |
| 804 ** Argument pCtx is actually a pointer to a struct of type MatchInfo. This | 749 ** Argument pCtx is actually a pointer to a struct of type MatchInfo. This |
| (...skipping 13 matching lines...) | (...skipping 13 matching lines...) |
| 818 ** file system. This is done because the full-text index doclist is required | 763 ** file system. This is done because the full-text index doclist is required |
| 819 ** to calculate these values properly, and the full-text index doclist is | 764 ** to calculate these values properly, and the full-text index doclist is |
| 820 ** not available for deferred tokens. | 765 ** not available for deferred tokens. |
| 821 */ | 766 */ |
| 822 static int fts3ExprGlobalHitsCb( | 767 static int fts3ExprGlobalHitsCb( |
| 823 Fts3Expr *pExpr, /* Phrase expression node */ | 768 Fts3Expr *pExpr, /* Phrase expression node */ |
| 824 int iPhrase, /* Phrase number (numbered from zero) */ | 769 int iPhrase, /* Phrase number (numbered from zero) */ |
| 825 void *pCtx /* Pointer to MatchInfo structure */ | 770 void *pCtx /* Pointer to MatchInfo structure */ |
| 826 ){ | 771 ){ |
| 827 MatchInfo *p = (MatchInfo *)pCtx; | 772 MatchInfo *p = (MatchInfo *)pCtx; |
| 828 Fts3Cursor *pCsr = p->pCursor; | 773 return sqlite3Fts3EvalPhraseStats( |
| 829 char *pIter; | 774 p->pCursor, pExpr, &p->aMatchinfo[3*iPhrase*p->nCol] |
| 830 char *pEnd; | 775 ); |
| 831 char *pFree = 0; | |
| 832 u32 *aOut = &p->aMatchinfo[3*iPhrase*p->nCol]; | |
| 833 | |
| 834 assert( pExpr->isLoaded ); | |
| 835 assert( pExpr->eType==FTSQUERY_PHRASE ); | |
| 836 | |
| 837 if( pCsr->pDeferred ){ | |
| 838 Fts3Phrase *pPhrase = pExpr->pPhrase; | |
| 839 int ii; | |
| 840 for(ii=0; ii<pPhrase->nToken; ii++){ | |
| 841 if( pPhrase->aToken[ii].bFulltext ) break; | |
| 842 } | |
| 843 if( ii<pPhrase->nToken ){ | |
| 844 int nFree = 0; | |
| 845 int rc = sqlite3Fts3ExprLoadFtDoclist(pCsr, pExpr, &pFree, &nFree); | |
| 846 if( rc!=SQLITE_OK ) return rc; | |
| 847 pIter = pFree; | |
| 848 pEnd = &pFree[nFree]; | |
| 849 }else{ | |
| 850 int iCol; /* Column index */ | |
| 851 for(iCol=0; iCol<p->nCol; iCol++){ | |
| 852 aOut[iCol*3 + 1] = (u32)p->nDoc; | |
| 853 aOut[iCol*3 + 2] = (u32)p->nDoc; | |
| 854 } | |
| 855 return SQLITE_OK; | |
| 856 } | |
| 857 }else{ | |
| 858 pIter = pExpr->aDoclist; | |
| 859 pEnd = &pExpr->aDoclist[pExpr->nDoclist]; | |
| 860 } | |
| 861 | |
| 862 /* Fill in the global hit count matrix row for this phrase. */ | |
| 863 while( pIter<pEnd ){ | |
| 864 while( *pIter++ & 0x80 ); /* Skip past docid. */ | |
| 865 fts3LoadColumnlistCounts(&pIter, &aOut[1], 1); | |
| 866 } | |
| 867 | |
| 868 sqlite3_free(pFree); | |
| 869 return SQLITE_OK; | |
| 870 } | 776 } |
| 871 | 777 |
| 872 /* | 778 /* |
| 873 ** fts3ExprIterate() callback used to collect the "local" part of the | 779 ** fts3ExprIterate() callback used to collect the "local" part of the |
| 874 ** FTS3_MATCHINFO_HITS array. The local stats are those elements of the | 780 ** FTS3_MATCHINFO_HITS array. The local stats are those elements of the |
| 875 ** array that are different for each row returned by the query. | 781 ** array that are different for each row returned by the query. |
| 876 */ | 782 */ |
| 877 static int fts3ExprLocalHitsCb( | 783 static int fts3ExprLocalHitsCb( |
| 878 Fts3Expr *pExpr, /* Phrase expression node */ | 784 Fts3Expr *pExpr, /* Phrase expression node */ |
| 879 int iPhrase, /* Phrase number */ | 785 int iPhrase, /* Phrase number */ |
| 880 void *pCtx /* Pointer to MatchInfo structure */ | 786 void *pCtx /* Pointer to MatchInfo structure */ |
| 881 ){ | 787 ){ |
| 788 int rc = SQLITE_OK; |
| 882 MatchInfo *p = (MatchInfo *)pCtx; | 789 MatchInfo *p = (MatchInfo *)pCtx; |
| 883 int iStart = iPhrase * p->nCol * 3; | 790 int iStart = iPhrase * p->nCol * 3; |
| 884 int i; | 791 int i; |
| 885 | 792 |
| 886 for(i=0; i<p->nCol; i++) p->aMatchinfo[iStart+i*3] = 0; | 793 for(i=0; i<p->nCol && rc==SQLITE_OK; i++){ |
| 887 | |
| 888 if( pExpr->aDoclist ){ | |
| 889 char *pCsr; | 794 char *pCsr; |
| 890 | 795 rc = sqlite3Fts3EvalPhrasePoslist(p->pCursor, pExpr, i, &pCsr); |
| 891 pCsr = sqlite3Fts3FindPositions(pExpr, p->pCursor->iPrevId, -1); | |
| 892 if( pCsr ){ | 796 if( pCsr ){ |
| 893 fts3LoadColumnlistCounts(&pCsr, &p->aMatchinfo[iStart], 0); | 797 p->aMatchinfo[iStart+i*3] = fts3ColumnlistCount(&pCsr); |
| 798 }else{ |
| 799 p->aMatchinfo[iStart+i*3] = 0; |
| 894 } | 800 } |
| 895 } | 801 } |
| 896 | 802 |
| 897 return SQLITE_OK; | 803 return rc; |
| 898 } | 804 } |
| 899 | 805 |
| 900 static int fts3MatchinfoCheck( | 806 static int fts3MatchinfoCheck( |
| 901 Fts3Table *pTab, | 807 Fts3Table *pTab, |
| 902 char cArg, | 808 char cArg, |
| 903 char **pzErr | 809 char **pzErr |
| 904 ){ | 810 ){ |
| 905 if( (cArg==FTS3_MATCHINFO_NPHRASE) | 811 if( (cArg==FTS3_MATCHINFO_NPHRASE) |
| 906 || (cArg==FTS3_MATCHINFO_NCOL) | 812 || (cArg==FTS3_MATCHINFO_NCOL) |
| 907 || (cArg==FTS3_MATCHINFO_NDOC && pTab->bHasStat) | 813 || (cArg==FTS3_MATCHINFO_NDOC && pTab->bFts4) |
| 908 || (cArg==FTS3_MATCHINFO_AVGLENGTH && pTab->bHasStat) | 814 || (cArg==FTS3_MATCHINFO_AVGLENGTH && pTab->bFts4) |
| 909 || (cArg==FTS3_MATCHINFO_LENGTH && pTab->bHasDocsize) | 815 || (cArg==FTS3_MATCHINFO_LENGTH && pTab->bHasDocsize) |
| 910 || (cArg==FTS3_MATCHINFO_LCS) | 816 || (cArg==FTS3_MATCHINFO_LCS) |
| 911 || (cArg==FTS3_MATCHINFO_HITS) | 817 || (cArg==FTS3_MATCHINFO_HITS) |
| 912 ){ | 818 ){ |
| 913 return SQLITE_OK; | 819 return SQLITE_OK; |
| 914 } | 820 } |
| 915 *pzErr = sqlite3_mprintf("unrecognized matchinfo request: %c", cArg); | 821 *pzErr = sqlite3_mprintf("unrecognized matchinfo request: %c", cArg); |
| 916 return SQLITE_ERROR; | 822 return SQLITE_ERROR; |
| 917 } | 823 } |
| 918 | 824 |
| (...skipping 34 matching lines...) | (...skipping 34 matching lines...) |
| 953 | 859 |
| 954 if( !*ppStmt ){ | 860 if( !*ppStmt ){ |
| 955 int rc = sqlite3Fts3SelectDoctotal(pTab, ppStmt); | 861 int rc = sqlite3Fts3SelectDoctotal(pTab, ppStmt); |
| 956 if( rc!=SQLITE_OK ) return rc; | 862 if( rc!=SQLITE_OK ) return rc; |
| 957 } | 863 } |
| 958 pStmt = *ppStmt; | 864 pStmt = *ppStmt; |
| 959 assert( sqlite3_data_count(pStmt)==1 ); | 865 assert( sqlite3_data_count(pStmt)==1 ); |
| 960 | 866 |
| 961 a = sqlite3_column_blob(pStmt, 0); | 867 a = sqlite3_column_blob(pStmt, 0); |
| 962 a += sqlite3Fts3GetVarint(a, &nDoc); | 868 a += sqlite3Fts3GetVarint(a, &nDoc); |
| 963 if( nDoc==0 ) return SQLITE_CORRUPT; | 869 if( nDoc==0 ) return FTS_CORRUPT_VTAB; |
| 964 *pnDoc = (u32)nDoc; | 870 *pnDoc = (u32)nDoc; |
| 965 | 871 |
| 966 if( paLen ) *paLen = a; | 872 if( paLen ) *paLen = a; |
| 967 return SQLITE_OK; | 873 return SQLITE_OK; |
| 968 } | 874 } |
| 969 | 875 |
| 970 /* | 876 /* |
| 971 ** An instance of the following structure is used to store state while | 877 ** An instance of the following structure is used to store state while |
| 972 ** iterating through a multi-column position-list corresponding to the | 878 ** iterating through a multi-column position-list corresponding to the |
| 973 ** hits for a single phrase on a single row in order to calculate the | 879 ** hits for a single phrase on a single row in order to calculate the |
| 974 ** values for a matchinfo() FTS3_MATCHINFO_LCS request. | 880 ** values for a matchinfo() FTS3_MATCHINFO_LCS request. |
| 975 */ | 881 */ |
| 976 typedef struct LcsIterator LcsIterator; | 882 typedef struct LcsIterator LcsIterator; |
| 977 struct LcsIterator { | 883 struct LcsIterator { |
| 978 Fts3Expr *pExpr; /* Pointer to phrase expression */ | 884 Fts3Expr *pExpr; /* Pointer to phrase expression */ |
| 885 int iPosOffset; /* Tokens count up to end of this phrase */ |
| 979 char *pRead; /* Cursor used to iterate through aDoclist */ | 886 char *pRead; /* Cursor used to iterate through aDoclist */ |
| 980 int iPosOffset; /* Tokens count up to end of this phrase */ | |
| 981 int iCol; /* Current column number */ | |
| 982 int iPos; /* Current position */ | 887 int iPos; /* Current position */ |
| 983 }; | 888 }; |
| 984 | 889 |
| 985 /* | 890 /* |
| 986 ** If LcsIterator.iCol is set to the following value, the iterator has | 891 ** If LcsIterator.iCol is set to the following value, the iterator has |
| 987 ** finished iterating through all offsets for all columns. | 892 ** finished iterating through all offsets for all columns. |
| 988 */ | 893 */ |
| 989 #define LCS_ITERATOR_FINISHED 0x7FFFFFFF; | 894 #define LCS_ITERATOR_FINISHED 0x7FFFFFFF; |
| 990 | 895 |
| 991 static int fts3MatchinfoLcsCb( | 896 static int fts3MatchinfoLcsCb( |
| (...skipping 10 matching lines...) Expand all Loading... |
| 1002 ** Advance the iterator passed as an argument to the next position. Return | 907 ** Advance the iterator passed as an argument to the next position. Return |
| 1003 ** 1 if the iterator is at EOF or if it now points to the start of the | 908 ** 1 if the iterator is at EOF or if it now points to the start of the |
| 1004 ** position list for the next column. | 909 ** position list for the next column. |
| 1005 */ | 910 */ |
| 1006 static int fts3LcsIteratorAdvance(LcsIterator *pIter){ | 911 static int fts3LcsIteratorAdvance(LcsIterator *pIter){ |
| 1007 char *pRead = pIter->pRead; | 912 char *pRead = pIter->pRead; |
| 1008 sqlite3_int64 iRead; | 913 sqlite3_int64 iRead; |
| 1009 int rc = 0; | 914 int rc = 0; |
| 1010 | 915 |
| 1011 pRead += sqlite3Fts3GetVarint(pRead, &iRead); | 916 pRead += sqlite3Fts3GetVarint(pRead, &iRead); |
| 1012 if( iRead==0 ){ | 917 if( iRead==0 || iRead==1 ){ |
| 1013 pIter->iCol = LCS_ITERATOR_FINISHED; | 918 pRead = 0; |
| 1014 rc = 1; | 919 rc = 1; |
| 1015 }else{ | 920 }else{ |
| 1016 if( iRead==1 ){ | |
| 1017 pRead += sqlite3Fts3GetVarint(pRead, &iRead); | |
| 1018 pIter->iCol = (int)iRead; | |
| 1019 pIter->iPos = pIter->iPosOffset; | |
| 1020 pRead += sqlite3Fts3GetVarint(pRead, &iRead); | |
| 1021 rc = 1; | |
| 1022 } | |
| 1023 pIter->iPos += (int)(iRead-2); | 921 pIter->iPos += (int)(iRead-2); |
| 1024 } | 922 } |
| 1025 | 923 |
| 1026 pIter->pRead = pRead; | 924 pIter->pRead = pRead; |
| 1027 return rc; | 925 return rc; |
| 1028 } | 926 } |
| 1029 | 927 |
| 1030 /* | 928 /* |
| 1031 ** This function implements the FTS3_MATCHINFO_LCS matchinfo() flag. | 929 ** This function implements the FTS3_MATCHINFO_LCS matchinfo() flag. |
| 1032 ** | 930 ** |
| (...skipping 11 matching lines...) Expand all Loading... |
| 1044 int iCol; | 942 int iCol; |
| 1045 int nToken = 0; | 943 int nToken = 0; |
| 1046 | 944 |
| 1047 /* Allocate and populate the array of LcsIterator objects. The array | 945 /* Allocate and populate the array of LcsIterator objects. The array |
| 1048 ** contains one element for each matchable phrase in the query. | 946 ** contains one element for each matchable phrase in the query. |
| 1049 **/ | 947 **/ |
| 1050 aIter = sqlite3_malloc(sizeof(LcsIterator) * pCsr->nPhrase); | 948 aIter = sqlite3_malloc(sizeof(LcsIterator) * pCsr->nPhrase); |
| 1051 if( !aIter ) return SQLITE_NOMEM; | 949 if( !aIter ) return SQLITE_NOMEM; |
| 1052 memset(aIter, 0, sizeof(LcsIterator) * pCsr->nPhrase); | 950 memset(aIter, 0, sizeof(LcsIterator) * pCsr->nPhrase); |
| 1053 (void)fts3ExprIterate(pCsr->pExpr, fts3MatchinfoLcsCb, (void*)aIter); | 951 (void)fts3ExprIterate(pCsr->pExpr, fts3MatchinfoLcsCb, (void*)aIter); |
| 952 |
| 1054 for(i=0; i<pInfo->nPhrase; i++){ | 953 for(i=0; i<pInfo->nPhrase; i++){ |
| 1055 LcsIterator *pIter = &aIter[i]; | 954 LcsIterator *pIter = &aIter[i]; |
| 1056 nToken -= pIter->pExpr->pPhrase->nToken; | 955 nToken -= pIter->pExpr->pPhrase->nToken; |
| 1057 pIter->iPosOffset = nToken; | 956 pIter->iPosOffset = nToken; |
| 1058 pIter->pRead = sqlite3Fts3FindPositions(pIter->pExpr, pCsr->iPrevId, -1); | |
| 1059 if( pIter->pRead ){ | |
| 1060 pIter->iPos = pIter->iPosOffset; | |
| 1061 fts3LcsIteratorAdvance(&aIter[i]); | |
| 1062 }else{ | |
| 1063 pIter->iCol = LCS_ITERATOR_FINISHED; | |
| 1064 } | |
| 1065 } | 957 } |
| 1066 | 958 |
| 1067 for(iCol=0; iCol<pInfo->nCol; iCol++){ | 959 for(iCol=0; iCol<pInfo->nCol; iCol++){ |
| 1068 int nLcs = 0; /* LCS value for this column */ | 960 int nLcs = 0; /* LCS value for this column */ |
| 1069 int nLive = 0; /* Number of iterators in aIter not at EOF */ | 961 int nLive = 0; /* Number of iterators in aIter not at EOF */ |
| 1070 | 962 |
| 1071 /* Loop through the iterators in aIter[]. Set nLive to the number of | |
| 1072 ** iterators that point to a position-list corresponding to column iCol. | |
| 1073 */ | |
| 1074 for(i=0; i<pInfo->nPhrase; i++){ | 963 for(i=0; i<pInfo->nPhrase; i++){ |
| 1075 assert( aIter[i].iCol>=iCol ); | 964 int rc; |
| 1076 if( aIter[i].iCol==iCol ) nLive++; | 965 LcsIterator *pIt = &aIter[i]; |
| 966 rc = sqlite3Fts3EvalPhrasePoslist(pCsr, pIt->pExpr, iCol, &pIt->pRead); |
| 967 if( rc!=SQLITE_OK ) return rc; |
| 968 if( pIt->pRead ){ |
| 969 pIt->iPos = pIt->iPosOffset; |
| 970 fts3LcsIteratorAdvance(&aIter[i]); |
| 971 nLive++; |
| 972 } |
| 1077 } | 973 } |
| 1078 | 974 |
| 1079 /* The following loop runs until all iterators in aIter[] have finished | |
| 1080 ** iterating through positions in column iCol. Exactly one of the | |
| 1081 ** iterators is advanced each time the body of the loop is run. | |
| 1082 */ | |
| 1083 while( nLive>0 ){ | 975 while( nLive>0 ){ |
| 1084 LcsIterator *pAdv = 0; /* The iterator to advance by one position */ | 976 LcsIterator *pAdv = 0; /* The iterator to advance by one position */ |
| 1085 int nThisLcs = 0; /* LCS for the current iterator positions */ | 977 int nThisLcs = 0; /* LCS for the current iterator positions */ |
| 1086 | 978 |
| 1087 for(i=0; i<pInfo->nPhrase; i++){ | 979 for(i=0; i<pInfo->nPhrase; i++){ |
| 1088 LcsIterator *pIter = &aIter[i]; | 980 LcsIterator *pIter = &aIter[i]; |
| 1089 if( iCol!=pIter->iCol ){ | 981 if( pIter->pRead==0 ){ |
| 1090 /* This iterator is already at EOF for this column. */ | 982 /* This iterator is already at EOF for this column. */ |
| 1091 nThisLcs = 0; | 983 nThisLcs = 0; |
| 1092 }else{ | 984 }else{ |
| 1093 if( pAdv==0 || pIter->iPos<pAdv->iPos ){ | 985 if( pAdv==0 || pIter->iPos<pAdv->iPos ){ |
| 1094 pAdv = pIter; | 986 pAdv = pIter; |
| 1095 } | 987 } |
| 1096 if( nThisLcs==0 || pIter->iPos==pIter[-1].iPos ){ | 988 if( nThisLcs==0 || pIter->iPos==pIter[-1].iPos ){ |
| 1097 nThisLcs++; | 989 nThisLcs++; |
| 1098 }else{ | 990 }else{ |
| 1099 nThisLcs = 1; | 991 nThisLcs = 1; |
| (...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1145 case FTS3_MATCHINFO_NPHRASE: | 1037 case FTS3_MATCHINFO_NPHRASE: |
| 1146 if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nPhrase; | 1038 if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nPhrase; |
| 1147 break; | 1039 break; |
| 1148 | 1040 |
| 1149 case FTS3_MATCHINFO_NCOL: | 1041 case FTS3_MATCHINFO_NCOL: |
| 1150 if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nCol; | 1042 if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nCol; |
| 1151 break; | 1043 break; |
| 1152 | 1044 |
| 1153 case FTS3_MATCHINFO_NDOC: | 1045 case FTS3_MATCHINFO_NDOC: |
| 1154 if( bGlobal ){ | 1046 if( bGlobal ){ |
| 1155 sqlite3_int64 nDoc; | 1047 sqlite3_int64 nDoc = 0; |
| 1156 rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, 0); | 1048 rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, 0); |
| 1157 pInfo->aMatchinfo[0] = (u32)nDoc; | 1049 pInfo->aMatchinfo[0] = (u32)nDoc; |
| 1158 } | 1050 } |
| 1159 break; | 1051 break; |
| 1160 | 1052 |
| 1161 case FTS3_MATCHINFO_AVGLENGTH: | 1053 case FTS3_MATCHINFO_AVGLENGTH: |
| 1162 if( bGlobal ){ | 1054 if( bGlobal ){ |
| 1163 sqlite3_int64 nDoc; /* Number of rows in table */ | 1055 sqlite3_int64 nDoc; /* Number of rows in table */ |
| 1164 const char *a; /* Aggregate column length array */ | 1056 const char *a; /* Aggregate column length array */ |
| 1165 | 1057 |
| (...skipping 235 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1401 typedef struct TermOffset TermOffset; | 1293 typedef struct TermOffset TermOffset; |
| 1402 typedef struct TermOffsetCtx TermOffsetCtx; | 1294 typedef struct TermOffsetCtx TermOffsetCtx; |
| 1403 | 1295 |
| 1404 struct TermOffset { | 1296 struct TermOffset { |
| 1405 char *pList; /* Position-list */ | 1297 char *pList; /* Position-list */ |
| 1406 int iPos; /* Position just read from pList */ | 1298 int iPos; /* Position just read from pList */ |
| 1407 int iOff; /* Offset of this term from read positions */ | 1299 int iOff; /* Offset of this term from read positions */ |
| 1408 }; | 1300 }; |
| 1409 | 1301 |
| 1410 struct TermOffsetCtx { | 1302 struct TermOffsetCtx { |
| 1303 Fts3Cursor *pCsr; |
| 1411 int iCol; /* Column of table to populate aTerm for */ | 1304 int iCol; /* Column of table to populate aTerm for */ |
| 1412 int iTerm; | 1305 int iTerm; |
| 1413 sqlite3_int64 iDocid; | 1306 sqlite3_int64 iDocid; |
| 1414 TermOffset *aTerm; | 1307 TermOffset *aTerm; |
| 1415 }; | 1308 }; |
| 1416 | 1309 |
| 1417 /* | 1310 /* |
| 1418 ** This function is an fts3ExprIterate() callback used by sqlite3Fts3Offsets(). | 1311 ** This function is an fts3ExprIterate() callback used by sqlite3Fts3Offsets(). |
| 1419 */ | 1312 */ |
| 1420 static int fts3ExprTermOffsetInit(Fts3Expr *pExpr, int iPhrase, void *ctx){ | 1313 static int fts3ExprTermOffsetInit(Fts3Expr *pExpr, int iPhrase, void *ctx){ |
| 1421 TermOffsetCtx *p = (TermOffsetCtx *)ctx; | 1314 TermOffsetCtx *p = (TermOffsetCtx *)ctx; |
| 1422 int nTerm; /* Number of tokens in phrase */ | 1315 int nTerm; /* Number of tokens in phrase */ |
| 1423 int iTerm; /* For looping through nTerm phrase terms */ | 1316 int iTerm; /* For looping through nTerm phrase terms */ |
| 1424 char *pList; /* Pointer to position list for phrase */ | 1317 char *pList; /* Pointer to position list for phrase */ |
| 1425 int iPos = 0; /* First position in position-list */ | 1318 int iPos = 0; /* First position in position-list */ |
| 1319 int rc; |
| 1426 | 1320 |
| 1427 UNUSED_PARAMETER(iPhrase); | 1321 UNUSED_PARAMETER(iPhrase); |
| 1428 pList = sqlite3Fts3FindPositions(pExpr, p->iDocid, p->iCol); | 1322 rc = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->iCol, &pList); |
| 1429 nTerm = pExpr->pPhrase->nToken; | 1323 nTerm = pExpr->pPhrase->nToken; |
| 1430 if( pList ){ | 1324 if( pList ){ |
| 1431 fts3GetDeltaPosition(&pList, &iPos); | 1325 fts3GetDeltaPosition(&pList, &iPos); |
| 1432 assert( iPos>=0 ); | 1326 assert( iPos>=0 ); |
| 1433 } | 1327 } |
| 1434 | 1328 |
| 1435 for(iTerm=0; iTerm<nTerm; iTerm++){ | 1329 for(iTerm=0; iTerm<nTerm; iTerm++){ |
| 1436 TermOffset *pT = &p->aTerm[p->iTerm++]; | 1330 TermOffset *pT = &p->aTerm[p->iTerm++]; |
| 1437 pT->iOff = nTerm-iTerm-1; | 1331 pT->iOff = nTerm-iTerm-1; |
| 1438 pT->pList = pList; | 1332 pT->pList = pList; |
| 1439 pT->iPos = iPos; | 1333 pT->iPos = iPos; |
| 1440 } | 1334 } |
| 1441 | 1335 |
| 1442 return SQLITE_OK; | 1336 return rc; |
| 1443 } | 1337 } |
| 1444 | 1338 |
| 1445 /* | 1339 /* |
| 1446 ** Implementation of offsets() function. | 1340 ** Implementation of offsets() function. |
| 1447 */ | 1341 */ |
| 1448 void sqlite3Fts3Offsets( | 1342 void sqlite3Fts3Offsets( |
| 1449 sqlite3_context *pCtx, /* SQLite function call context */ | 1343 sqlite3_context *pCtx, /* SQLite function call context */ |
| 1450 Fts3Cursor *pCsr /* Cursor object */ | 1344 Fts3Cursor *pCsr /* Cursor object */ |
| 1451 ){ | 1345 ){ |
| 1452 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; | 1346 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; |
| 1453 sqlite3_tokenizer_module const *pMod = pTab->pTokenizer->pModule; | 1347 sqlite3_tokenizer_module const *pMod = pTab->pTokenizer->pModule; |
| 1454 const char *ZDUMMY; /* Dummy argument used with xNext() */ | |
| 1455 int NDUMMY; /* Dummy argument used with xNext() */ | |
| 1456 int rc; /* Return Code */ | 1348 int rc; /* Return Code */ |
| 1457 int nToken; /* Number of tokens in query */ | 1349 int nToken; /* Number of tokens in query */ |
| 1458 int iCol; /* Column currently being processed */ | 1350 int iCol; /* Column currently being processed */ |
| 1459 StrBuffer res = {0, 0, 0}; /* Result string */ | 1351 StrBuffer res = {0, 0, 0}; /* Result string */ |
| 1460 TermOffsetCtx sCtx; /* Context for fts3ExprTermOffsetInit() */ | 1352 TermOffsetCtx sCtx; /* Context for fts3ExprTermOffsetInit() */ |
| 1461 | 1353 |
| 1462 if( !pCsr->pExpr ){ | 1354 if( !pCsr->pExpr ){ |
| 1463 sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC); | 1355 sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC); |
| 1464 return; | 1356 return; |
| 1465 } | 1357 } |
| 1466 | 1358 |
| 1467 memset(&sCtx, 0, sizeof(sCtx)); | 1359 memset(&sCtx, 0, sizeof(sCtx)); |
| 1468 assert( pCsr->isRequireSeek==0 ); | 1360 assert( pCsr->isRequireSeek==0 ); |
| 1469 | 1361 |
| 1470 /* Count the number of terms in the query */ | 1362 /* Count the number of terms in the query */ |
| 1471 rc = fts3ExprLoadDoclists(pCsr, 0, &nToken); | 1363 rc = fts3ExprLoadDoclists(pCsr, 0, &nToken); |
| 1472 if( rc!=SQLITE_OK ) goto offsets_out; | 1364 if( rc!=SQLITE_OK ) goto offsets_out; |
| 1473 | 1365 |
| 1474 /* Allocate the array of TermOffset iterators. */ | 1366 /* Allocate the array of TermOffset iterators. */ |
| 1475 sCtx.aTerm = (TermOffset *)sqlite3_malloc(sizeof(TermOffset)*nToken); | 1367 sCtx.aTerm = (TermOffset *)sqlite3_malloc(sizeof(TermOffset)*nToken); |
| 1476 if( 0==sCtx.aTerm ){ | 1368 if( 0==sCtx.aTerm ){ |
| 1477 rc = SQLITE_NOMEM; | 1369 rc = SQLITE_NOMEM; |
| 1478 goto offsets_out; | 1370 goto offsets_out; |
| 1479 } | 1371 } |
| 1480 sCtx.iDocid = pCsr->iPrevId; | 1372 sCtx.iDocid = pCsr->iPrevId; |
| 1373 sCtx.pCsr = pCsr; |
| 1481 | 1374 |
| 1482 /* Loop through the table columns, appending offset information to | 1375 /* Loop through the table columns, appending offset information to |
| 1483 ** string-buffer res for each column. | 1376 ** string-buffer res for each column. |
| 1484 */ | 1377 */ |
| 1485 for(iCol=0; iCol<pTab->nColumn; iCol++){ | 1378 for(iCol=0; iCol<pTab->nColumn; iCol++){ |
| 1486 sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor */ | 1379 sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor */ |
| 1487 int iStart; | 1380 const char *ZDUMMY; /* Dummy argument used with xNext() */ |
| 1488 int iEnd; | 1381 int NDUMMY = 0; /* Dummy argument used with xNext() */ |
| 1489 int iCurrent; | 1382 int iStart = 0; |
| 1383 int iEnd = 0; |
| 1384 int iCurrent = 0; |
| 1490 const char *zDoc; | 1385 const char *zDoc; |
| 1491 int nDoc; | 1386 int nDoc; |
| 1492 | 1387 |
| 1493 /* Initialize the contents of sCtx.aTerm[] for column iCol. There is | 1388 /* Initialize the contents of sCtx.aTerm[] for column iCol. There is |
| 1494 ** no way that this operation can fail, so the return code from | 1389 ** no way that this operation can fail, so the return code from |
| 1495 ** fts3ExprIterate() can be discarded. | 1390 ** fts3ExprIterate() can be discarded. |
| 1496 */ | 1391 */ |
| 1497 sCtx.iCol = iCol; | 1392 sCtx.iCol = iCol; |
| 1498 sCtx.iTerm = 0; | 1393 sCtx.iTerm = 0; |
| 1499 (void)fts3ExprIterate(pCsr->pExpr, fts3ExprTermOffsetInit, (void *)&sCtx); | 1394 (void)fts3ExprIterate(pCsr->pExpr, fts3ExprTermOffsetInit, (void *)&sCtx); |
| 1500 | 1395 |
| 1501 /* Retrieve the text stored in column iCol. If an SQL NULL is stored | 1396 /* Retrieve the text stored in column iCol. If an SQL NULL is stored |
| 1502 ** in column iCol, jump immediately to the next iteration of the loop. | 1397 ** in column iCol, jump immediately to the next iteration of the loop. |
| 1503 ** If an OOM occurs while retrieving the data (this can happen if SQLite | 1398 ** If an OOM occurs while retrieving the data (this can happen if SQLite |
| 1504 ** needs to transform the data from utf-16 to utf-8), return SQLITE_NOMEM | 1399 ** needs to transform the data from utf-16 to utf-8), return SQLITE_NOMEM |
| 1505 ** to the caller. | 1400 ** to the caller. |
| 1506 */ | 1401 */ |
| 1507 zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol+1); | 1402 zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol+1); |
| 1508 nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol+1); | 1403 nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol+1); |
| 1509 if( zDoc==0 ){ | 1404 if( zDoc==0 ){ |
| 1510 if( sqlite3_column_type(pCsr->pStmt, iCol+1)==SQLITE_NULL ){ | 1405 if( sqlite3_column_type(pCsr->pStmt, iCol+1)==SQLITE_NULL ){ |
| 1511 continue; | 1406 continue; |
| 1512 } | 1407 } |
| 1513 rc = SQLITE_NOMEM; | 1408 rc = SQLITE_NOMEM; |
| 1514 goto offsets_out; | 1409 goto offsets_out; |
| 1515 } | 1410 } |
| 1516 | 1411 |
| 1517 /* Initialize a tokenizer iterator to iterate through column iCol. */ | 1412 /* Initialize a tokenizer iterator to iterate through column iCol. */ |
| 1518 rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC); | 1413 rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid, |
| 1414 zDoc, nDoc, &pC |
| 1415 ); |
| 1519 if( rc!=SQLITE_OK ) goto offsets_out; | 1416 if( rc!=SQLITE_OK ) goto offsets_out; |
| 1520 pC->pTokenizer = pTab->pTokenizer; | |
| 1521 | 1417 |
| 1522 rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent); | 1418 rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent); |
| 1523 while( rc==SQLITE_OK ){ | 1419 while( rc==SQLITE_OK ){ |
| 1524 int i; /* Used to loop through terms */ | 1420 int i; /* Used to loop through terms */ |
| 1525 int iMinPos = 0x7FFFFFFF; /* Position of next token */ | 1421 int iMinPos = 0x7FFFFFFF; /* Position of next token */ |
| 1526 TermOffset *pTerm = 0; /* TermOffset associated with next token */ | 1422 TermOffset *pTerm = 0; /* TermOffset associated with next token */ |
| 1527 | 1423 |
| 1528 for(i=0; i<nToken; i++){ | 1424 for(i=0; i<nToken; i++){ |
| 1529 TermOffset *pT = &sCtx.aTerm[i]; | 1425 TermOffset *pT = &sCtx.aTerm[i]; |
| 1530 if( pT->pList && (pT->iPos-pT->iOff)<iMinPos ){ | 1426 if( pT->pList && (pT->iPos-pT->iOff)<iMinPos ){ |
| 1531 iMinPos = pT->iPos-pT->iOff; | 1427 iMinPos = pT->iPos-pT->iOff; |
| 1532 pTerm = pT; | 1428 pTerm = pT; |
| 1533 } | 1429 } |
| 1534 } | 1430 } |
| 1535 | 1431 |
| 1536 if( !pTerm ){ | 1432 if( !pTerm ){ |
| 1537 /* All offsets for this column have been gathered. */ | 1433 /* All offsets for this column have been gathered. */ |
| 1538 break; | 1434 rc = SQLITE_DONE; |
| 1539 }else{ | 1435 }else{ |
| 1540 assert( iCurrent<=iMinPos ); | 1436 assert( iCurrent<=iMinPos ); |
| 1541 if( 0==(0xFE&*pTerm->pList) ){ | 1437 if( 0==(0xFE&*pTerm->pList) ){ |
| 1542 pTerm->pList = 0; | 1438 pTerm->pList = 0; |
| 1543 }else{ | 1439 }else{ |
| 1544 fts3GetDeltaPosition(&pTerm->pList, &pTerm->iPos); | 1440 fts3GetDeltaPosition(&pTerm->pList, &pTerm->iPos); |
| 1545 } | 1441 } |
| 1546 while( rc==SQLITE_OK && iCurrent<iMinPos ){ | 1442 while( rc==SQLITE_OK && iCurrent<iMinPos ){ |
| 1547 rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent); | 1443 rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent); |
| 1548 } | 1444 } |
| 1549 if( rc==SQLITE_OK ){ | 1445 if( rc==SQLITE_OK ){ |
| 1550 char aBuffer[64]; | 1446 char aBuffer[64]; |
| 1551 sqlite3_snprintf(sizeof(aBuffer), aBuffer, | 1447 sqlite3_snprintf(sizeof(aBuffer), aBuffer, |
| 1552 "%d %d %d %d ", iCol, pTerm-sCtx.aTerm, iStart, iEnd-iStart | 1448 "%d %d %d %d ", iCol, pTerm-sCtx.aTerm, iStart, iEnd-iStart |
| 1553 ); | 1449 ); |
| 1554 rc = fts3StringAppend(&res, aBuffer, -1); | 1450 rc = fts3StringAppend(&res, aBuffer, -1); |
| 1555 }else if( rc==SQLITE_DONE ){ | 1451 }else if( rc==SQLITE_DONE && pTab->zContentTbl==0 ){ |
| 1556 rc = SQLITE_CORRUPT; | 1452 rc = FTS_CORRUPT_VTAB; |
| 1557 } | 1453 } |
| 1558 } | 1454 } |
| 1559 } | 1455 } |
| 1560 if( rc==SQLITE_DONE ){ | 1456 if( rc==SQLITE_DONE ){ |
| 1561 rc = SQLITE_OK; | 1457 rc = SQLITE_OK; |
| 1562 } | 1458 } |
| 1563 | 1459 |
| 1564 pMod->xClose(pC); | 1460 pMod->xClose(pC); |
| 1565 if( rc!=SQLITE_OK ) goto offsets_out; | 1461 if( rc!=SQLITE_OK ) goto offsets_out; |
| 1566 } | 1462 } |
| (...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1616 | 1512 |
| 1617 if( rc!=SQLITE_OK ){ | 1513 if( rc!=SQLITE_OK ){ |
| 1618 sqlite3_result_error_code(pContext, rc); | 1514 sqlite3_result_error_code(pContext, rc); |
| 1619 }else{ | 1515 }else{ |
| 1620 int n = pCsr->nMatchinfo * sizeof(u32); | 1516 int n = pCsr->nMatchinfo * sizeof(u32); |
| 1621 sqlite3_result_blob(pContext, pCsr->aMatchinfo, n, SQLITE_TRANSIENT); | 1517 sqlite3_result_blob(pContext, pCsr->aMatchinfo, n, SQLITE_TRANSIENT); |
| 1622 } | 1518 } |
| 1623 } | 1519 } |
| 1624 | 1520 |
| 1625 #endif | 1521 #endif |
| OLD | NEW |