OLD | NEW |
1 /* | 1 /* |
2 ** 2009 Oct 23 | 2 ** 2009 Oct 23 |
3 ** | 3 ** |
4 ** The author disclaims copyright to this source code. In place of | 4 ** The author disclaims copyright to this source code. In place of |
5 ** a legal notice, here is a blessing: | 5 ** a legal notice, here is a blessing: |
6 ** | 6 ** |
7 ** May you do good and not evil. | 7 ** May you do good and not evil. |
8 ** May you find forgiveness for yourself and forgive others. | 8 ** May you find forgiveness for yourself and forgive others. |
9 ** May you share freely, never taking more than you give. | 9 ** May you share freely, never taking more than you give. |
10 ** | 10 ** |
11 ****************************************************************************** | 11 ****************************************************************************** |
12 */ | 12 */ |
13 | 13 |
| 14 #include "fts3Int.h" |
14 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) | 15 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) |
15 | 16 |
16 #include "fts3Int.h" | |
17 #include <string.h> | 17 #include <string.h> |
18 #include <assert.h> | 18 #include <assert.h> |
19 | 19 |
20 /* | 20 /* |
21 ** Characters that may appear in the second argument to matchinfo(). | 21 ** Characters that may appear in the second argument to matchinfo(). |
22 */ | 22 */ |
23 #define FTS3_MATCHINFO_NPHRASE 'p' /* 1 value */ | 23 #define FTS3_MATCHINFO_NPHRASE 'p' /* 1 value */ |
24 #define FTS3_MATCHINFO_NCOL 'c' /* 1 value */ | 24 #define FTS3_MATCHINFO_NCOL 'c' /* 1 value */ |
25 #define FTS3_MATCHINFO_NDOC 'n' /* 1 value */ | 25 #define FTS3_MATCHINFO_NDOC 'n' /* 1 value */ |
26 #define FTS3_MATCHINFO_AVGLENGTH 'a' /* nCol values */ | 26 #define FTS3_MATCHINFO_AVGLENGTH 'a' /* nCol values */ |
(...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
121 ** | 121 ** |
122 ** are encoded. | 122 ** are encoded. |
123 ** | 123 ** |
124 ** When this function is called, *pp points to the start of an element of | 124 ** When this function is called, *pp points to the start of an element of |
125 ** the list. *piPos contains the value of the previous entry in the list. | 125 ** the list. *piPos contains the value of the previous entry in the list. |
126 ** After it returns, *piPos contains the value of the next element of the | 126 ** After it returns, *piPos contains the value of the next element of the |
127 ** list and *pp is advanced to the following varint. | 127 ** list and *pp is advanced to the following varint. |
128 */ | 128 */ |
129 static void fts3GetDeltaPosition(char **pp, int *piPos){ | 129 static void fts3GetDeltaPosition(char **pp, int *piPos){ |
130 int iVal; | 130 int iVal; |
131 *pp += sqlite3Fts3GetVarint32(*pp, &iVal); | 131 *pp += fts3GetVarint32(*pp, &iVal); |
132 *piPos += (iVal-2); | 132 *piPos += (iVal-2); |
133 } | 133 } |
134 | 134 |
135 /* | 135 /* |
136 ** Helper function for fts3ExprIterate() (see below). | 136 ** Helper function for fts3ExprIterate() (see below). |
137 */ | 137 */ |
138 static int fts3ExprIterate2( | 138 static int fts3ExprIterate2( |
139 Fts3Expr *pExpr, /* Expression to iterate phrases of */ | 139 Fts3Expr *pExpr, /* Expression to iterate phrases of */ |
140 int *piPhrase, /* Pointer to phrase counter */ | 140 int *piPhrase, /* Pointer to phrase counter */ |
141 int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */ | 141 int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */ |
(...skipping 28 matching lines...) Expand all Loading... |
170 static int fts3ExprIterate( | 170 static int fts3ExprIterate( |
171 Fts3Expr *pExpr, /* Expression to iterate phrases of */ | 171 Fts3Expr *pExpr, /* Expression to iterate phrases of */ |
172 int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */ | 172 int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */ |
173 void *pCtx /* Second argument to pass to callback */ | 173 void *pCtx /* Second argument to pass to callback */ |
174 ){ | 174 ){ |
175 int iPhrase = 0; /* Variable used as the phrase counter */ | 175 int iPhrase = 0; /* Variable used as the phrase counter */ |
176 return fts3ExprIterate2(pExpr, &iPhrase, x, pCtx); | 176 return fts3ExprIterate2(pExpr, &iPhrase, x, pCtx); |
177 } | 177 } |
178 | 178 |
179 /* | 179 /* |
180 ** The argument to this function is always a phrase node. Its doclist | |
181 ** (Fts3Expr.aDoclist[]) and the doclists associated with all phrase nodes | |
182 ** to the left of this one in the query tree have already been loaded. | |
183 ** | |
184 ** If this phrase node is part of a series of phrase nodes joined by | |
185 ** NEAR operators (and is not the left-most of said series), then elements are | |
186 ** removed from the phrases doclist consistent with the NEAR restriction. If | |
187 ** required, elements may be removed from the doclists of phrases to the | |
188 ** left of this one that are part of the same series of NEAR operator | |
189 ** connected phrases. | |
190 ** | |
191 ** If an OOM error occurs, SQLITE_NOMEM is returned. Otherwise, SQLITE_OK. | |
192 */ | |
193 static int fts3ExprNearTrim(Fts3Expr *pExpr){ | |
194 int rc = SQLITE_OK; | |
195 Fts3Expr *pParent = pExpr->pParent; | |
196 | |
197 assert( pExpr->eType==FTSQUERY_PHRASE ); | |
198 while( rc==SQLITE_OK | |
199 && pParent | |
200 && pParent->eType==FTSQUERY_NEAR | |
201 && pParent->pRight==pExpr | |
202 ){ | |
203 /* This expression (pExpr) is the right-hand-side of a NEAR operator. | |
204 ** Find the expression to the left of the same operator. | |
205 */ | |
206 int nNear = pParent->nNear; | |
207 Fts3Expr *pLeft = pParent->pLeft; | |
208 | |
209 if( pLeft->eType!=FTSQUERY_PHRASE ){ | |
210 assert( pLeft->eType==FTSQUERY_NEAR ); | |
211 assert( pLeft->pRight->eType==FTSQUERY_PHRASE ); | |
212 pLeft = pLeft->pRight; | |
213 } | |
214 | |
215 rc = sqlite3Fts3ExprNearTrim(pLeft, pExpr, nNear); | |
216 | |
217 pExpr = pLeft; | |
218 pParent = pExpr->pParent; | |
219 } | |
220 | |
221 return rc; | |
222 } | |
223 | |
224 /* | |
225 ** This is an fts3ExprIterate() callback used while loading the doclists | 180 ** This is an fts3ExprIterate() callback used while loading the doclists |
226 ** for each phrase into Fts3Expr.aDoclist[]/nDoclist. See also | 181 ** for each phrase into Fts3Expr.aDoclist[]/nDoclist. See also |
227 ** fts3ExprLoadDoclists(). | 182 ** fts3ExprLoadDoclists(). |
228 */ | 183 */ |
229 static int fts3ExprLoadDoclistsCb(Fts3Expr *pExpr, int iPhrase, void *ctx){ | 184 static int fts3ExprLoadDoclistsCb(Fts3Expr *pExpr, int iPhrase, void *ctx){ |
230 int rc = SQLITE_OK; | 185 int rc = SQLITE_OK; |
| 186 Fts3Phrase *pPhrase = pExpr->pPhrase; |
231 LoadDoclistCtx *p = (LoadDoclistCtx *)ctx; | 187 LoadDoclistCtx *p = (LoadDoclistCtx *)ctx; |
232 | 188 |
233 UNUSED_PARAMETER(iPhrase); | 189 UNUSED_PARAMETER(iPhrase); |
234 | 190 |
235 p->nPhrase++; | 191 p->nPhrase++; |
236 p->nToken += pExpr->pPhrase->nToken; | 192 p->nToken += pPhrase->nToken; |
237 | |
238 if( pExpr->isLoaded==0 ){ | |
239 rc = sqlite3Fts3ExprLoadDoclist(p->pCsr, pExpr); | |
240 pExpr->isLoaded = 1; | |
241 if( rc==SQLITE_OK ){ | |
242 rc = fts3ExprNearTrim(pExpr); | |
243 } | |
244 } | |
245 | 193 |
246 return rc; | 194 return rc; |
247 } | 195 } |
248 | 196 |
249 /* | 197 /* |
250 ** Load the doclists for each phrase in the query associated with FTS3 cursor | 198 ** Load the doclists for each phrase in the query associated with FTS3 cursor |
251 ** pCsr. | 199 ** pCsr. |
252 ** | 200 ** |
253 ** If pnPhrase is not NULL, then *pnPhrase is set to the number of matchable | 201 ** If pnPhrase is not NULL, then *pnPhrase is set to the number of matchable |
254 ** phrases in the expression (all phrases except those directly or | 202 ** phrases in the expression (all phrases except those directly or |
(...skipping 150 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
405 } | 353 } |
406 | 354 |
407 /* | 355 /* |
408 ** This function is an fts3ExprIterate() callback used by fts3BestSnippet(). | 356 ** This function is an fts3ExprIterate() callback used by fts3BestSnippet(). |
409 ** Each invocation populates an element of the SnippetIter.aPhrase[] array. | 357 ** Each invocation populates an element of the SnippetIter.aPhrase[] array. |
410 */ | 358 */ |
411 static int fts3SnippetFindPositions(Fts3Expr *pExpr, int iPhrase, void *ctx){ | 359 static int fts3SnippetFindPositions(Fts3Expr *pExpr, int iPhrase, void *ctx){ |
412 SnippetIter *p = (SnippetIter *)ctx; | 360 SnippetIter *p = (SnippetIter *)ctx; |
413 SnippetPhrase *pPhrase = &p->aPhrase[iPhrase]; | 361 SnippetPhrase *pPhrase = &p->aPhrase[iPhrase]; |
414 char *pCsr; | 362 char *pCsr; |
| 363 int rc; |
415 | 364 |
416 pPhrase->nToken = pExpr->pPhrase->nToken; | 365 pPhrase->nToken = pExpr->pPhrase->nToken; |
417 | 366 rc = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->iCol, &pCsr); |
418 pCsr = sqlite3Fts3FindPositions(pExpr, p->pCsr->iPrevId, p->iCol); | 367 assert( rc==SQLITE_OK || pCsr==0 ); |
419 if( pCsr ){ | 368 if( pCsr ){ |
420 int iFirst = 0; | 369 int iFirst = 0; |
421 pPhrase->pList = pCsr; | 370 pPhrase->pList = pCsr; |
422 fts3GetDeltaPosition(&pCsr, &iFirst); | 371 fts3GetDeltaPosition(&pCsr, &iFirst); |
| 372 assert( iFirst>=0 ); |
423 pPhrase->pHead = pCsr; | 373 pPhrase->pHead = pCsr; |
424 pPhrase->pTail = pCsr; | 374 pPhrase->pTail = pCsr; |
425 pPhrase->iHead = iFirst; | 375 pPhrase->iHead = iFirst; |
426 pPhrase->iTail = iFirst; | 376 pPhrase->iTail = iFirst; |
427 }else{ | 377 }else{ |
428 assert( pPhrase->pList==0 && pPhrase->pHead==0 && pPhrase->pTail==0 ); | 378 assert( rc!=SQLITE_OK || ( |
| 379 pPhrase->pList==0 && pPhrase->pHead==0 && pPhrase->pTail==0 |
| 380 )); |
429 } | 381 } |
430 | 382 |
431 return SQLITE_OK; | 383 return rc; |
432 } | 384 } |
433 | 385 |
434 /* | 386 /* |
435 ** Select the fragment of text consisting of nFragment contiguous tokens | 387 ** Select the fragment of text consisting of nFragment contiguous tokens |
436 ** from column iCol that represent the "best" snippet. The best snippet | 388 ** from column iCol that represent the "best" snippet. The best snippet |
437 ** is the snippet with the highest score, where scores are calculated | 389 ** is the snippet with the highest score, where scores are calculated |
438 ** by adding: | 390 ** by adding: |
439 ** | 391 ** |
440 ** (a) +1 point for each occurence of a matchable phrase in the snippet. | 392 ** (a) +1 point for each occurrence of a matchable phrase in the snippet. |
441 ** | 393 ** |
442 ** (b) +1000 points for the first occurence of each matchable phrase in | 394 ** (b) +1000 points for the first occurrence of each matchable phrase in |
443 ** the snippet for which the corresponding mCovered bit is not set. | 395 ** the snippet for which the corresponding mCovered bit is not set. |
444 ** | 396 ** |
445 ** The selected snippet parameters are stored in structure *pFragment before | 397 ** The selected snippet parameters are stored in structure *pFragment before |
446 ** returning. The score of the selected snippet is stored in *piScore | 398 ** returning. The score of the selected snippet is stored in *piScore |
447 ** before returning. | 399 ** before returning. |
448 */ | 400 */ |
449 static int fts3BestSnippet( | 401 static int fts3BestSnippet( |
450 int nSnippet, /* Desired snippet length */ | 402 int nSnippet, /* Desired snippet length */ |
451 Fts3Cursor *pCsr, /* Cursor to create snippet for */ | 403 Fts3Cursor *pCsr, /* Cursor to create snippet for */ |
452 int iCol, /* Index of column to create snippet from */ | 404 int iCol, /* Index of column to create snippet from */ |
(...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
545 */ | 497 */ |
546 if( pStr->n+nAppend+1>=pStr->nAlloc ){ | 498 if( pStr->n+nAppend+1>=pStr->nAlloc ){ |
547 int nAlloc = pStr->nAlloc+nAppend+100; | 499 int nAlloc = pStr->nAlloc+nAppend+100; |
548 char *zNew = sqlite3_realloc(pStr->z, nAlloc); | 500 char *zNew = sqlite3_realloc(pStr->z, nAlloc); |
549 if( !zNew ){ | 501 if( !zNew ){ |
550 return SQLITE_NOMEM; | 502 return SQLITE_NOMEM; |
551 } | 503 } |
552 pStr->z = zNew; | 504 pStr->z = zNew; |
553 pStr->nAlloc = nAlloc; | 505 pStr->nAlloc = nAlloc; |
554 } | 506 } |
| 507 assert( pStr->z!=0 && (pStr->nAlloc >= pStr->n+nAppend+1) ); |
555 | 508 |
556 /* Append the data to the string buffer. */ | 509 /* Append the data to the string buffer. */ |
557 memcpy(&pStr->z[pStr->n], zAppend, nAppend); | 510 memcpy(&pStr->z[pStr->n], zAppend, nAppend); |
558 pStr->n += nAppend; | 511 pStr->n += nAppend; |
559 pStr->z[pStr->n] = '\0'; | 512 pStr->z[pStr->n] = '\0'; |
560 | 513 |
561 return SQLITE_OK; | 514 return SQLITE_OK; |
562 } | 515 } |
563 | 516 |
564 /* | 517 /* |
(...skipping 11 matching lines...) Expand all Loading... |
576 ** | 529 ** |
577 ** ....X.....X.... | 530 ** ....X.....X.... |
578 ** | 531 ** |
579 ** This is done as part of extracting the snippet text, not when selecting | 532 ** This is done as part of extracting the snippet text, not when selecting |
580 ** the snippet. Snippet selection is done based on doclists only, so there | 533 ** the snippet. Snippet selection is done based on doclists only, so there |
581 ** is no way for fts3BestSnippet() to know whether or not the document | 534 ** is no way for fts3BestSnippet() to know whether or not the document |
582 ** actually contains terms that follow the final highlighted term. | 535 ** actually contains terms that follow the final highlighted term. |
583 */ | 536 */ |
584 static int fts3SnippetShift( | 537 static int fts3SnippetShift( |
585 Fts3Table *pTab, /* FTS3 table snippet comes from */ | 538 Fts3Table *pTab, /* FTS3 table snippet comes from */ |
| 539 int iLangid, /* Language id to use in tokenizing */ |
586 int nSnippet, /* Number of tokens desired for snippet */ | 540 int nSnippet, /* Number of tokens desired for snippet */ |
587 const char *zDoc, /* Document text to extract snippet from */ | 541 const char *zDoc, /* Document text to extract snippet from */ |
588 int nDoc, /* Size of buffer zDoc in bytes */ | 542 int nDoc, /* Size of buffer zDoc in bytes */ |
589 int *piPos, /* IN/OUT: First token of snippet */ | 543 int *piPos, /* IN/OUT: First token of snippet */ |
590 u64 *pHlmask /* IN/OUT: Mask of tokens to highlight */ | 544 u64 *pHlmask /* IN/OUT: Mask of tokens to highlight */ |
591 ){ | 545 ){ |
592 u64 hlmask = *pHlmask; /* Local copy of initial highlight-mask */ | 546 u64 hlmask = *pHlmask; /* Local copy of initial highlight-mask */ |
593 | 547 |
594 if( hlmask ){ | 548 if( hlmask ){ |
595 int nLeft; /* Tokens to the left of first highlight */ | 549 int nLeft; /* Tokens to the left of first highlight */ |
(...skipping 15 matching lines...) Expand all Loading... |
611 int nShift; /* Number of tokens to shift snippet by */ | 565 int nShift; /* Number of tokens to shift snippet by */ |
612 int iCurrent = 0; /* Token counter */ | 566 int iCurrent = 0; /* Token counter */ |
613 int rc; /* Return Code */ | 567 int rc; /* Return Code */ |
614 sqlite3_tokenizer_module *pMod; | 568 sqlite3_tokenizer_module *pMod; |
615 sqlite3_tokenizer_cursor *pC; | 569 sqlite3_tokenizer_cursor *pC; |
616 pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule; | 570 pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule; |
617 | 571 |
618 /* Open a cursor on zDoc/nDoc. Check if there are (nSnippet+nDesired) | 572 /* Open a cursor on zDoc/nDoc. Check if there are (nSnippet+nDesired) |
619 ** or more tokens in zDoc/nDoc. | 573 ** or more tokens in zDoc/nDoc. |
620 */ | 574 */ |
621 rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC); | 575 rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, iLangid, zDoc, nDoc, &pC); |
622 if( rc!=SQLITE_OK ){ | 576 if( rc!=SQLITE_OK ){ |
623 return rc; | 577 return rc; |
624 } | 578 } |
625 pC->pTokenizer = pTab->pTokenizer; | |
626 while( rc==SQLITE_OK && iCurrent<(nSnippet+nDesired) ){ | 579 while( rc==SQLITE_OK && iCurrent<(nSnippet+nDesired) ){ |
627 const char *ZDUMMY; int DUMMY1, DUMMY2, DUMMY3; | 580 const char *ZDUMMY; int DUMMY1 = 0, DUMMY2 = 0, DUMMY3 = 0; |
628 rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent); | 581 rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent); |
629 } | 582 } |
630 pMod->xClose(pC); | 583 pMod->xClose(pC); |
631 if( rc!=SQLITE_OK && rc!=SQLITE_DONE ){ return rc; } | 584 if( rc!=SQLITE_OK && rc!=SQLITE_DONE ){ return rc; } |
632 | 585 |
633 nShift = (rc==SQLITE_DONE)+iCurrent-nSnippet; | 586 nShift = (rc==SQLITE_DONE)+iCurrent-nSnippet; |
634 assert( nShift<=nDesired ); | 587 assert( nShift<=nDesired ); |
635 if( nShift>0 ){ | 588 if( nShift>0 ){ |
636 *piPos += nShift; | 589 *piPos += nShift; |
637 *pHlmask = hlmask >> nShift; | 590 *pHlmask = hlmask >> nShift; |
(...skipping 23 matching lines...) Expand all Loading... |
661 const char *zDoc; /* Document text to extract snippet from */ | 614 const char *zDoc; /* Document text to extract snippet from */ |
662 int nDoc; /* Size of zDoc in bytes */ | 615 int nDoc; /* Size of zDoc in bytes */ |
663 int iCurrent = 0; /* Current token number of document */ | 616 int iCurrent = 0; /* Current token number of document */ |
664 int iEnd = 0; /* Byte offset of end of current token */ | 617 int iEnd = 0; /* Byte offset of end of current token */ |
665 int isShiftDone = 0; /* True after snippet is shifted */ | 618 int isShiftDone = 0; /* True after snippet is shifted */ |
666 int iPos = pFragment->iPos; /* First token of snippet */ | 619 int iPos = pFragment->iPos; /* First token of snippet */ |
667 u64 hlmask = pFragment->hlmask; /* Highlight-mask for snippet */ | 620 u64 hlmask = pFragment->hlmask; /* Highlight-mask for snippet */ |
668 int iCol = pFragment->iCol+1; /* Query column to extract text from */ | 621 int iCol = pFragment->iCol+1; /* Query column to extract text from */ |
669 sqlite3_tokenizer_module *pMod; /* Tokenizer module methods object */ | 622 sqlite3_tokenizer_module *pMod; /* Tokenizer module methods object */ |
670 sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor open on zDoc/nDoc */ | 623 sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor open on zDoc/nDoc */ |
671 const char *ZDUMMY; /* Dummy argument used with tokenizer */ | |
672 int DUMMY1; /* Dummy argument used with tokenizer */ | |
673 | 624 |
674 zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol); | 625 zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol); |
675 if( zDoc==0 ){ | 626 if( zDoc==0 ){ |
676 if( sqlite3_column_type(pCsr->pStmt, iCol)!=SQLITE_NULL ){ | 627 if( sqlite3_column_type(pCsr->pStmt, iCol)!=SQLITE_NULL ){ |
677 return SQLITE_NOMEM; | 628 return SQLITE_NOMEM; |
678 } | 629 } |
679 return SQLITE_OK; | 630 return SQLITE_OK; |
680 } | 631 } |
681 nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol); | 632 nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol); |
682 | 633 |
683 /* Open a token cursor on the document. */ | 634 /* Open a token cursor on the document. */ |
684 pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule; | 635 pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule; |
685 rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC); | 636 rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid, zDoc,nDoc,&pC); |
686 if( rc!=SQLITE_OK ){ | 637 if( rc!=SQLITE_OK ){ |
687 return rc; | 638 return rc; |
688 } | 639 } |
689 pC->pTokenizer = pTab->pTokenizer; | |
690 | 640 |
691 while( rc==SQLITE_OK ){ | 641 while( rc==SQLITE_OK ){ |
692 int iBegin; /* Offset in zDoc of start of token */ | 642 const char *ZDUMMY; /* Dummy argument used with tokenizer */ |
693 int iFin; /* Offset in zDoc of end of token */ | 643 int DUMMY1 = -1; /* Dummy argument used with tokenizer */ |
694 int isHighlight; /* True for highlighted terms */ | 644 int iBegin = 0; /* Offset in zDoc of start of token */ |
| 645 int iFin = 0; /* Offset in zDoc of end of token */ |
| 646 int isHighlight = 0; /* True for highlighted terms */ |
695 | 647 |
| 648 /* Variable DUMMY1 is initialized to a negative value above. Elsewhere |
| 649 ** in the FTS code the variable that the third argument to xNext points to |
| 650 ** is initialized to zero before the first (*but not necessarily |
| 651 ** subsequent*) call to xNext(). This is done for a particular application |
| 652 ** that needs to know whether or not the tokenizer is being used for |
| 653 ** snippet generation or for some other purpose. |
| 654 ** |
| 655 ** Extreme care is required when writing code to depend on this |
| 656 ** initialization. It is not a documented part of the tokenizer interface. |
| 657 ** If a tokenizer is used directly by any code outside of FTS, this |
| 658 ** convention might not be respected. */ |
696 rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &iBegin, &iFin, &iCurrent); | 659 rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &iBegin, &iFin, &iCurrent); |
697 if( rc!=SQLITE_OK ){ | 660 if( rc!=SQLITE_OK ){ |
698 if( rc==SQLITE_DONE ){ | 661 if( rc==SQLITE_DONE ){ |
699 /* Special case - the last token of the snippet is also the last token | 662 /* Special case - the last token of the snippet is also the last token |
700 ** of the column. Append any punctuation that occurred between the end | 663 ** of the column. Append any punctuation that occurred between the end |
701 ** of the previous token and the end of the document to the output. | 664 ** of the previous token and the end of the document to the output. |
702 ** Then break out of the loop. */ | 665 ** Then break out of the loop. */ |
703 rc = fts3StringAppend(pOut, &zDoc[iEnd], -1); | 666 rc = fts3StringAppend(pOut, &zDoc[iEnd], -1); |
704 } | 667 } |
705 break; | 668 break; |
706 } | 669 } |
707 if( iCurrent<iPos ){ continue; } | 670 if( iCurrent<iPos ){ continue; } |
708 | 671 |
709 if( !isShiftDone ){ | 672 if( !isShiftDone ){ |
710 int n = nDoc - iBegin; | 673 int n = nDoc - iBegin; |
711 rc = fts3SnippetShift(pTab, nSnippet, &zDoc[iBegin], n, &iPos, &hlmask); | 674 rc = fts3SnippetShift( |
| 675 pTab, pCsr->iLangid, nSnippet, &zDoc[iBegin], n, &iPos, &hlmask |
| 676 ); |
712 isShiftDone = 1; | 677 isShiftDone = 1; |
713 | 678 |
714 /* Now that the shift has been done, check if the initial "..." are | 679 /* Now that the shift has been done, check if the initial "..." are |
715 ** required. They are required if (a) this is not the first fragment, | 680 ** required. They are required if (a) this is not the first fragment, |
716 ** or (b) this fragment does not begin at position 0 of its column. | 681 ** or (b) this fragment does not begin at position 0 of its column. |
717 */ | 682 */ |
718 if( rc==SQLITE_OK && (iPos>0 || iFragment>0) ){ | 683 if( rc==SQLITE_OK && (iPos>0 || iFragment>0) ){ |
719 rc = fts3StringAppend(pOut, zEllipsis, -1); | 684 rc = fts3StringAppend(pOut, zEllipsis, -1); |
720 } | 685 } |
721 if( rc!=SQLITE_OK || iCurrent<iPos ) continue; | 686 if( rc!=SQLITE_OK || iCurrent<iPos ) continue; |
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
765 /* A column-list is terminated by either a 0x01 or 0x00. */ | 730 /* A column-list is terminated by either a 0x01 or 0x00. */ |
766 while( 0xFE & (*pEnd | c) ){ | 731 while( 0xFE & (*pEnd | c) ){ |
767 c = *pEnd++ & 0x80; | 732 c = *pEnd++ & 0x80; |
768 if( !c ) nEntry++; | 733 if( !c ) nEntry++; |
769 } | 734 } |
770 | 735 |
771 *ppCollist = pEnd; | 736 *ppCollist = pEnd; |
772 return nEntry; | 737 return nEntry; |
773 } | 738 } |
774 | 739 |
775 static void fts3LoadColumnlistCounts(char **pp, u32 *aOut, int isGlobal){ | |
776 char *pCsr = *pp; | |
777 while( *pCsr ){ | |
778 int nHit; | |
779 sqlite3_int64 iCol = 0; | |
780 if( *pCsr==0x01 ){ | |
781 pCsr++; | |
782 pCsr += sqlite3Fts3GetVarint(pCsr, &iCol); | |
783 } | |
784 nHit = fts3ColumnlistCount(&pCsr); | |
785 assert( nHit>0 ); | |
786 if( isGlobal ){ | |
787 aOut[iCol*3+1]++; | |
788 } | |
789 aOut[iCol*3] += nHit; | |
790 } | |
791 pCsr++; | |
792 *pp = pCsr; | |
793 } | |
794 | |
795 /* | 740 /* |
796 ** fts3ExprIterate() callback used to collect the "global" matchinfo stats | 741 ** fts3ExprIterate() callback used to collect the "global" matchinfo stats |
797 ** for a single query. | 742 ** for a single query. |
798 ** | 743 ** |
799 ** fts3ExprIterate() callback to load the 'global' elements of a | 744 ** fts3ExprIterate() callback to load the 'global' elements of a |
800 ** FTS3_MATCHINFO_HITS matchinfo array. The global stats are those elements | 745 ** FTS3_MATCHINFO_HITS matchinfo array. The global stats are those elements |
801 ** of the matchinfo array that are constant for all rows returned by the | 746 ** of the matchinfo array that are constant for all rows returned by the |
802 ** current query. | 747 ** current query. |
803 ** | 748 ** |
804 ** Argument pCtx is actually a pointer to a struct of type MatchInfo. This | 749 ** Argument pCtx is actually a pointer to a struct of type MatchInfo. This |
(...skipping 13 matching lines...) Expand all Loading... |
818 ** file system. This is done because the full-text index doclist is required | 763 ** file system. This is done because the full-text index doclist is required |
819 ** to calculate these values properly, and the full-text index doclist is | 764 ** to calculate these values properly, and the full-text index doclist is |
820 ** not available for deferred tokens. | 765 ** not available for deferred tokens. |
821 */ | 766 */ |
822 static int fts3ExprGlobalHitsCb( | 767 static int fts3ExprGlobalHitsCb( |
823 Fts3Expr *pExpr, /* Phrase expression node */ | 768 Fts3Expr *pExpr, /* Phrase expression node */ |
824 int iPhrase, /* Phrase number (numbered from zero) */ | 769 int iPhrase, /* Phrase number (numbered from zero) */ |
825 void *pCtx /* Pointer to MatchInfo structure */ | 770 void *pCtx /* Pointer to MatchInfo structure */ |
826 ){ | 771 ){ |
827 MatchInfo *p = (MatchInfo *)pCtx; | 772 MatchInfo *p = (MatchInfo *)pCtx; |
828 Fts3Cursor *pCsr = p->pCursor; | 773 return sqlite3Fts3EvalPhraseStats( |
829 char *pIter; | 774 p->pCursor, pExpr, &p->aMatchinfo[3*iPhrase*p->nCol] |
830 char *pEnd; | 775 ); |
831 char *pFree = 0; | |
832 u32 *aOut = &p->aMatchinfo[3*iPhrase*p->nCol]; | |
833 | |
834 assert( pExpr->isLoaded ); | |
835 assert( pExpr->eType==FTSQUERY_PHRASE ); | |
836 | |
837 if( pCsr->pDeferred ){ | |
838 Fts3Phrase *pPhrase = pExpr->pPhrase; | |
839 int ii; | |
840 for(ii=0; ii<pPhrase->nToken; ii++){ | |
841 if( pPhrase->aToken[ii].bFulltext ) break; | |
842 } | |
843 if( ii<pPhrase->nToken ){ | |
844 int nFree = 0; | |
845 int rc = sqlite3Fts3ExprLoadFtDoclist(pCsr, pExpr, &pFree, &nFree); | |
846 if( rc!=SQLITE_OK ) return rc; | |
847 pIter = pFree; | |
848 pEnd = &pFree[nFree]; | |
849 }else{ | |
850 int iCol; /* Column index */ | |
851 for(iCol=0; iCol<p->nCol; iCol++){ | |
852 aOut[iCol*3 + 1] = (u32)p->nDoc; | |
853 aOut[iCol*3 + 2] = (u32)p->nDoc; | |
854 } | |
855 return SQLITE_OK; | |
856 } | |
857 }else{ | |
858 pIter = pExpr->aDoclist; | |
859 pEnd = &pExpr->aDoclist[pExpr->nDoclist]; | |
860 } | |
861 | |
862 /* Fill in the global hit count matrix row for this phrase. */ | |
863 while( pIter<pEnd ){ | |
864 while( *pIter++ & 0x80 ); /* Skip past docid. */ | |
865 fts3LoadColumnlistCounts(&pIter, &aOut[1], 1); | |
866 } | |
867 | |
868 sqlite3_free(pFree); | |
869 return SQLITE_OK; | |
870 } | 776 } |
871 | 777 |
872 /* | 778 /* |
873 ** fts3ExprIterate() callback used to collect the "local" part of the | 779 ** fts3ExprIterate() callback used to collect the "local" part of the |
874 ** FTS3_MATCHINFO_HITS array. The local stats are those elements of the | 780 ** FTS3_MATCHINFO_HITS array. The local stats are those elements of the |
875 ** array that are different for each row returned by the query. | 781 ** array that are different for each row returned by the query. |
876 */ | 782 */ |
877 static int fts3ExprLocalHitsCb( | 783 static int fts3ExprLocalHitsCb( |
878 Fts3Expr *pExpr, /* Phrase expression node */ | 784 Fts3Expr *pExpr, /* Phrase expression node */ |
879 int iPhrase, /* Phrase number */ | 785 int iPhrase, /* Phrase number */ |
880 void *pCtx /* Pointer to MatchInfo structure */ | 786 void *pCtx /* Pointer to MatchInfo structure */ |
881 ){ | 787 ){ |
| 788 int rc = SQLITE_OK; |
882 MatchInfo *p = (MatchInfo *)pCtx; | 789 MatchInfo *p = (MatchInfo *)pCtx; |
883 int iStart = iPhrase * p->nCol * 3; | 790 int iStart = iPhrase * p->nCol * 3; |
884 int i; | 791 int i; |
885 | 792 |
886 for(i=0; i<p->nCol; i++) p->aMatchinfo[iStart+i*3] = 0; | 793 for(i=0; i<p->nCol && rc==SQLITE_OK; i++){ |
887 | |
888 if( pExpr->aDoclist ){ | |
889 char *pCsr; | 794 char *pCsr; |
890 | 795 rc = sqlite3Fts3EvalPhrasePoslist(p->pCursor, pExpr, i, &pCsr); |
891 pCsr = sqlite3Fts3FindPositions(pExpr, p->pCursor->iPrevId, -1); | |
892 if( pCsr ){ | 796 if( pCsr ){ |
893 fts3LoadColumnlistCounts(&pCsr, &p->aMatchinfo[iStart], 0); | 797 p->aMatchinfo[iStart+i*3] = fts3ColumnlistCount(&pCsr); |
| 798 }else{ |
| 799 p->aMatchinfo[iStart+i*3] = 0; |
894 } | 800 } |
895 } | 801 } |
896 | 802 |
897 return SQLITE_OK; | 803 return rc; |
898 } | 804 } |
899 | 805 |
900 static int fts3MatchinfoCheck( | 806 static int fts3MatchinfoCheck( |
901 Fts3Table *pTab, | 807 Fts3Table *pTab, |
902 char cArg, | 808 char cArg, |
903 char **pzErr | 809 char **pzErr |
904 ){ | 810 ){ |
905 if( (cArg==FTS3_MATCHINFO_NPHRASE) | 811 if( (cArg==FTS3_MATCHINFO_NPHRASE) |
906 || (cArg==FTS3_MATCHINFO_NCOL) | 812 || (cArg==FTS3_MATCHINFO_NCOL) |
907 || (cArg==FTS3_MATCHINFO_NDOC && pTab->bHasStat) | 813 || (cArg==FTS3_MATCHINFO_NDOC && pTab->bFts4) |
908 || (cArg==FTS3_MATCHINFO_AVGLENGTH && pTab->bHasStat) | 814 || (cArg==FTS3_MATCHINFO_AVGLENGTH && pTab->bFts4) |
909 || (cArg==FTS3_MATCHINFO_LENGTH && pTab->bHasDocsize) | 815 || (cArg==FTS3_MATCHINFO_LENGTH && pTab->bHasDocsize) |
910 || (cArg==FTS3_MATCHINFO_LCS) | 816 || (cArg==FTS3_MATCHINFO_LCS) |
911 || (cArg==FTS3_MATCHINFO_HITS) | 817 || (cArg==FTS3_MATCHINFO_HITS) |
912 ){ | 818 ){ |
913 return SQLITE_OK; | 819 return SQLITE_OK; |
914 } | 820 } |
915 *pzErr = sqlite3_mprintf("unrecognized matchinfo request: %c", cArg); | 821 *pzErr = sqlite3_mprintf("unrecognized matchinfo request: %c", cArg); |
916 return SQLITE_ERROR; | 822 return SQLITE_ERROR; |
917 } | 823 } |
918 | 824 |
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
953 | 859 |
954 if( !*ppStmt ){ | 860 if( !*ppStmt ){ |
955 int rc = sqlite3Fts3SelectDoctotal(pTab, ppStmt); | 861 int rc = sqlite3Fts3SelectDoctotal(pTab, ppStmt); |
956 if( rc!=SQLITE_OK ) return rc; | 862 if( rc!=SQLITE_OK ) return rc; |
957 } | 863 } |
958 pStmt = *ppStmt; | 864 pStmt = *ppStmt; |
959 assert( sqlite3_data_count(pStmt)==1 ); | 865 assert( sqlite3_data_count(pStmt)==1 ); |
960 | 866 |
961 a = sqlite3_column_blob(pStmt, 0); | 867 a = sqlite3_column_blob(pStmt, 0); |
962 a += sqlite3Fts3GetVarint(a, &nDoc); | 868 a += sqlite3Fts3GetVarint(a, &nDoc); |
963 if( nDoc==0 ) return SQLITE_CORRUPT; | 869 if( nDoc==0 ) return FTS_CORRUPT_VTAB; |
964 *pnDoc = (u32)nDoc; | 870 *pnDoc = (u32)nDoc; |
965 | 871 |
966 if( paLen ) *paLen = a; | 872 if( paLen ) *paLen = a; |
967 return SQLITE_OK; | 873 return SQLITE_OK; |
968 } | 874 } |
969 | 875 |
970 /* | 876 /* |
971 ** An instance of the following structure is used to store state while | 877 ** An instance of the following structure is used to store state while |
972 ** iterating through a multi-column position-list corresponding to the | 878 ** iterating through a multi-column position-list corresponding to the |
973 ** hits for a single phrase on a single row in order to calculate the | 879 ** hits for a single phrase on a single row in order to calculate the |
974 ** values for a matchinfo() FTS3_MATCHINFO_LCS request. | 880 ** values for a matchinfo() FTS3_MATCHINFO_LCS request. |
975 */ | 881 */ |
976 typedef struct LcsIterator LcsIterator; | 882 typedef struct LcsIterator LcsIterator; |
977 struct LcsIterator { | 883 struct LcsIterator { |
978 Fts3Expr *pExpr; /* Pointer to phrase expression */ | 884 Fts3Expr *pExpr; /* Pointer to phrase expression */ |
| 885 int iPosOffset; /* Tokens count up to end of this phrase */ |
979 char *pRead; /* Cursor used to iterate through aDoclist */ | 886 char *pRead; /* Cursor used to iterate through aDoclist */ |
980 int iPosOffset; /* Tokens count up to end of this phrase */ | |
981 int iCol; /* Current column number */ | |
982 int iPos; /* Current position */ | 887 int iPos; /* Current position */ |
983 }; | 888 }; |
984 | 889 |
985 /* | 890 /* |
986 ** If LcsIterator.iCol is set to the following value, the iterator has | 891 ** If LcsIterator.iCol is set to the following value, the iterator has |
987 ** finished iterating through all offsets for all columns. | 892 ** finished iterating through all offsets for all columns. |
988 */ | 893 */ |
989 #define LCS_ITERATOR_FINISHED 0x7FFFFFFF; | 894 #define LCS_ITERATOR_FINISHED 0x7FFFFFFF; |
990 | 895 |
991 static int fts3MatchinfoLcsCb( | 896 static int fts3MatchinfoLcsCb( |
(...skipping 10 matching lines...) Expand all Loading... |
1002 ** Advance the iterator passed as an argument to the next position. Return | 907 ** Advance the iterator passed as an argument to the next position. Return |
1003 ** 1 if the iterator is at EOF or if it now points to the start of the | 908 ** 1 if the iterator is at EOF or if it now points to the start of the |
1004 ** position list for the next column. | 909 ** position list for the next column. |
1005 */ | 910 */ |
1006 static int fts3LcsIteratorAdvance(LcsIterator *pIter){ | 911 static int fts3LcsIteratorAdvance(LcsIterator *pIter){ |
1007 char *pRead = pIter->pRead; | 912 char *pRead = pIter->pRead; |
1008 sqlite3_int64 iRead; | 913 sqlite3_int64 iRead; |
1009 int rc = 0; | 914 int rc = 0; |
1010 | 915 |
1011 pRead += sqlite3Fts3GetVarint(pRead, &iRead); | 916 pRead += sqlite3Fts3GetVarint(pRead, &iRead); |
1012 if( iRead==0 ){ | 917 if( iRead==0 || iRead==1 ){ |
1013 pIter->iCol = LCS_ITERATOR_FINISHED; | 918 pRead = 0; |
1014 rc = 1; | 919 rc = 1; |
1015 }else{ | 920 }else{ |
1016 if( iRead==1 ){ | |
1017 pRead += sqlite3Fts3GetVarint(pRead, &iRead); | |
1018 pIter->iCol = (int)iRead; | |
1019 pIter->iPos = pIter->iPosOffset; | |
1020 pRead += sqlite3Fts3GetVarint(pRead, &iRead); | |
1021 rc = 1; | |
1022 } | |
1023 pIter->iPos += (int)(iRead-2); | 921 pIter->iPos += (int)(iRead-2); |
1024 } | 922 } |
1025 | 923 |
1026 pIter->pRead = pRead; | 924 pIter->pRead = pRead; |
1027 return rc; | 925 return rc; |
1028 } | 926 } |
1029 | 927 |
1030 /* | 928 /* |
1031 ** This function implements the FTS3_MATCHINFO_LCS matchinfo() flag. | 929 ** This function implements the FTS3_MATCHINFO_LCS matchinfo() flag. |
1032 ** | 930 ** |
(...skipping 11 matching lines...) Expand all Loading... |
1044 int iCol; | 942 int iCol; |
1045 int nToken = 0; | 943 int nToken = 0; |
1046 | 944 |
1047 /* Allocate and populate the array of LcsIterator objects. The array | 945 /* Allocate and populate the array of LcsIterator objects. The array |
1048 ** contains one element for each matchable phrase in the query. | 946 ** contains one element for each matchable phrase in the query. |
1049 **/ | 947 **/ |
1050 aIter = sqlite3_malloc(sizeof(LcsIterator) * pCsr->nPhrase); | 948 aIter = sqlite3_malloc(sizeof(LcsIterator) * pCsr->nPhrase); |
1051 if( !aIter ) return SQLITE_NOMEM; | 949 if( !aIter ) return SQLITE_NOMEM; |
1052 memset(aIter, 0, sizeof(LcsIterator) * pCsr->nPhrase); | 950 memset(aIter, 0, sizeof(LcsIterator) * pCsr->nPhrase); |
1053 (void)fts3ExprIterate(pCsr->pExpr, fts3MatchinfoLcsCb, (void*)aIter); | 951 (void)fts3ExprIterate(pCsr->pExpr, fts3MatchinfoLcsCb, (void*)aIter); |
| 952 |
1054 for(i=0; i<pInfo->nPhrase; i++){ | 953 for(i=0; i<pInfo->nPhrase; i++){ |
1055 LcsIterator *pIter = &aIter[i]; | 954 LcsIterator *pIter = &aIter[i]; |
1056 nToken -= pIter->pExpr->pPhrase->nToken; | 955 nToken -= pIter->pExpr->pPhrase->nToken; |
1057 pIter->iPosOffset = nToken; | 956 pIter->iPosOffset = nToken; |
1058 pIter->pRead = sqlite3Fts3FindPositions(pIter->pExpr, pCsr->iPrevId, -1); | |
1059 if( pIter->pRead ){ | |
1060 pIter->iPos = pIter->iPosOffset; | |
1061 fts3LcsIteratorAdvance(&aIter[i]); | |
1062 }else{ | |
1063 pIter->iCol = LCS_ITERATOR_FINISHED; | |
1064 } | |
1065 } | 957 } |
1066 | 958 |
1067 for(iCol=0; iCol<pInfo->nCol; iCol++){ | 959 for(iCol=0; iCol<pInfo->nCol; iCol++){ |
1068 int nLcs = 0; /* LCS value for this column */ | 960 int nLcs = 0; /* LCS value for this column */ |
1069 int nLive = 0; /* Number of iterators in aIter not at EOF */ | 961 int nLive = 0; /* Number of iterators in aIter not at EOF */ |
1070 | 962 |
1071 /* Loop through the iterators in aIter[]. Set nLive to the number of | |
1072 ** iterators that point to a position-list corresponding to column iCol. | |
1073 */ | |
1074 for(i=0; i<pInfo->nPhrase; i++){ | 963 for(i=0; i<pInfo->nPhrase; i++){ |
1075 assert( aIter[i].iCol>=iCol ); | 964 int rc; |
1076 if( aIter[i].iCol==iCol ) nLive++; | 965 LcsIterator *pIt = &aIter[i]; |
| 966 rc = sqlite3Fts3EvalPhrasePoslist(pCsr, pIt->pExpr, iCol, &pIt->pRead); |
| 967 if( rc!=SQLITE_OK ) return rc; |
| 968 if( pIt->pRead ){ |
| 969 pIt->iPos = pIt->iPosOffset; |
| 970 fts3LcsIteratorAdvance(&aIter[i]); |
| 971 nLive++; |
| 972 } |
1077 } | 973 } |
1078 | 974 |
1079 /* The following loop runs until all iterators in aIter[] have finished | |
1080 ** iterating through positions in column iCol. Exactly one of the | |
1081 ** iterators is advanced each time the body of the loop is run. | |
1082 */ | |
1083 while( nLive>0 ){ | 975 while( nLive>0 ){ |
1084 LcsIterator *pAdv = 0; /* The iterator to advance by one position */ | 976 LcsIterator *pAdv = 0; /* The iterator to advance by one position */ |
1085 int nThisLcs = 0; /* LCS for the current iterator positions */ | 977 int nThisLcs = 0; /* LCS for the current iterator positions */ |
1086 | 978 |
1087 for(i=0; i<pInfo->nPhrase; i++){ | 979 for(i=0; i<pInfo->nPhrase; i++){ |
1088 LcsIterator *pIter = &aIter[i]; | 980 LcsIterator *pIter = &aIter[i]; |
1089 if( iCol!=pIter->iCol ){ | 981 if( pIter->pRead==0 ){ |
1090 /* This iterator is already at EOF for this column. */ | 982 /* This iterator is already at EOF for this column. */ |
1091 nThisLcs = 0; | 983 nThisLcs = 0; |
1092 }else{ | 984 }else{ |
1093 if( pAdv==0 || pIter->iPos<pAdv->iPos ){ | 985 if( pAdv==0 || pIter->iPos<pAdv->iPos ){ |
1094 pAdv = pIter; | 986 pAdv = pIter; |
1095 } | 987 } |
1096 if( nThisLcs==0 || pIter->iPos==pIter[-1].iPos ){ | 988 if( nThisLcs==0 || pIter->iPos==pIter[-1].iPos ){ |
1097 nThisLcs++; | 989 nThisLcs++; |
1098 }else{ | 990 }else{ |
1099 nThisLcs = 1; | 991 nThisLcs = 1; |
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1145 case FTS3_MATCHINFO_NPHRASE: | 1037 case FTS3_MATCHINFO_NPHRASE: |
1146 if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nPhrase; | 1038 if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nPhrase; |
1147 break; | 1039 break; |
1148 | 1040 |
1149 case FTS3_MATCHINFO_NCOL: | 1041 case FTS3_MATCHINFO_NCOL: |
1150 if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nCol; | 1042 if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nCol; |
1151 break; | 1043 break; |
1152 | 1044 |
1153 case FTS3_MATCHINFO_NDOC: | 1045 case FTS3_MATCHINFO_NDOC: |
1154 if( bGlobal ){ | 1046 if( bGlobal ){ |
1155 sqlite3_int64 nDoc; | 1047 sqlite3_int64 nDoc = 0; |
1156 rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, 0); | 1048 rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, 0); |
1157 pInfo->aMatchinfo[0] = (u32)nDoc; | 1049 pInfo->aMatchinfo[0] = (u32)nDoc; |
1158 } | 1050 } |
1159 break; | 1051 break; |
1160 | 1052 |
1161 case FTS3_MATCHINFO_AVGLENGTH: | 1053 case FTS3_MATCHINFO_AVGLENGTH: |
1162 if( bGlobal ){ | 1054 if( bGlobal ){ |
1163 sqlite3_int64 nDoc; /* Number of rows in table */ | 1055 sqlite3_int64 nDoc; /* Number of rows in table */ |
1164 const char *a; /* Aggregate column length array */ | 1056 const char *a; /* Aggregate column length array */ |
1165 | 1057 |
(...skipping 235 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1401 typedef struct TermOffset TermOffset; | 1293 typedef struct TermOffset TermOffset; |
1402 typedef struct TermOffsetCtx TermOffsetCtx; | 1294 typedef struct TermOffsetCtx TermOffsetCtx; |
1403 | 1295 |
1404 struct TermOffset { | 1296 struct TermOffset { |
1405 char *pList; /* Position-list */ | 1297 char *pList; /* Position-list */ |
1406 int iPos; /* Position just read from pList */ | 1298 int iPos; /* Position just read from pList */ |
1407 int iOff; /* Offset of this term from read positions */ | 1299 int iOff; /* Offset of this term from read positions */ |
1408 }; | 1300 }; |
1409 | 1301 |
1410 struct TermOffsetCtx { | 1302 struct TermOffsetCtx { |
| 1303 Fts3Cursor *pCsr; |
1411 int iCol; /* Column of table to populate aTerm for */ | 1304 int iCol; /* Column of table to populate aTerm for */ |
1412 int iTerm; | 1305 int iTerm; |
1413 sqlite3_int64 iDocid; | 1306 sqlite3_int64 iDocid; |
1414 TermOffset *aTerm; | 1307 TermOffset *aTerm; |
1415 }; | 1308 }; |
1416 | 1309 |
1417 /* | 1310 /* |
1418 ** This function is an fts3ExprIterate() callback used by sqlite3Fts3Offsets(). | 1311 ** This function is an fts3ExprIterate() callback used by sqlite3Fts3Offsets(). |
1419 */ | 1312 */ |
1420 static int fts3ExprTermOffsetInit(Fts3Expr *pExpr, int iPhrase, void *ctx){ | 1313 static int fts3ExprTermOffsetInit(Fts3Expr *pExpr, int iPhrase, void *ctx){ |
1421 TermOffsetCtx *p = (TermOffsetCtx *)ctx; | 1314 TermOffsetCtx *p = (TermOffsetCtx *)ctx; |
1422 int nTerm; /* Number of tokens in phrase */ | 1315 int nTerm; /* Number of tokens in phrase */ |
1423 int iTerm; /* For looping through nTerm phrase terms */ | 1316 int iTerm; /* For looping through nTerm phrase terms */ |
1424 char *pList; /* Pointer to position list for phrase */ | 1317 char *pList; /* Pointer to position list for phrase */ |
1425 int iPos = 0; /* First position in position-list */ | 1318 int iPos = 0; /* First position in position-list */ |
| 1319 int rc; |
1426 | 1320 |
1427 UNUSED_PARAMETER(iPhrase); | 1321 UNUSED_PARAMETER(iPhrase); |
1428 pList = sqlite3Fts3FindPositions(pExpr, p->iDocid, p->iCol); | 1322 rc = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->iCol, &pList); |
1429 nTerm = pExpr->pPhrase->nToken; | 1323 nTerm = pExpr->pPhrase->nToken; |
1430 if( pList ){ | 1324 if( pList ){ |
1431 fts3GetDeltaPosition(&pList, &iPos); | 1325 fts3GetDeltaPosition(&pList, &iPos); |
1432 assert( iPos>=0 ); | 1326 assert( iPos>=0 ); |
1433 } | 1327 } |
1434 | 1328 |
1435 for(iTerm=0; iTerm<nTerm; iTerm++){ | 1329 for(iTerm=0; iTerm<nTerm; iTerm++){ |
1436 TermOffset *pT = &p->aTerm[p->iTerm++]; | 1330 TermOffset *pT = &p->aTerm[p->iTerm++]; |
1437 pT->iOff = nTerm-iTerm-1; | 1331 pT->iOff = nTerm-iTerm-1; |
1438 pT->pList = pList; | 1332 pT->pList = pList; |
1439 pT->iPos = iPos; | 1333 pT->iPos = iPos; |
1440 } | 1334 } |
1441 | 1335 |
1442 return SQLITE_OK; | 1336 return rc; |
1443 } | 1337 } |
1444 | 1338 |
1445 /* | 1339 /* |
1446 ** Implementation of offsets() function. | 1340 ** Implementation of offsets() function. |
1447 */ | 1341 */ |
1448 void sqlite3Fts3Offsets( | 1342 void sqlite3Fts3Offsets( |
1449 sqlite3_context *pCtx, /* SQLite function call context */ | 1343 sqlite3_context *pCtx, /* SQLite function call context */ |
1450 Fts3Cursor *pCsr /* Cursor object */ | 1344 Fts3Cursor *pCsr /* Cursor object */ |
1451 ){ | 1345 ){ |
1452 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; | 1346 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; |
1453 sqlite3_tokenizer_module const *pMod = pTab->pTokenizer->pModule; | 1347 sqlite3_tokenizer_module const *pMod = pTab->pTokenizer->pModule; |
1454 const char *ZDUMMY; /* Dummy argument used with xNext() */ | |
1455 int NDUMMY; /* Dummy argument used with xNext() */ | |
1456 int rc; /* Return Code */ | 1348 int rc; /* Return Code */ |
1457 int nToken; /* Number of tokens in query */ | 1349 int nToken; /* Number of tokens in query */ |
1458 int iCol; /* Column currently being processed */ | 1350 int iCol; /* Column currently being processed */ |
1459 StrBuffer res = {0, 0, 0}; /* Result string */ | 1351 StrBuffer res = {0, 0, 0}; /* Result string */ |
1460 TermOffsetCtx sCtx; /* Context for fts3ExprTermOffsetInit() */ | 1352 TermOffsetCtx sCtx; /* Context for fts3ExprTermOffsetInit() */ |
1461 | 1353 |
1462 if( !pCsr->pExpr ){ | 1354 if( !pCsr->pExpr ){ |
1463 sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC); | 1355 sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC); |
1464 return; | 1356 return; |
1465 } | 1357 } |
1466 | 1358 |
1467 memset(&sCtx, 0, sizeof(sCtx)); | 1359 memset(&sCtx, 0, sizeof(sCtx)); |
1468 assert( pCsr->isRequireSeek==0 ); | 1360 assert( pCsr->isRequireSeek==0 ); |
1469 | 1361 |
1470 /* Count the number of terms in the query */ | 1362 /* Count the number of terms in the query */ |
1471 rc = fts3ExprLoadDoclists(pCsr, 0, &nToken); | 1363 rc = fts3ExprLoadDoclists(pCsr, 0, &nToken); |
1472 if( rc!=SQLITE_OK ) goto offsets_out; | 1364 if( rc!=SQLITE_OK ) goto offsets_out; |
1473 | 1365 |
1474 /* Allocate the array of TermOffset iterators. */ | 1366 /* Allocate the array of TermOffset iterators. */ |
1475 sCtx.aTerm = (TermOffset *)sqlite3_malloc(sizeof(TermOffset)*nToken); | 1367 sCtx.aTerm = (TermOffset *)sqlite3_malloc(sizeof(TermOffset)*nToken); |
1476 if( 0==sCtx.aTerm ){ | 1368 if( 0==sCtx.aTerm ){ |
1477 rc = SQLITE_NOMEM; | 1369 rc = SQLITE_NOMEM; |
1478 goto offsets_out; | 1370 goto offsets_out; |
1479 } | 1371 } |
1480 sCtx.iDocid = pCsr->iPrevId; | 1372 sCtx.iDocid = pCsr->iPrevId; |
| 1373 sCtx.pCsr = pCsr; |
1481 | 1374 |
1482 /* Loop through the table columns, appending offset information to | 1375 /* Loop through the table columns, appending offset information to |
1483 ** string-buffer res for each column. | 1376 ** string-buffer res for each column. |
1484 */ | 1377 */ |
1485 for(iCol=0; iCol<pTab->nColumn; iCol++){ | 1378 for(iCol=0; iCol<pTab->nColumn; iCol++){ |
1486 sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor */ | 1379 sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor */ |
1487 int iStart; | 1380 const char *ZDUMMY; /* Dummy argument used with xNext() */ |
1488 int iEnd; | 1381 int NDUMMY = 0; /* Dummy argument used with xNext() */ |
1489 int iCurrent; | 1382 int iStart = 0; |
| 1383 int iEnd = 0; |
| 1384 int iCurrent = 0; |
1490 const char *zDoc; | 1385 const char *zDoc; |
1491 int nDoc; | 1386 int nDoc; |
1492 | 1387 |
1493 /* Initialize the contents of sCtx.aTerm[] for column iCol. There is | 1388 /* Initialize the contents of sCtx.aTerm[] for column iCol. There is |
1494 ** no way that this operation can fail, so the return code from | 1389 ** no way that this operation can fail, so the return code from |
1495 ** fts3ExprIterate() can be discarded. | 1390 ** fts3ExprIterate() can be discarded. |
1496 */ | 1391 */ |
1497 sCtx.iCol = iCol; | 1392 sCtx.iCol = iCol; |
1498 sCtx.iTerm = 0; | 1393 sCtx.iTerm = 0; |
1499 (void)fts3ExprIterate(pCsr->pExpr, fts3ExprTermOffsetInit, (void *)&sCtx); | 1394 (void)fts3ExprIterate(pCsr->pExpr, fts3ExprTermOffsetInit, (void *)&sCtx); |
1500 | 1395 |
1501 /* Retreive the text stored in column iCol. If an SQL NULL is stored | 1396 /* Retreive the text stored in column iCol. If an SQL NULL is stored |
1502 ** in column iCol, jump immediately to the next iteration of the loop. | 1397 ** in column iCol, jump immediately to the next iteration of the loop. |
1503 ** If an OOM occurs while retrieving the data (this can happen if SQLite | 1398 ** If an OOM occurs while retrieving the data (this can happen if SQLite |
1504 ** needs to transform the data from utf-16 to utf-8), return SQLITE_NOMEM | 1399 ** needs to transform the data from utf-16 to utf-8), return SQLITE_NOMEM |
1505 ** to the caller. | 1400 ** to the caller. |
1506 */ | 1401 */ |
1507 zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol+1); | 1402 zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol+1); |
1508 nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol+1); | 1403 nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol+1); |
1509 if( zDoc==0 ){ | 1404 if( zDoc==0 ){ |
1510 if( sqlite3_column_type(pCsr->pStmt, iCol+1)==SQLITE_NULL ){ | 1405 if( sqlite3_column_type(pCsr->pStmt, iCol+1)==SQLITE_NULL ){ |
1511 continue; | 1406 continue; |
1512 } | 1407 } |
1513 rc = SQLITE_NOMEM; | 1408 rc = SQLITE_NOMEM; |
1514 goto offsets_out; | 1409 goto offsets_out; |
1515 } | 1410 } |
1516 | 1411 |
1517 /* Initialize a tokenizer iterator to iterate through column iCol. */ | 1412 /* Initialize a tokenizer iterator to iterate through column iCol. */ |
1518 rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC); | 1413 rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid, |
| 1414 zDoc, nDoc, &pC |
| 1415 ); |
1519 if( rc!=SQLITE_OK ) goto offsets_out; | 1416 if( rc!=SQLITE_OK ) goto offsets_out; |
1520 pC->pTokenizer = pTab->pTokenizer; | |
1521 | 1417 |
1522 rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent); | 1418 rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent); |
1523 while( rc==SQLITE_OK ){ | 1419 while( rc==SQLITE_OK ){ |
1524 int i; /* Used to loop through terms */ | 1420 int i; /* Used to loop through terms */ |
1525 int iMinPos = 0x7FFFFFFF; /* Position of next token */ | 1421 int iMinPos = 0x7FFFFFFF; /* Position of next token */ |
1526 TermOffset *pTerm = 0; /* TermOffset associated with next token */ | 1422 TermOffset *pTerm = 0; /* TermOffset associated with next token */ |
1527 | 1423 |
1528 for(i=0; i<nToken; i++){ | 1424 for(i=0; i<nToken; i++){ |
1529 TermOffset *pT = &sCtx.aTerm[i]; | 1425 TermOffset *pT = &sCtx.aTerm[i]; |
1530 if( pT->pList && (pT->iPos-pT->iOff)<iMinPos ){ | 1426 if( pT->pList && (pT->iPos-pT->iOff)<iMinPos ){ |
1531 iMinPos = pT->iPos-pT->iOff; | 1427 iMinPos = pT->iPos-pT->iOff; |
1532 pTerm = pT; | 1428 pTerm = pT; |
1533 } | 1429 } |
1534 } | 1430 } |
1535 | 1431 |
1536 if( !pTerm ){ | 1432 if( !pTerm ){ |
1537 /* All offsets for this column have been gathered. */ | 1433 /* All offsets for this column have been gathered. */ |
1538 break; | 1434 rc = SQLITE_DONE; |
1539 }else{ | 1435 }else{ |
1540 assert( iCurrent<=iMinPos ); | 1436 assert( iCurrent<=iMinPos ); |
1541 if( 0==(0xFE&*pTerm->pList) ){ | 1437 if( 0==(0xFE&*pTerm->pList) ){ |
1542 pTerm->pList = 0; | 1438 pTerm->pList = 0; |
1543 }else{ | 1439 }else{ |
1544 fts3GetDeltaPosition(&pTerm->pList, &pTerm->iPos); | 1440 fts3GetDeltaPosition(&pTerm->pList, &pTerm->iPos); |
1545 } | 1441 } |
1546 while( rc==SQLITE_OK && iCurrent<iMinPos ){ | 1442 while( rc==SQLITE_OK && iCurrent<iMinPos ){ |
1547 rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent); | 1443 rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent); |
1548 } | 1444 } |
1549 if( rc==SQLITE_OK ){ | 1445 if( rc==SQLITE_OK ){ |
1550 char aBuffer[64]; | 1446 char aBuffer[64]; |
1551 sqlite3_snprintf(sizeof(aBuffer), aBuffer, | 1447 sqlite3_snprintf(sizeof(aBuffer), aBuffer, |
1552 "%d %d %d %d ", iCol, pTerm-sCtx.aTerm, iStart, iEnd-iStart | 1448 "%d %d %d %d ", iCol, pTerm-sCtx.aTerm, iStart, iEnd-iStart |
1553 ); | 1449 ); |
1554 rc = fts3StringAppend(&res, aBuffer, -1); | 1450 rc = fts3StringAppend(&res, aBuffer, -1); |
1555 }else if( rc==SQLITE_DONE ){ | 1451 }else if( rc==SQLITE_DONE && pTab->zContentTbl==0 ){ |
1556 rc = SQLITE_CORRUPT; | 1452 rc = FTS_CORRUPT_VTAB; |
1557 } | 1453 } |
1558 } | 1454 } |
1559 } | 1455 } |
1560 if( rc==SQLITE_DONE ){ | 1456 if( rc==SQLITE_DONE ){ |
1561 rc = SQLITE_OK; | 1457 rc = SQLITE_OK; |
1562 } | 1458 } |
1563 | 1459 |
1564 pMod->xClose(pC); | 1460 pMod->xClose(pC); |
1565 if( rc!=SQLITE_OK ) goto offsets_out; | 1461 if( rc!=SQLITE_OK ) goto offsets_out; |
1566 } | 1462 } |
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1616 | 1512 |
1617 if( rc!=SQLITE_OK ){ | 1513 if( rc!=SQLITE_OK ){ |
1618 sqlite3_result_error_code(pContext, rc); | 1514 sqlite3_result_error_code(pContext, rc); |
1619 }else{ | 1515 }else{ |
1620 int n = pCsr->nMatchinfo * sizeof(u32); | 1516 int n = pCsr->nMatchinfo * sizeof(u32); |
1621 sqlite3_result_blob(pContext, pCsr->aMatchinfo, n, SQLITE_TRANSIENT); | 1517 sqlite3_result_blob(pContext, pCsr->aMatchinfo, n, SQLITE_TRANSIENT); |
1622 } | 1518 } |
1623 } | 1519 } |
1624 | 1520 |
1625 #endif | 1521 #endif |
OLD | NEW |