Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(98)

Side by Side Diff: third_party/sqlite/src/ext/fts3/fts3_snippet.c

Issue 901033002: Import SQLite 3.8.7.4. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Chromium changes to support SQLite 3.8.7.4. Created 5 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 ** 2009 Oct 23 2 ** 2009 Oct 23
3 ** 3 **
4 ** The author disclaims copyright to this source code. In place of 4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing: 5 ** a legal notice, here is a blessing:
6 ** 6 **
7 ** May you do good and not evil. 7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others. 8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give. 9 ** May you share freely, never taking more than you give.
10 ** 10 **
11 ****************************************************************************** 11 ******************************************************************************
12 */ 12 */
13 13
14 #include "fts3Int.h"
14 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) 15 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
15 16
16 #include "fts3Int.h"
17 #include <string.h> 17 #include <string.h>
18 #include <assert.h> 18 #include <assert.h>
19 19
20 /* 20 /*
21 ** Characters that may appear in the second argument to matchinfo(). 21 ** Characters that may appear in the second argument to matchinfo().
22 */ 22 */
23 #define FTS3_MATCHINFO_NPHRASE 'p' /* 1 value */ 23 #define FTS3_MATCHINFO_NPHRASE 'p' /* 1 value */
24 #define FTS3_MATCHINFO_NCOL 'c' /* 1 value */ 24 #define FTS3_MATCHINFO_NCOL 'c' /* 1 value */
25 #define FTS3_MATCHINFO_NDOC 'n' /* 1 value */ 25 #define FTS3_MATCHINFO_NDOC 'n' /* 1 value */
26 #define FTS3_MATCHINFO_AVGLENGTH 'a' /* nCol values */ 26 #define FTS3_MATCHINFO_AVGLENGTH 'a' /* nCol values */
(...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after
121 ** 121 **
122 ** are encoded. 122 ** are encoded.
123 ** 123 **
124 ** When this function is called, *pp points to the start of an element of 124 ** When this function is called, *pp points to the start of an element of
125 ** the list. *piPos contains the value of the previous entry in the list. 125 ** the list. *piPos contains the value of the previous entry in the list.
126 ** After it returns, *piPos contains the value of the next element of the 126 ** After it returns, *piPos contains the value of the next element of the
127 ** list and *pp is advanced to the following varint. 127 ** list and *pp is advanced to the following varint.
128 */ 128 */
129 static void fts3GetDeltaPosition(char **pp, int *piPos){ 129 static void fts3GetDeltaPosition(char **pp, int *piPos){
130 int iVal; 130 int iVal;
131 *pp += sqlite3Fts3GetVarint32(*pp, &iVal); 131 *pp += fts3GetVarint32(*pp, &iVal);
132 *piPos += (iVal-2); 132 *piPos += (iVal-2);
133 } 133 }
134 134
135 /* 135 /*
136 ** Helper function for fts3ExprIterate() (see below). 136 ** Helper function for fts3ExprIterate() (see below).
137 */ 137 */
138 static int fts3ExprIterate2( 138 static int fts3ExprIterate2(
139 Fts3Expr *pExpr, /* Expression to iterate phrases of */ 139 Fts3Expr *pExpr, /* Expression to iterate phrases of */
140 int *piPhrase, /* Pointer to phrase counter */ 140 int *piPhrase, /* Pointer to phrase counter */
141 int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */ 141 int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */
(...skipping 28 matching lines...) Expand all
170 static int fts3ExprIterate( 170 static int fts3ExprIterate(
171 Fts3Expr *pExpr, /* Expression to iterate phrases of */ 171 Fts3Expr *pExpr, /* Expression to iterate phrases of */
172 int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */ 172 int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */
173 void *pCtx /* Second argument to pass to callback */ 173 void *pCtx /* Second argument to pass to callback */
174 ){ 174 ){
175 int iPhrase = 0; /* Variable used as the phrase counter */ 175 int iPhrase = 0; /* Variable used as the phrase counter */
176 return fts3ExprIterate2(pExpr, &iPhrase, x, pCtx); 176 return fts3ExprIterate2(pExpr, &iPhrase, x, pCtx);
177 } 177 }
178 178
179 /* 179 /*
180 ** The argument to this function is always a phrase node. Its doclist
181 ** (Fts3Expr.aDoclist[]) and the doclists associated with all phrase nodes
182 ** to the left of this one in the query tree have already been loaded.
183 **
184 ** If this phrase node is part of a series of phrase nodes joined by
185 ** NEAR operators (and is not the left-most of said series), then elements are
186 ** removed from the phrases doclist consistent with the NEAR restriction. If
187 ** required, elements may be removed from the doclists of phrases to the
188 ** left of this one that are part of the same series of NEAR operator
189 ** connected phrases.
190 **
191 ** If an OOM error occurs, SQLITE_NOMEM is returned. Otherwise, SQLITE_OK.
192 */
193 static int fts3ExprNearTrim(Fts3Expr *pExpr){
194 int rc = SQLITE_OK;
195 Fts3Expr *pParent = pExpr->pParent;
196
197 assert( pExpr->eType==FTSQUERY_PHRASE );
198 while( rc==SQLITE_OK
199 && pParent
200 && pParent->eType==FTSQUERY_NEAR
201 && pParent->pRight==pExpr
202 ){
203 /* This expression (pExpr) is the right-hand-side of a NEAR operator.
204 ** Find the expression to the left of the same operator.
205 */
206 int nNear = pParent->nNear;
207 Fts3Expr *pLeft = pParent->pLeft;
208
209 if( pLeft->eType!=FTSQUERY_PHRASE ){
210 assert( pLeft->eType==FTSQUERY_NEAR );
211 assert( pLeft->pRight->eType==FTSQUERY_PHRASE );
212 pLeft = pLeft->pRight;
213 }
214
215 rc = sqlite3Fts3ExprNearTrim(pLeft, pExpr, nNear);
216
217 pExpr = pLeft;
218 pParent = pExpr->pParent;
219 }
220
221 return rc;
222 }
223
224 /*
225 ** This is an fts3ExprIterate() callback used while loading the doclists 180 ** This is an fts3ExprIterate() callback used while loading the doclists
226 ** for each phrase into Fts3Expr.aDoclist[]/nDoclist. See also 181 ** for each phrase into Fts3Expr.aDoclist[]/nDoclist. See also
227 ** fts3ExprLoadDoclists(). 182 ** fts3ExprLoadDoclists().
228 */ 183 */
229 static int fts3ExprLoadDoclistsCb(Fts3Expr *pExpr, int iPhrase, void *ctx){ 184 static int fts3ExprLoadDoclistsCb(Fts3Expr *pExpr, int iPhrase, void *ctx){
230 int rc = SQLITE_OK; 185 int rc = SQLITE_OK;
186 Fts3Phrase *pPhrase = pExpr->pPhrase;
231 LoadDoclistCtx *p = (LoadDoclistCtx *)ctx; 187 LoadDoclistCtx *p = (LoadDoclistCtx *)ctx;
232 188
233 UNUSED_PARAMETER(iPhrase); 189 UNUSED_PARAMETER(iPhrase);
234 190
235 p->nPhrase++; 191 p->nPhrase++;
236 p->nToken += pExpr->pPhrase->nToken; 192 p->nToken += pPhrase->nToken;
237
238 if( pExpr->isLoaded==0 ){
239 rc = sqlite3Fts3ExprLoadDoclist(p->pCsr, pExpr);
240 pExpr->isLoaded = 1;
241 if( rc==SQLITE_OK ){
242 rc = fts3ExprNearTrim(pExpr);
243 }
244 }
245 193
246 return rc; 194 return rc;
247 } 195 }
248 196
249 /* 197 /*
250 ** Load the doclists for each phrase in the query associated with FTS3 cursor 198 ** Load the doclists for each phrase in the query associated with FTS3 cursor
251 ** pCsr. 199 ** pCsr.
252 ** 200 **
253 ** If pnPhrase is not NULL, then *pnPhrase is set to the number of matchable 201 ** If pnPhrase is not NULL, then *pnPhrase is set to the number of matchable
254 ** phrases in the expression (all phrases except those directly or 202 ** phrases in the expression (all phrases except those directly or
(...skipping 150 matching lines...) Expand 10 before | Expand all | Expand 10 after
405 } 353 }
406 354
407 /* 355 /*
408 ** This function is an fts3ExprIterate() callback used by fts3BestSnippet(). 356 ** This function is an fts3ExprIterate() callback used by fts3BestSnippet().
409 ** Each invocation populates an element of the SnippetIter.aPhrase[] array. 357 ** Each invocation populates an element of the SnippetIter.aPhrase[] array.
410 */ 358 */
411 static int fts3SnippetFindPositions(Fts3Expr *pExpr, int iPhrase, void *ctx){ 359 static int fts3SnippetFindPositions(Fts3Expr *pExpr, int iPhrase, void *ctx){
412 SnippetIter *p = (SnippetIter *)ctx; 360 SnippetIter *p = (SnippetIter *)ctx;
413 SnippetPhrase *pPhrase = &p->aPhrase[iPhrase]; 361 SnippetPhrase *pPhrase = &p->aPhrase[iPhrase];
414 char *pCsr; 362 char *pCsr;
363 int rc;
415 364
416 pPhrase->nToken = pExpr->pPhrase->nToken; 365 pPhrase->nToken = pExpr->pPhrase->nToken;
417 366 rc = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->iCol, &pCsr);
418 pCsr = sqlite3Fts3FindPositions(pExpr, p->pCsr->iPrevId, p->iCol); 367 assert( rc==SQLITE_OK || pCsr==0 );
419 if( pCsr ){ 368 if( pCsr ){
420 int iFirst = 0; 369 int iFirst = 0;
421 pPhrase->pList = pCsr; 370 pPhrase->pList = pCsr;
422 fts3GetDeltaPosition(&pCsr, &iFirst); 371 fts3GetDeltaPosition(&pCsr, &iFirst);
372 assert( iFirst>=0 );
423 pPhrase->pHead = pCsr; 373 pPhrase->pHead = pCsr;
424 pPhrase->pTail = pCsr; 374 pPhrase->pTail = pCsr;
425 pPhrase->iHead = iFirst; 375 pPhrase->iHead = iFirst;
426 pPhrase->iTail = iFirst; 376 pPhrase->iTail = iFirst;
427 }else{ 377 }else{
428 assert( pPhrase->pList==0 && pPhrase->pHead==0 && pPhrase->pTail==0 ); 378 assert( rc!=SQLITE_OK || (
379 pPhrase->pList==0 && pPhrase->pHead==0 && pPhrase->pTail==0
380 ));
429 } 381 }
430 382
431 return SQLITE_OK; 383 return rc;
432 } 384 }
433 385
434 /* 386 /*
435 ** Select the fragment of text consisting of nFragment contiguous tokens 387 ** Select the fragment of text consisting of nFragment contiguous tokens
436 ** from column iCol that represent the "best" snippet. The best snippet 388 ** from column iCol that represent the "best" snippet. The best snippet
437 ** is the snippet with the highest score, where scores are calculated 389 ** is the snippet with the highest score, where scores are calculated
438 ** by adding: 390 ** by adding:
439 ** 391 **
440 ** (a) +1 point for each occurence of a matchable phrase in the snippet. 392 ** (a) +1 point for each occurrence of a matchable phrase in the snippet.
441 ** 393 **
442 ** (b) +1000 points for the first occurence of each matchable phrase in 394 ** (b) +1000 points for the first occurrence of each matchable phrase in
443 ** the snippet for which the corresponding mCovered bit is not set. 395 ** the snippet for which the corresponding mCovered bit is not set.
444 ** 396 **
445 ** The selected snippet parameters are stored in structure *pFragment before 397 ** The selected snippet parameters are stored in structure *pFragment before
446 ** returning. The score of the selected snippet is stored in *piScore 398 ** returning. The score of the selected snippet is stored in *piScore
447 ** before returning. 399 ** before returning.
448 */ 400 */
449 static int fts3BestSnippet( 401 static int fts3BestSnippet(
450 int nSnippet, /* Desired snippet length */ 402 int nSnippet, /* Desired snippet length */
451 Fts3Cursor *pCsr, /* Cursor to create snippet for */ 403 Fts3Cursor *pCsr, /* Cursor to create snippet for */
452 int iCol, /* Index of column to create snippet from */ 404 int iCol, /* Index of column to create snippet from */
(...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after
545 */ 497 */
546 if( pStr->n+nAppend+1>=pStr->nAlloc ){ 498 if( pStr->n+nAppend+1>=pStr->nAlloc ){
547 int nAlloc = pStr->nAlloc+nAppend+100; 499 int nAlloc = pStr->nAlloc+nAppend+100;
548 char *zNew = sqlite3_realloc(pStr->z, nAlloc); 500 char *zNew = sqlite3_realloc(pStr->z, nAlloc);
549 if( !zNew ){ 501 if( !zNew ){
550 return SQLITE_NOMEM; 502 return SQLITE_NOMEM;
551 } 503 }
552 pStr->z = zNew; 504 pStr->z = zNew;
553 pStr->nAlloc = nAlloc; 505 pStr->nAlloc = nAlloc;
554 } 506 }
507 assert( pStr->z!=0 && (pStr->nAlloc >= pStr->n+nAppend+1) );
555 508
556 /* Append the data to the string buffer. */ 509 /* Append the data to the string buffer. */
557 memcpy(&pStr->z[pStr->n], zAppend, nAppend); 510 memcpy(&pStr->z[pStr->n], zAppend, nAppend);
558 pStr->n += nAppend; 511 pStr->n += nAppend;
559 pStr->z[pStr->n] = '\0'; 512 pStr->z[pStr->n] = '\0';
560 513
561 return SQLITE_OK; 514 return SQLITE_OK;
562 } 515 }
563 516
564 /* 517 /*
(...skipping 11 matching lines...) Expand all
576 ** 529 **
577 ** ....X.....X.... 530 ** ....X.....X....
578 ** 531 **
579 ** This is done as part of extracting the snippet text, not when selecting 532 ** This is done as part of extracting the snippet text, not when selecting
580 ** the snippet. Snippet selection is done based on doclists only, so there 533 ** the snippet. Snippet selection is done based on doclists only, so there
581 ** is no way for fts3BestSnippet() to know whether or not the document 534 ** is no way for fts3BestSnippet() to know whether or not the document
582 ** actually contains terms that follow the final highlighted term. 535 ** actually contains terms that follow the final highlighted term.
583 */ 536 */
584 static int fts3SnippetShift( 537 static int fts3SnippetShift(
585 Fts3Table *pTab, /* FTS3 table snippet comes from */ 538 Fts3Table *pTab, /* FTS3 table snippet comes from */
539 int iLangid, /* Language id to use in tokenizing */
586 int nSnippet, /* Number of tokens desired for snippet */ 540 int nSnippet, /* Number of tokens desired for snippet */
587 const char *zDoc, /* Document text to extract snippet from */ 541 const char *zDoc, /* Document text to extract snippet from */
588 int nDoc, /* Size of buffer zDoc in bytes */ 542 int nDoc, /* Size of buffer zDoc in bytes */
589 int *piPos, /* IN/OUT: First token of snippet */ 543 int *piPos, /* IN/OUT: First token of snippet */
590 u64 *pHlmask /* IN/OUT: Mask of tokens to highlight */ 544 u64 *pHlmask /* IN/OUT: Mask of tokens to highlight */
591 ){ 545 ){
592 u64 hlmask = *pHlmask; /* Local copy of initial highlight-mask */ 546 u64 hlmask = *pHlmask; /* Local copy of initial highlight-mask */
593 547
594 if( hlmask ){ 548 if( hlmask ){
595 int nLeft; /* Tokens to the left of first highlight */ 549 int nLeft; /* Tokens to the left of first highlight */
(...skipping 15 matching lines...) Expand all
611 int nShift; /* Number of tokens to shift snippet by */ 565 int nShift; /* Number of tokens to shift snippet by */
612 int iCurrent = 0; /* Token counter */ 566 int iCurrent = 0; /* Token counter */
613 int rc; /* Return Code */ 567 int rc; /* Return Code */
614 sqlite3_tokenizer_module *pMod; 568 sqlite3_tokenizer_module *pMod;
615 sqlite3_tokenizer_cursor *pC; 569 sqlite3_tokenizer_cursor *pC;
616 pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule; 570 pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule;
617 571
618 /* Open a cursor on zDoc/nDoc. Check if there are (nSnippet+nDesired) 572 /* Open a cursor on zDoc/nDoc. Check if there are (nSnippet+nDesired)
619 ** or more tokens in zDoc/nDoc. 573 ** or more tokens in zDoc/nDoc.
620 */ 574 */
621 rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC); 575 rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, iLangid, zDoc, nDoc, &pC);
622 if( rc!=SQLITE_OK ){ 576 if( rc!=SQLITE_OK ){
623 return rc; 577 return rc;
624 } 578 }
625 pC->pTokenizer = pTab->pTokenizer;
626 while( rc==SQLITE_OK && iCurrent<(nSnippet+nDesired) ){ 579 while( rc==SQLITE_OK && iCurrent<(nSnippet+nDesired) ){
627 const char *ZDUMMY; int DUMMY1, DUMMY2, DUMMY3; 580 const char *ZDUMMY; int DUMMY1 = 0, DUMMY2 = 0, DUMMY3 = 0;
628 rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent); 581 rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent);
629 } 582 }
630 pMod->xClose(pC); 583 pMod->xClose(pC);
631 if( rc!=SQLITE_OK && rc!=SQLITE_DONE ){ return rc; } 584 if( rc!=SQLITE_OK && rc!=SQLITE_DONE ){ return rc; }
632 585
633 nShift = (rc==SQLITE_DONE)+iCurrent-nSnippet; 586 nShift = (rc==SQLITE_DONE)+iCurrent-nSnippet;
634 assert( nShift<=nDesired ); 587 assert( nShift<=nDesired );
635 if( nShift>0 ){ 588 if( nShift>0 ){
636 *piPos += nShift; 589 *piPos += nShift;
637 *pHlmask = hlmask >> nShift; 590 *pHlmask = hlmask >> nShift;
(...skipping 23 matching lines...) Expand all
661 const char *zDoc; /* Document text to extract snippet from */ 614 const char *zDoc; /* Document text to extract snippet from */
662 int nDoc; /* Size of zDoc in bytes */ 615 int nDoc; /* Size of zDoc in bytes */
663 int iCurrent = 0; /* Current token number of document */ 616 int iCurrent = 0; /* Current token number of document */
664 int iEnd = 0; /* Byte offset of end of current token */ 617 int iEnd = 0; /* Byte offset of end of current token */
665 int isShiftDone = 0; /* True after snippet is shifted */ 618 int isShiftDone = 0; /* True after snippet is shifted */
666 int iPos = pFragment->iPos; /* First token of snippet */ 619 int iPos = pFragment->iPos; /* First token of snippet */
667 u64 hlmask = pFragment->hlmask; /* Highlight-mask for snippet */ 620 u64 hlmask = pFragment->hlmask; /* Highlight-mask for snippet */
668 int iCol = pFragment->iCol+1; /* Query column to extract text from */ 621 int iCol = pFragment->iCol+1; /* Query column to extract text from */
669 sqlite3_tokenizer_module *pMod; /* Tokenizer module methods object */ 622 sqlite3_tokenizer_module *pMod; /* Tokenizer module methods object */
670 sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor open on zDoc/nDoc */ 623 sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor open on zDoc/nDoc */
671 const char *ZDUMMY; /* Dummy argument used with tokenizer */
672 int DUMMY1; /* Dummy argument used with tokenizer */
673 624
674 zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol); 625 zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol);
675 if( zDoc==0 ){ 626 if( zDoc==0 ){
676 if( sqlite3_column_type(pCsr->pStmt, iCol)!=SQLITE_NULL ){ 627 if( sqlite3_column_type(pCsr->pStmt, iCol)!=SQLITE_NULL ){
677 return SQLITE_NOMEM; 628 return SQLITE_NOMEM;
678 } 629 }
679 return SQLITE_OK; 630 return SQLITE_OK;
680 } 631 }
681 nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol); 632 nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol);
682 633
683 /* Open a token cursor on the document. */ 634 /* Open a token cursor on the document. */
684 pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule; 635 pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule;
685 rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC); 636 rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid, zDoc,nDoc,&pC);
686 if( rc!=SQLITE_OK ){ 637 if( rc!=SQLITE_OK ){
687 return rc; 638 return rc;
688 } 639 }
689 pC->pTokenizer = pTab->pTokenizer;
690 640
691 while( rc==SQLITE_OK ){ 641 while( rc==SQLITE_OK ){
692 int iBegin; /* Offset in zDoc of start of token */ 642 const char *ZDUMMY; /* Dummy argument used with tokenizer */
693 int iFin; /* Offset in zDoc of end of token */ 643 int DUMMY1 = -1; /* Dummy argument used with tokenizer */
694 int isHighlight; /* True for highlighted terms */ 644 int iBegin = 0; /* Offset in zDoc of start of token */
645 int iFin = 0; /* Offset in zDoc of end of token */
646 int isHighlight = 0; /* True for highlighted terms */
695 647
648 /* Variable DUMMY1 is initialized to a negative value above. Elsewhere
649 ** in the FTS code the variable that the third argument to xNext points to
650 ** is initialized to zero before the first (*but not necessarily
651 ** subsequent*) call to xNext(). This is done for a particular application
652 ** that needs to know whether or not the tokenizer is being used for
653 ** snippet generation or for some other purpose.
654 **
655 ** Extreme care is required when writing code to depend on this
656 ** initialization. It is not a documented part of the tokenizer interface.
657 ** If a tokenizer is used directly by any code outside of FTS, this
658 ** convention might not be respected. */
696 rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &iBegin, &iFin, &iCurrent); 659 rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &iBegin, &iFin, &iCurrent);
697 if( rc!=SQLITE_OK ){ 660 if( rc!=SQLITE_OK ){
698 if( rc==SQLITE_DONE ){ 661 if( rc==SQLITE_DONE ){
699 /* Special case - the last token of the snippet is also the last token 662 /* Special case - the last token of the snippet is also the last token
700 ** of the column. Append any punctuation that occurred between the end 663 ** of the column. Append any punctuation that occurred between the end
701 ** of the previous token and the end of the document to the output. 664 ** of the previous token and the end of the document to the output.
702 ** Then break out of the loop. */ 665 ** Then break out of the loop. */
703 rc = fts3StringAppend(pOut, &zDoc[iEnd], -1); 666 rc = fts3StringAppend(pOut, &zDoc[iEnd], -1);
704 } 667 }
705 break; 668 break;
706 } 669 }
707 if( iCurrent<iPos ){ continue; } 670 if( iCurrent<iPos ){ continue; }
708 671
709 if( !isShiftDone ){ 672 if( !isShiftDone ){
710 int n = nDoc - iBegin; 673 int n = nDoc - iBegin;
711 rc = fts3SnippetShift(pTab, nSnippet, &zDoc[iBegin], n, &iPos, &hlmask); 674 rc = fts3SnippetShift(
675 pTab, pCsr->iLangid, nSnippet, &zDoc[iBegin], n, &iPos, &hlmask
676 );
712 isShiftDone = 1; 677 isShiftDone = 1;
713 678
714 /* Now that the shift has been done, check if the initial "..." are 679 /* Now that the shift has been done, check if the initial "..." are
715 ** required. They are required if (a) this is not the first fragment, 680 ** required. They are required if (a) this is not the first fragment,
716 ** or (b) this fragment does not begin at position 0 of its column. 681 ** or (b) this fragment does not begin at position 0 of its column.
717 */ 682 */
718 if( rc==SQLITE_OK && (iPos>0 || iFragment>0) ){ 683 if( rc==SQLITE_OK && (iPos>0 || iFragment>0) ){
719 rc = fts3StringAppend(pOut, zEllipsis, -1); 684 rc = fts3StringAppend(pOut, zEllipsis, -1);
720 } 685 }
721 if( rc!=SQLITE_OK || iCurrent<iPos ) continue; 686 if( rc!=SQLITE_OK || iCurrent<iPos ) continue;
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after
765 /* A column-list is terminated by either a 0x01 or 0x00. */ 730 /* A column-list is terminated by either a 0x01 or 0x00. */
766 while( 0xFE & (*pEnd | c) ){ 731 while( 0xFE & (*pEnd | c) ){
767 c = *pEnd++ & 0x80; 732 c = *pEnd++ & 0x80;
768 if( !c ) nEntry++; 733 if( !c ) nEntry++;
769 } 734 }
770 735
771 *ppCollist = pEnd; 736 *ppCollist = pEnd;
772 return nEntry; 737 return nEntry;
773 } 738 }
774 739
775 static void fts3LoadColumnlistCounts(char **pp, u32 *aOut, int isGlobal){
776 char *pCsr = *pp;
777 while( *pCsr ){
778 int nHit;
779 sqlite3_int64 iCol = 0;
780 if( *pCsr==0x01 ){
781 pCsr++;
782 pCsr += sqlite3Fts3GetVarint(pCsr, &iCol);
783 }
784 nHit = fts3ColumnlistCount(&pCsr);
785 assert( nHit>0 );
786 if( isGlobal ){
787 aOut[iCol*3+1]++;
788 }
789 aOut[iCol*3] += nHit;
790 }
791 pCsr++;
792 *pp = pCsr;
793 }
794
795 /* 740 /*
796 ** fts3ExprIterate() callback used to collect the "global" matchinfo stats 741 ** fts3ExprIterate() callback used to collect the "global" matchinfo stats
797 ** for a single query. 742 ** for a single query.
798 ** 743 **
799 ** fts3ExprIterate() callback to load the 'global' elements of a 744 ** fts3ExprIterate() callback to load the 'global' elements of a
800 ** FTS3_MATCHINFO_HITS matchinfo array. The global stats are those elements 745 ** FTS3_MATCHINFO_HITS matchinfo array. The global stats are those elements
801 ** of the matchinfo array that are constant for all rows returned by the 746 ** of the matchinfo array that are constant for all rows returned by the
802 ** current query. 747 ** current query.
803 ** 748 **
804 ** Argument pCtx is actually a pointer to a struct of type MatchInfo. This 749 ** Argument pCtx is actually a pointer to a struct of type MatchInfo. This
(...skipping 13 matching lines...) Expand all
818 ** file system. This is done because the full-text index doclist is required 763 ** file system. This is done because the full-text index doclist is required
819 ** to calculate these values properly, and the full-text index doclist is 764 ** to calculate these values properly, and the full-text index doclist is
820 ** not available for deferred tokens. 765 ** not available for deferred tokens.
821 */ 766 */
822 static int fts3ExprGlobalHitsCb( 767 static int fts3ExprGlobalHitsCb(
823 Fts3Expr *pExpr, /* Phrase expression node */ 768 Fts3Expr *pExpr, /* Phrase expression node */
824 int iPhrase, /* Phrase number (numbered from zero) */ 769 int iPhrase, /* Phrase number (numbered from zero) */
825 void *pCtx /* Pointer to MatchInfo structure */ 770 void *pCtx /* Pointer to MatchInfo structure */
826 ){ 771 ){
827 MatchInfo *p = (MatchInfo *)pCtx; 772 MatchInfo *p = (MatchInfo *)pCtx;
828 Fts3Cursor *pCsr = p->pCursor; 773 return sqlite3Fts3EvalPhraseStats(
829 char *pIter; 774 p->pCursor, pExpr, &p->aMatchinfo[3*iPhrase*p->nCol]
830 char *pEnd; 775 );
831 char *pFree = 0;
832 u32 *aOut = &p->aMatchinfo[3*iPhrase*p->nCol];
833
834 assert( pExpr->isLoaded );
835 assert( pExpr->eType==FTSQUERY_PHRASE );
836
837 if( pCsr->pDeferred ){
838 Fts3Phrase *pPhrase = pExpr->pPhrase;
839 int ii;
840 for(ii=0; ii<pPhrase->nToken; ii++){
841 if( pPhrase->aToken[ii].bFulltext ) break;
842 }
843 if( ii<pPhrase->nToken ){
844 int nFree = 0;
845 int rc = sqlite3Fts3ExprLoadFtDoclist(pCsr, pExpr, &pFree, &nFree);
846 if( rc!=SQLITE_OK ) return rc;
847 pIter = pFree;
848 pEnd = &pFree[nFree];
849 }else{
850 int iCol; /* Column index */
851 for(iCol=0; iCol<p->nCol; iCol++){
852 aOut[iCol*3 + 1] = (u32)p->nDoc;
853 aOut[iCol*3 + 2] = (u32)p->nDoc;
854 }
855 return SQLITE_OK;
856 }
857 }else{
858 pIter = pExpr->aDoclist;
859 pEnd = &pExpr->aDoclist[pExpr->nDoclist];
860 }
861
862 /* Fill in the global hit count matrix row for this phrase. */
863 while( pIter<pEnd ){
864 while( *pIter++ & 0x80 ); /* Skip past docid. */
865 fts3LoadColumnlistCounts(&pIter, &aOut[1], 1);
866 }
867
868 sqlite3_free(pFree);
869 return SQLITE_OK;
870 } 776 }
871 777
872 /* 778 /*
873 ** fts3ExprIterate() callback used to collect the "local" part of the 779 ** fts3ExprIterate() callback used to collect the "local" part of the
874 ** FTS3_MATCHINFO_HITS array. The local stats are those elements of the 780 ** FTS3_MATCHINFO_HITS array. The local stats are those elements of the
875 ** array that are different for each row returned by the query. 781 ** array that are different for each row returned by the query.
876 */ 782 */
877 static int fts3ExprLocalHitsCb( 783 static int fts3ExprLocalHitsCb(
878 Fts3Expr *pExpr, /* Phrase expression node */ 784 Fts3Expr *pExpr, /* Phrase expression node */
879 int iPhrase, /* Phrase number */ 785 int iPhrase, /* Phrase number */
880 void *pCtx /* Pointer to MatchInfo structure */ 786 void *pCtx /* Pointer to MatchInfo structure */
881 ){ 787 ){
788 int rc = SQLITE_OK;
882 MatchInfo *p = (MatchInfo *)pCtx; 789 MatchInfo *p = (MatchInfo *)pCtx;
883 int iStart = iPhrase * p->nCol * 3; 790 int iStart = iPhrase * p->nCol * 3;
884 int i; 791 int i;
885 792
886 for(i=0; i<p->nCol; i++) p->aMatchinfo[iStart+i*3] = 0; 793 for(i=0; i<p->nCol && rc==SQLITE_OK; i++){
887
888 if( pExpr->aDoclist ){
889 char *pCsr; 794 char *pCsr;
890 795 rc = sqlite3Fts3EvalPhrasePoslist(p->pCursor, pExpr, i, &pCsr);
891 pCsr = sqlite3Fts3FindPositions(pExpr, p->pCursor->iPrevId, -1);
892 if( pCsr ){ 796 if( pCsr ){
893 fts3LoadColumnlistCounts(&pCsr, &p->aMatchinfo[iStart], 0); 797 p->aMatchinfo[iStart+i*3] = fts3ColumnlistCount(&pCsr);
798 }else{
799 p->aMatchinfo[iStart+i*3] = 0;
894 } 800 }
895 } 801 }
896 802
897 return SQLITE_OK; 803 return rc;
898 } 804 }
899 805
900 static int fts3MatchinfoCheck( 806 static int fts3MatchinfoCheck(
901 Fts3Table *pTab, 807 Fts3Table *pTab,
902 char cArg, 808 char cArg,
903 char **pzErr 809 char **pzErr
904 ){ 810 ){
905 if( (cArg==FTS3_MATCHINFO_NPHRASE) 811 if( (cArg==FTS3_MATCHINFO_NPHRASE)
906 || (cArg==FTS3_MATCHINFO_NCOL) 812 || (cArg==FTS3_MATCHINFO_NCOL)
907 || (cArg==FTS3_MATCHINFO_NDOC && pTab->bHasStat) 813 || (cArg==FTS3_MATCHINFO_NDOC && pTab->bFts4)
908 || (cArg==FTS3_MATCHINFO_AVGLENGTH && pTab->bHasStat) 814 || (cArg==FTS3_MATCHINFO_AVGLENGTH && pTab->bFts4)
909 || (cArg==FTS3_MATCHINFO_LENGTH && pTab->bHasDocsize) 815 || (cArg==FTS3_MATCHINFO_LENGTH && pTab->bHasDocsize)
910 || (cArg==FTS3_MATCHINFO_LCS) 816 || (cArg==FTS3_MATCHINFO_LCS)
911 || (cArg==FTS3_MATCHINFO_HITS) 817 || (cArg==FTS3_MATCHINFO_HITS)
912 ){ 818 ){
913 return SQLITE_OK; 819 return SQLITE_OK;
914 } 820 }
915 *pzErr = sqlite3_mprintf("unrecognized matchinfo request: %c", cArg); 821 *pzErr = sqlite3_mprintf("unrecognized matchinfo request: %c", cArg);
916 return SQLITE_ERROR; 822 return SQLITE_ERROR;
917 } 823 }
918 824
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
953 859
954 if( !*ppStmt ){ 860 if( !*ppStmt ){
955 int rc = sqlite3Fts3SelectDoctotal(pTab, ppStmt); 861 int rc = sqlite3Fts3SelectDoctotal(pTab, ppStmt);
956 if( rc!=SQLITE_OK ) return rc; 862 if( rc!=SQLITE_OK ) return rc;
957 } 863 }
958 pStmt = *ppStmt; 864 pStmt = *ppStmt;
959 assert( sqlite3_data_count(pStmt)==1 ); 865 assert( sqlite3_data_count(pStmt)==1 );
960 866
961 a = sqlite3_column_blob(pStmt, 0); 867 a = sqlite3_column_blob(pStmt, 0);
962 a += sqlite3Fts3GetVarint(a, &nDoc); 868 a += sqlite3Fts3GetVarint(a, &nDoc);
963 if( nDoc==0 ) return SQLITE_CORRUPT; 869 if( nDoc==0 ) return FTS_CORRUPT_VTAB;
964 *pnDoc = (u32)nDoc; 870 *pnDoc = (u32)nDoc;
965 871
966 if( paLen ) *paLen = a; 872 if( paLen ) *paLen = a;
967 return SQLITE_OK; 873 return SQLITE_OK;
968 } 874 }
969 875
970 /* 876 /*
971 ** An instance of the following structure is used to store state while 877 ** An instance of the following structure is used to store state while
972 ** iterating through a multi-column position-list corresponding to the 878 ** iterating through a multi-column position-list corresponding to the
973 ** hits for a single phrase on a single row in order to calculate the 879 ** hits for a single phrase on a single row in order to calculate the
974 ** values for a matchinfo() FTS3_MATCHINFO_LCS request. 880 ** values for a matchinfo() FTS3_MATCHINFO_LCS request.
975 */ 881 */
976 typedef struct LcsIterator LcsIterator; 882 typedef struct LcsIterator LcsIterator;
977 struct LcsIterator { 883 struct LcsIterator {
978 Fts3Expr *pExpr; /* Pointer to phrase expression */ 884 Fts3Expr *pExpr; /* Pointer to phrase expression */
885 int iPosOffset; /* Tokens count up to end of this phrase */
979 char *pRead; /* Cursor used to iterate through aDoclist */ 886 char *pRead; /* Cursor used to iterate through aDoclist */
980 int iPosOffset; /* Tokens count up to end of this phrase */
981 int iCol; /* Current column number */
982 int iPos; /* Current position */ 887 int iPos; /* Current position */
983 }; 888 };
984 889
985 /* 890 /*
986 ** If LcsIterator.iCol is set to the following value, the iterator has 891 ** If LcsIterator.iCol is set to the following value, the iterator has
987 ** finished iterating through all offsets for all columns. 892 ** finished iterating through all offsets for all columns.
988 */ 893 */
989 #define LCS_ITERATOR_FINISHED 0x7FFFFFFF; 894 #define LCS_ITERATOR_FINISHED 0x7FFFFFFF;
990 895
991 static int fts3MatchinfoLcsCb( 896 static int fts3MatchinfoLcsCb(
(...skipping 10 matching lines...) Expand all
1002 ** Advance the iterator passed as an argument to the next position. Return 907 ** Advance the iterator passed as an argument to the next position. Return
1003 ** 1 if the iterator is at EOF or if it now points to the start of the 908 ** 1 if the iterator is at EOF or if it now points to the start of the
1004 ** position list for the next column. 909 ** position list for the next column.
1005 */ 910 */
1006 static int fts3LcsIteratorAdvance(LcsIterator *pIter){ 911 static int fts3LcsIteratorAdvance(LcsIterator *pIter){
1007 char *pRead = pIter->pRead; 912 char *pRead = pIter->pRead;
1008 sqlite3_int64 iRead; 913 sqlite3_int64 iRead;
1009 int rc = 0; 914 int rc = 0;
1010 915
1011 pRead += sqlite3Fts3GetVarint(pRead, &iRead); 916 pRead += sqlite3Fts3GetVarint(pRead, &iRead);
1012 if( iRead==0 ){ 917 if( iRead==0 || iRead==1 ){
1013 pIter->iCol = LCS_ITERATOR_FINISHED; 918 pRead = 0;
1014 rc = 1; 919 rc = 1;
1015 }else{ 920 }else{
1016 if( iRead==1 ){
1017 pRead += sqlite3Fts3GetVarint(pRead, &iRead);
1018 pIter->iCol = (int)iRead;
1019 pIter->iPos = pIter->iPosOffset;
1020 pRead += sqlite3Fts3GetVarint(pRead, &iRead);
1021 rc = 1;
1022 }
1023 pIter->iPos += (int)(iRead-2); 921 pIter->iPos += (int)(iRead-2);
1024 } 922 }
1025 923
1026 pIter->pRead = pRead; 924 pIter->pRead = pRead;
1027 return rc; 925 return rc;
1028 } 926 }
1029 927
1030 /* 928 /*
1031 ** This function implements the FTS3_MATCHINFO_LCS matchinfo() flag. 929 ** This function implements the FTS3_MATCHINFO_LCS matchinfo() flag.
1032 ** 930 **
(...skipping 11 matching lines...) Expand all
1044 int iCol; 942 int iCol;
1045 int nToken = 0; 943 int nToken = 0;
1046 944
1047 /* Allocate and populate the array of LcsIterator objects. The array 945 /* Allocate and populate the array of LcsIterator objects. The array
1048 ** contains one element for each matchable phrase in the query. 946 ** contains one element for each matchable phrase in the query.
1049 **/ 947 **/
1050 aIter = sqlite3_malloc(sizeof(LcsIterator) * pCsr->nPhrase); 948 aIter = sqlite3_malloc(sizeof(LcsIterator) * pCsr->nPhrase);
1051 if( !aIter ) return SQLITE_NOMEM; 949 if( !aIter ) return SQLITE_NOMEM;
1052 memset(aIter, 0, sizeof(LcsIterator) * pCsr->nPhrase); 950 memset(aIter, 0, sizeof(LcsIterator) * pCsr->nPhrase);
1053 (void)fts3ExprIterate(pCsr->pExpr, fts3MatchinfoLcsCb, (void*)aIter); 951 (void)fts3ExprIterate(pCsr->pExpr, fts3MatchinfoLcsCb, (void*)aIter);
952
1054 for(i=0; i<pInfo->nPhrase; i++){ 953 for(i=0; i<pInfo->nPhrase; i++){
1055 LcsIterator *pIter = &aIter[i]; 954 LcsIterator *pIter = &aIter[i];
1056 nToken -= pIter->pExpr->pPhrase->nToken; 955 nToken -= pIter->pExpr->pPhrase->nToken;
1057 pIter->iPosOffset = nToken; 956 pIter->iPosOffset = nToken;
1058 pIter->pRead = sqlite3Fts3FindPositions(pIter->pExpr, pCsr->iPrevId, -1);
1059 if( pIter->pRead ){
1060 pIter->iPos = pIter->iPosOffset;
1061 fts3LcsIteratorAdvance(&aIter[i]);
1062 }else{
1063 pIter->iCol = LCS_ITERATOR_FINISHED;
1064 }
1065 } 957 }
1066 958
1067 for(iCol=0; iCol<pInfo->nCol; iCol++){ 959 for(iCol=0; iCol<pInfo->nCol; iCol++){
1068 int nLcs = 0; /* LCS value for this column */ 960 int nLcs = 0; /* LCS value for this column */
1069 int nLive = 0; /* Number of iterators in aIter not at EOF */ 961 int nLive = 0; /* Number of iterators in aIter not at EOF */
1070 962
1071 /* Loop through the iterators in aIter[]. Set nLive to the number of
1072 ** iterators that point to a position-list corresponding to column iCol.
1073 */
1074 for(i=0; i<pInfo->nPhrase; i++){ 963 for(i=0; i<pInfo->nPhrase; i++){
1075 assert( aIter[i].iCol>=iCol ); 964 int rc;
1076 if( aIter[i].iCol==iCol ) nLive++; 965 LcsIterator *pIt = &aIter[i];
966 rc = sqlite3Fts3EvalPhrasePoslist(pCsr, pIt->pExpr, iCol, &pIt->pRead);
967 if( rc!=SQLITE_OK ) return rc;
968 if( pIt->pRead ){
969 pIt->iPos = pIt->iPosOffset;
970 fts3LcsIteratorAdvance(&aIter[i]);
971 nLive++;
972 }
1077 } 973 }
1078 974
1079 /* The following loop runs until all iterators in aIter[] have finished
1080 ** iterating through positions in column iCol. Exactly one of the
1081 ** iterators is advanced each time the body of the loop is run.
1082 */
1083 while( nLive>0 ){ 975 while( nLive>0 ){
1084 LcsIterator *pAdv = 0; /* The iterator to advance by one position */ 976 LcsIterator *pAdv = 0; /* The iterator to advance by one position */
1085 int nThisLcs = 0; /* LCS for the current iterator positions */ 977 int nThisLcs = 0; /* LCS for the current iterator positions */
1086 978
1087 for(i=0; i<pInfo->nPhrase; i++){ 979 for(i=0; i<pInfo->nPhrase; i++){
1088 LcsIterator *pIter = &aIter[i]; 980 LcsIterator *pIter = &aIter[i];
1089 if( iCol!=pIter->iCol ){ 981 if( pIter->pRead==0 ){
1090 /* This iterator is already at EOF for this column. */ 982 /* This iterator is already at EOF for this column. */
1091 nThisLcs = 0; 983 nThisLcs = 0;
1092 }else{ 984 }else{
1093 if( pAdv==0 || pIter->iPos<pAdv->iPos ){ 985 if( pAdv==0 || pIter->iPos<pAdv->iPos ){
1094 pAdv = pIter; 986 pAdv = pIter;
1095 } 987 }
1096 if( nThisLcs==0 || pIter->iPos==pIter[-1].iPos ){ 988 if( nThisLcs==0 || pIter->iPos==pIter[-1].iPos ){
1097 nThisLcs++; 989 nThisLcs++;
1098 }else{ 990 }else{
1099 nThisLcs = 1; 991 nThisLcs = 1;
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after
1145 case FTS3_MATCHINFO_NPHRASE: 1037 case FTS3_MATCHINFO_NPHRASE:
1146 if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nPhrase; 1038 if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nPhrase;
1147 break; 1039 break;
1148 1040
1149 case FTS3_MATCHINFO_NCOL: 1041 case FTS3_MATCHINFO_NCOL:
1150 if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nCol; 1042 if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nCol;
1151 break; 1043 break;
1152 1044
1153 case FTS3_MATCHINFO_NDOC: 1045 case FTS3_MATCHINFO_NDOC:
1154 if( bGlobal ){ 1046 if( bGlobal ){
1155 sqlite3_int64 nDoc; 1047 sqlite3_int64 nDoc = 0;
1156 rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, 0); 1048 rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, 0);
1157 pInfo->aMatchinfo[0] = (u32)nDoc; 1049 pInfo->aMatchinfo[0] = (u32)nDoc;
1158 } 1050 }
1159 break; 1051 break;
1160 1052
1161 case FTS3_MATCHINFO_AVGLENGTH: 1053 case FTS3_MATCHINFO_AVGLENGTH:
1162 if( bGlobal ){ 1054 if( bGlobal ){
1163 sqlite3_int64 nDoc; /* Number of rows in table */ 1055 sqlite3_int64 nDoc; /* Number of rows in table */
1164 const char *a; /* Aggregate column length array */ 1056 const char *a; /* Aggregate column length array */
1165 1057
(...skipping 235 matching lines...) Expand 10 before | Expand all | Expand 10 after
1401 typedef struct TermOffset TermOffset; 1293 typedef struct TermOffset TermOffset;
1402 typedef struct TermOffsetCtx TermOffsetCtx; 1294 typedef struct TermOffsetCtx TermOffsetCtx;
1403 1295
1404 struct TermOffset { 1296 struct TermOffset {
1405 char *pList; /* Position-list */ 1297 char *pList; /* Position-list */
1406 int iPos; /* Position just read from pList */ 1298 int iPos; /* Position just read from pList */
1407 int iOff; /* Offset of this term from read positions */ 1299 int iOff; /* Offset of this term from read positions */
1408 }; 1300 };
1409 1301
1410 struct TermOffsetCtx { 1302 struct TermOffsetCtx {
1303 Fts3Cursor *pCsr;
1411 int iCol; /* Column of table to populate aTerm for */ 1304 int iCol; /* Column of table to populate aTerm for */
1412 int iTerm; 1305 int iTerm;
1413 sqlite3_int64 iDocid; 1306 sqlite3_int64 iDocid;
1414 TermOffset *aTerm; 1307 TermOffset *aTerm;
1415 }; 1308 };
1416 1309
1417 /* 1310 /*
1418 ** This function is an fts3ExprIterate() callback used by sqlite3Fts3Offsets(). 1311 ** This function is an fts3ExprIterate() callback used by sqlite3Fts3Offsets().
1419 */ 1312 */
1420 static int fts3ExprTermOffsetInit(Fts3Expr *pExpr, int iPhrase, void *ctx){ 1313 static int fts3ExprTermOffsetInit(Fts3Expr *pExpr, int iPhrase, void *ctx){
1421 TermOffsetCtx *p = (TermOffsetCtx *)ctx; 1314 TermOffsetCtx *p = (TermOffsetCtx *)ctx;
1422 int nTerm; /* Number of tokens in phrase */ 1315 int nTerm; /* Number of tokens in phrase */
1423 int iTerm; /* For looping through nTerm phrase terms */ 1316 int iTerm; /* For looping through nTerm phrase terms */
1424 char *pList; /* Pointer to position list for phrase */ 1317 char *pList; /* Pointer to position list for phrase */
1425 int iPos = 0; /* First position in position-list */ 1318 int iPos = 0; /* First position in position-list */
1319 int rc;
1426 1320
1427 UNUSED_PARAMETER(iPhrase); 1321 UNUSED_PARAMETER(iPhrase);
1428 pList = sqlite3Fts3FindPositions(pExpr, p->iDocid, p->iCol); 1322 rc = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->iCol, &pList);
1429 nTerm = pExpr->pPhrase->nToken; 1323 nTerm = pExpr->pPhrase->nToken;
1430 if( pList ){ 1324 if( pList ){
1431 fts3GetDeltaPosition(&pList, &iPos); 1325 fts3GetDeltaPosition(&pList, &iPos);
1432 assert( iPos>=0 ); 1326 assert( iPos>=0 );
1433 } 1327 }
1434 1328
1435 for(iTerm=0; iTerm<nTerm; iTerm++){ 1329 for(iTerm=0; iTerm<nTerm; iTerm++){
1436 TermOffset *pT = &p->aTerm[p->iTerm++]; 1330 TermOffset *pT = &p->aTerm[p->iTerm++];
1437 pT->iOff = nTerm-iTerm-1; 1331 pT->iOff = nTerm-iTerm-1;
1438 pT->pList = pList; 1332 pT->pList = pList;
1439 pT->iPos = iPos; 1333 pT->iPos = iPos;
1440 } 1334 }
1441 1335
1442 return SQLITE_OK; 1336 return rc;
1443 } 1337 }
1444 1338
1445 /* 1339 /*
1446 ** Implementation of offsets() function. 1340 ** Implementation of offsets() function.
1447 */ 1341 */
1448 void sqlite3Fts3Offsets( 1342 void sqlite3Fts3Offsets(
1449 sqlite3_context *pCtx, /* SQLite function call context */ 1343 sqlite3_context *pCtx, /* SQLite function call context */
1450 Fts3Cursor *pCsr /* Cursor object */ 1344 Fts3Cursor *pCsr /* Cursor object */
1451 ){ 1345 ){
1452 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; 1346 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
1453 sqlite3_tokenizer_module const *pMod = pTab->pTokenizer->pModule; 1347 sqlite3_tokenizer_module const *pMod = pTab->pTokenizer->pModule;
1454 const char *ZDUMMY; /* Dummy argument used with xNext() */
1455 int NDUMMY; /* Dummy argument used with xNext() */
1456 int rc; /* Return Code */ 1348 int rc; /* Return Code */
1457 int nToken; /* Number of tokens in query */ 1349 int nToken; /* Number of tokens in query */
1458 int iCol; /* Column currently being processed */ 1350 int iCol; /* Column currently being processed */
1459 StrBuffer res = {0, 0, 0}; /* Result string */ 1351 StrBuffer res = {0, 0, 0}; /* Result string */
1460 TermOffsetCtx sCtx; /* Context for fts3ExprTermOffsetInit() */ 1352 TermOffsetCtx sCtx; /* Context for fts3ExprTermOffsetInit() */
1461 1353
1462 if( !pCsr->pExpr ){ 1354 if( !pCsr->pExpr ){
1463 sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC); 1355 sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC);
1464 return; 1356 return;
1465 } 1357 }
1466 1358
1467 memset(&sCtx, 0, sizeof(sCtx)); 1359 memset(&sCtx, 0, sizeof(sCtx));
1468 assert( pCsr->isRequireSeek==0 ); 1360 assert( pCsr->isRequireSeek==0 );
1469 1361
1470 /* Count the number of terms in the query */ 1362 /* Count the number of terms in the query */
1471 rc = fts3ExprLoadDoclists(pCsr, 0, &nToken); 1363 rc = fts3ExprLoadDoclists(pCsr, 0, &nToken);
1472 if( rc!=SQLITE_OK ) goto offsets_out; 1364 if( rc!=SQLITE_OK ) goto offsets_out;
1473 1365
1474 /* Allocate the array of TermOffset iterators. */ 1366 /* Allocate the array of TermOffset iterators. */
1475 sCtx.aTerm = (TermOffset *)sqlite3_malloc(sizeof(TermOffset)*nToken); 1367 sCtx.aTerm = (TermOffset *)sqlite3_malloc(sizeof(TermOffset)*nToken);
1476 if( 0==sCtx.aTerm ){ 1368 if( 0==sCtx.aTerm ){
1477 rc = SQLITE_NOMEM; 1369 rc = SQLITE_NOMEM;
1478 goto offsets_out; 1370 goto offsets_out;
1479 } 1371 }
1480 sCtx.iDocid = pCsr->iPrevId; 1372 sCtx.iDocid = pCsr->iPrevId;
1373 sCtx.pCsr = pCsr;
1481 1374
1482 /* Loop through the table columns, appending offset information to 1375 /* Loop through the table columns, appending offset information to
1483 ** string-buffer res for each column. 1376 ** string-buffer res for each column.
1484 */ 1377 */
1485 for(iCol=0; iCol<pTab->nColumn; iCol++){ 1378 for(iCol=0; iCol<pTab->nColumn; iCol++){
1486 sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor */ 1379 sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor */
1487 int iStart; 1380 const char *ZDUMMY; /* Dummy argument used with xNext() */
1488 int iEnd; 1381 int NDUMMY = 0; /* Dummy argument used with xNext() */
1489 int iCurrent; 1382 int iStart = 0;
1383 int iEnd = 0;
1384 int iCurrent = 0;
1490 const char *zDoc; 1385 const char *zDoc;
1491 int nDoc; 1386 int nDoc;
1492 1387
1493 /* Initialize the contents of sCtx.aTerm[] for column iCol. There is 1388 /* Initialize the contents of sCtx.aTerm[] for column iCol. There is
1494 ** no way that this operation can fail, so the return code from 1389 ** no way that this operation can fail, so the return code from
1495 ** fts3ExprIterate() can be discarded. 1390 ** fts3ExprIterate() can be discarded.
1496 */ 1391 */
1497 sCtx.iCol = iCol; 1392 sCtx.iCol = iCol;
1498 sCtx.iTerm = 0; 1393 sCtx.iTerm = 0;
1499 (void)fts3ExprIterate(pCsr->pExpr, fts3ExprTermOffsetInit, (void *)&sCtx); 1394 (void)fts3ExprIterate(pCsr->pExpr, fts3ExprTermOffsetInit, (void *)&sCtx);
1500 1395
1501 /* Retrieve the text stored in column iCol. If an SQL NULL is stored 1396 /* Retrieve the text stored in column iCol. If an SQL NULL is stored
1502 ** in column iCol, jump immediately to the next iteration of the loop. 1397 ** in column iCol, jump immediately to the next iteration of the loop.
1503 ** If an OOM occurs while retrieving the data (this can happen if SQLite 1398 ** If an OOM occurs while retrieving the data (this can happen if SQLite
1504 ** needs to transform the data from utf-16 to utf-8), return SQLITE_NOMEM 1399 ** needs to transform the data from utf-16 to utf-8), return SQLITE_NOMEM
1505 ** to the caller. 1400 ** to the caller.
1506 */ 1401 */
1507 zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol+1); 1402 zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol+1);
1508 nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol+1); 1403 nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol+1);
1509 if( zDoc==0 ){ 1404 if( zDoc==0 ){
1510 if( sqlite3_column_type(pCsr->pStmt, iCol+1)==SQLITE_NULL ){ 1405 if( sqlite3_column_type(pCsr->pStmt, iCol+1)==SQLITE_NULL ){
1511 continue; 1406 continue;
1512 } 1407 }
1513 rc = SQLITE_NOMEM; 1408 rc = SQLITE_NOMEM;
1514 goto offsets_out; 1409 goto offsets_out;
1515 } 1410 }
1516 1411
1517 /* Initialize a tokenizer iterator to iterate through column iCol. */ 1412 /* Initialize a tokenizer iterator to iterate through column iCol. */
1518 rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC); 1413 rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid,
1414 zDoc, nDoc, &pC
1415 );
1519 if( rc!=SQLITE_OK ) goto offsets_out; 1416 if( rc!=SQLITE_OK ) goto offsets_out;
1520 pC->pTokenizer = pTab->pTokenizer;
1521 1417
1522 rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent); 1418 rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent);
1523 while( rc==SQLITE_OK ){ 1419 while( rc==SQLITE_OK ){
1524 int i; /* Used to loop through terms */ 1420 int i; /* Used to loop through terms */
1525 int iMinPos = 0x7FFFFFFF; /* Position of next token */ 1421 int iMinPos = 0x7FFFFFFF; /* Position of next token */
1526 TermOffset *pTerm = 0; /* TermOffset associated with next token */ 1422 TermOffset *pTerm = 0; /* TermOffset associated with next token */
1527 1423
1528 for(i=0; i<nToken; i++){ 1424 for(i=0; i<nToken; i++){
1529 TermOffset *pT = &sCtx.aTerm[i]; 1425 TermOffset *pT = &sCtx.aTerm[i];
1530 if( pT->pList && (pT->iPos-pT->iOff)<iMinPos ){ 1426 if( pT->pList && (pT->iPos-pT->iOff)<iMinPos ){
1531 iMinPos = pT->iPos-pT->iOff; 1427 iMinPos = pT->iPos-pT->iOff;
1532 pTerm = pT; 1428 pTerm = pT;
1533 } 1429 }
1534 } 1430 }
1535 1431
1536 if( !pTerm ){ 1432 if( !pTerm ){
1537 /* All offsets for this column have been gathered. */ 1433 /* All offsets for this column have been gathered. */
1538 break; 1434 rc = SQLITE_DONE;
1539 }else{ 1435 }else{
1540 assert( iCurrent<=iMinPos ); 1436 assert( iCurrent<=iMinPos );
1541 if( 0==(0xFE&*pTerm->pList) ){ 1437 if( 0==(0xFE&*pTerm->pList) ){
1542 pTerm->pList = 0; 1438 pTerm->pList = 0;
1543 }else{ 1439 }else{
1544 fts3GetDeltaPosition(&pTerm->pList, &pTerm->iPos); 1440 fts3GetDeltaPosition(&pTerm->pList, &pTerm->iPos);
1545 } 1441 }
1546 while( rc==SQLITE_OK && iCurrent<iMinPos ){ 1442 while( rc==SQLITE_OK && iCurrent<iMinPos ){
1547 rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent); 1443 rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent);
1548 } 1444 }
1549 if( rc==SQLITE_OK ){ 1445 if( rc==SQLITE_OK ){
1550 char aBuffer[64]; 1446 char aBuffer[64];
1551 sqlite3_snprintf(sizeof(aBuffer), aBuffer, 1447 sqlite3_snprintf(sizeof(aBuffer), aBuffer,
1552 "%d %d %d %d ", iCol, pTerm-sCtx.aTerm, iStart, iEnd-iStart 1448 "%d %d %d %d ", iCol, pTerm-sCtx.aTerm, iStart, iEnd-iStart
1553 ); 1449 );
1554 rc = fts3StringAppend(&res, aBuffer, -1); 1450 rc = fts3StringAppend(&res, aBuffer, -1);
1555 }else if( rc==SQLITE_DONE ){ 1451 }else if( rc==SQLITE_DONE && pTab->zContentTbl==0 ){
1556 rc = SQLITE_CORRUPT; 1452 rc = FTS_CORRUPT_VTAB;
1557 } 1453 }
1558 } 1454 }
1559 } 1455 }
1560 if( rc==SQLITE_DONE ){ 1456 if( rc==SQLITE_DONE ){
1561 rc = SQLITE_OK; 1457 rc = SQLITE_OK;
1562 } 1458 }
1563 1459
1564 pMod->xClose(pC); 1460 pMod->xClose(pC);
1565 if( rc!=SQLITE_OK ) goto offsets_out; 1461 if( rc!=SQLITE_OK ) goto offsets_out;
1566 } 1462 }
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after
1616 1512
1617 if( rc!=SQLITE_OK ){ 1513 if( rc!=SQLITE_OK ){
1618 sqlite3_result_error_code(pContext, rc); 1514 sqlite3_result_error_code(pContext, rc);
1619 }else{ 1515 }else{
1620 int n = pCsr->nMatchinfo * sizeof(u32); 1516 int n = pCsr->nMatchinfo * sizeof(u32);
1621 sqlite3_result_blob(pContext, pCsr->aMatchinfo, n, SQLITE_TRANSIENT); 1517 sqlite3_result_blob(pContext, pCsr->aMatchinfo, n, SQLITE_TRANSIENT);
1622 } 1518 }
1623 } 1519 }
1624 1520
1625 #endif 1521 #endif
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698