Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(704)

Side by Side Diff: third_party/sqlite/src/ext/fts5/fts5_index.c

Issue 2751253002: [sql] Import SQLite 3.17.0. (Closed)
Patch Set: also clang on Linux i386 Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 ** 2014 May 31 2 ** 2014 May 31
3 ** 3 **
4 ** The author disclaims copyright to this source code. In place of 4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing: 5 ** a legal notice, here is a blessing:
6 ** 6 **
7 ** May you do good and not evil. 7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others. 8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give. 9 ** May you share freely, never taking more than you give.
10 ** 10 **
(...skipping 243 matching lines...) Expand 10 before | Expand all | Expand 10 after
254 ** many zero bytes. This makes it easier to decode the various record formats 254 ** many zero bytes. This makes it easier to decode the various record formats
255 ** without overreading if the records are corrupt. 255 ** without overreading if the records are corrupt.
256 */ 256 */
257 #define FTS5_DATA_ZERO_PADDING 8 257 #define FTS5_DATA_ZERO_PADDING 8
258 #define FTS5_DATA_PADDING 20 258 #define FTS5_DATA_PADDING 20
259 259
260 typedef struct Fts5Data Fts5Data; 260 typedef struct Fts5Data Fts5Data;
261 typedef struct Fts5DlidxIter Fts5DlidxIter; 261 typedef struct Fts5DlidxIter Fts5DlidxIter;
262 typedef struct Fts5DlidxLvl Fts5DlidxLvl; 262 typedef struct Fts5DlidxLvl Fts5DlidxLvl;
263 typedef struct Fts5DlidxWriter Fts5DlidxWriter; 263 typedef struct Fts5DlidxWriter Fts5DlidxWriter;
264 typedef struct Fts5Iter Fts5Iter;
264 typedef struct Fts5PageWriter Fts5PageWriter; 265 typedef struct Fts5PageWriter Fts5PageWriter;
265 typedef struct Fts5SegIter Fts5SegIter; 266 typedef struct Fts5SegIter Fts5SegIter;
266 typedef struct Fts5DoclistIter Fts5DoclistIter; 267 typedef struct Fts5DoclistIter Fts5DoclistIter;
267 typedef struct Fts5SegWriter Fts5SegWriter; 268 typedef struct Fts5SegWriter Fts5SegWriter;
268 typedef struct Fts5Structure Fts5Structure; 269 typedef struct Fts5Structure Fts5Structure;
269 typedef struct Fts5StructureLevel Fts5StructureLevel; 270 typedef struct Fts5StructureLevel Fts5StructureLevel;
270 typedef struct Fts5StructureSegment Fts5StructureSegment; 271 typedef struct Fts5StructureSegment Fts5StructureSegment;
271 272
272 struct Fts5Data { 273 struct Fts5Data {
273 u8 *p; /* Pointer to buffer containing record */ 274 u8 *p; /* Pointer to buffer containing record */
(...skipping 22 matching lines...) Expand all
296 int rc; /* Current error code */ 297 int rc; /* Current error code */
297 298
298 /* State used by the fts5DataXXX() functions. */ 299 /* State used by the fts5DataXXX() functions. */
299 sqlite3_blob *pReader; /* RO incr-blob open on %_data table */ 300 sqlite3_blob *pReader; /* RO incr-blob open on %_data table */
300 sqlite3_stmt *pWriter; /* "INSERT ... %_data VALUES(?,?)" */ 301 sqlite3_stmt *pWriter; /* "INSERT ... %_data VALUES(?,?)" */
301 sqlite3_stmt *pDeleter; /* "DELETE FROM %_data ... id>=? AND id<=?" */ 302 sqlite3_stmt *pDeleter; /* "DELETE FROM %_data ... id>=? AND id<=?" */
302 sqlite3_stmt *pIdxWriter; /* "INSERT ... %_idx VALUES(?,?,?,?)" */ 303 sqlite3_stmt *pIdxWriter; /* "INSERT ... %_idx VALUES(?,?,?,?)" */
303 sqlite3_stmt *pIdxDeleter; /* "DELETE FROM %_idx WHERE segid=? */ 304 sqlite3_stmt *pIdxDeleter; /* "DELETE FROM %_idx WHERE segid=? */
304 sqlite3_stmt *pIdxSelect; 305 sqlite3_stmt *pIdxSelect;
305 int nRead; /* Total number of blocks read */ 306 int nRead; /* Total number of blocks read */
307
308 sqlite3_stmt *pDataVersion;
309 i64 iStructVersion; /* data_version when pStruct read */
310 Fts5Structure *pStruct; /* Current db structure (or NULL) */
306 }; 311 };
307 312
308 struct Fts5DoclistIter { 313 struct Fts5DoclistIter {
309 u8 *aEof; /* Pointer to 1 byte past end of doclist */ 314 u8 *aEof; /* Pointer to 1 byte past end of doclist */
310 315
311 /* Output variables. aPoslist==0 at EOF */ 316 /* Output variables. aPoslist==0 at EOF */
312 i64 iRowid; 317 i64 iRowid;
313 u8 *aPoslist; 318 u8 *aPoslist;
314 int nPoslist; 319 int nPoslist;
315 int nSize; 320 int nSize;
(...skipping 110 matching lines...) Expand 10 before | Expand all | Expand 10 after
426 ** Index of current term on iTermLeafPgno. 431 ** Index of current term on iTermLeafPgno.
427 */ 432 */
428 struct Fts5SegIter { 433 struct Fts5SegIter {
429 Fts5StructureSegment *pSeg; /* Segment to iterate through */ 434 Fts5StructureSegment *pSeg; /* Segment to iterate through */
430 int flags; /* Mask of configuration flags */ 435 int flags; /* Mask of configuration flags */
431 int iLeafPgno; /* Current leaf page number */ 436 int iLeafPgno; /* Current leaf page number */
432 Fts5Data *pLeaf; /* Current leaf data */ 437 Fts5Data *pLeaf; /* Current leaf data */
433 Fts5Data *pNextLeaf; /* Leaf page (iLeafPgno+1) */ 438 Fts5Data *pNextLeaf; /* Leaf page (iLeafPgno+1) */
434 int iLeafOffset; /* Byte offset within current leaf */ 439 int iLeafOffset; /* Byte offset within current leaf */
435 440
441 /* Next method */
442 void (*xNext)(Fts5Index*, Fts5SegIter*, int*);
443
436 /* The page and offset from which the current term was read. The offset 444 /* The page and offset from which the current term was read. The offset
437 ** is the offset of the first rowid in the current doclist. */ 445 ** is the offset of the first rowid in the current doclist. */
438 int iTermLeafPgno; 446 int iTermLeafPgno;
439 int iTermLeafOffset; 447 int iTermLeafOffset;
440 448
441 int iPgidxOff; /* Next offset in pgidx */ 449 int iPgidxOff; /* Next offset in pgidx */
442 int iEndofDoclist; 450 int iEndofDoclist;
443 451
444 /* The following are only used if the FTS5_SEGITER_REVERSE flag is set. */ 452 /* The following are only used if the FTS5_SEGITER_REVERSE flag is set. */
445 int iRowidOffset; /* Current entry in aRowidOffset[] */ 453 int iRowidOffset; /* Current entry in aRowidOffset[] */
446 int nRowidOffset; /* Allocated size of aRowidOffset[] array */ 454 int nRowidOffset; /* Allocated size of aRowidOffset[] array */
447 int *aRowidOffset; /* Array of offset to rowid fields */ 455 int *aRowidOffset; /* Array of offset to rowid fields */
448 456
449 Fts5DlidxIter *pDlidx; /* If there is a doclist-index */ 457 Fts5DlidxIter *pDlidx; /* If there is a doclist-index */
450 458
451 /* Variables populated based on current entry. */ 459 /* Variables populated based on current entry. */
452 Fts5Buffer term; /* Current term */ 460 Fts5Buffer term; /* Current term */
453 i64 iRowid; /* Current rowid */ 461 i64 iRowid; /* Current rowid */
454 int nPos; /* Number of bytes in current position list */ 462 int nPos; /* Number of bytes in current position list */
455 int bDel; /* True if the delete flag is set */ 463 u8 bDel; /* True if the delete flag is set */
456 }; 464 };
457 465
458 /* 466 /*
459 ** Argument is a pointer to an Fts5Data structure that contains a 467 ** Argument is a pointer to an Fts5Data structure that contains a
460 ** leaf page. 468 ** leaf page.
461 */ 469 */
462 #define ASSERT_SZLEAF_OK(x) assert( \ 470 #define ASSERT_SZLEAF_OK(x) assert( \
463 (x)->szLeaf==(x)->nn || (x)->szLeaf==fts5GetU16(&(x)->p[2]) \ 471 (x)->szLeaf==(x)->nn || (x)->szLeaf==fts5GetU16(&(x)->p[2]) \
464 ) 472 )
465 473
466 #define FTS5_SEGITER_ONETERM 0x01 474 #define FTS5_SEGITER_ONETERM 0x01
467 #define FTS5_SEGITER_REVERSE 0x02 475 #define FTS5_SEGITER_REVERSE 0x02
468 476
469
470 /* 477 /*
471 ** Argument is a pointer to an Fts5Data structure that contains a leaf 478 ** Argument is a pointer to an Fts5Data structure that contains a leaf
472 ** page. This macro evaluates to true if the leaf contains no terms, or 479 ** page. This macro evaluates to true if the leaf contains no terms, or
473 ** false if it contains at least one term. 480 ** false if it contains at least one term.
474 */ 481 */
475 #define fts5LeafIsTermless(x) ((x)->szLeaf >= (x)->nn) 482 #define fts5LeafIsTermless(x) ((x)->szLeaf >= (x)->nn)
476 483
477 #define fts5LeafTermOff(x, i) (fts5GetU16(&(x)->p[(x)->szLeaf + (i)*2])) 484 #define fts5LeafTermOff(x, i) (fts5GetU16(&(x)->p[(x)->szLeaf + (i)*2]))
478 485
479 #define fts5LeafFirstRowidOff(x) (fts5GetU16((x)->p)) 486 #define fts5LeafFirstRowidOff(x) (fts5GetU16((x)->p))
(...skipping 14 matching lines...) Expand all
494 ** points to the smaller term/rowid combination. Iterators at EOF are 501 ** points to the smaller term/rowid combination. Iterators at EOF are
495 ** considered to be greater than all other iterators. 502 ** considered to be greater than all other iterators.
496 ** 503 **
497 ** aFirst[1] contains the index in aSeg[] of the iterator that points to 504 ** aFirst[1] contains the index in aSeg[] of the iterator that points to
498 ** the smallest key overall. aFirst[0] is unused. 505 ** the smallest key overall. aFirst[0] is unused.
499 ** 506 **
500 ** poslist: 507 ** poslist:
501 ** Used by sqlite3Fts5IterPoslist() when the poslist needs to be buffered. 508 ** Used by sqlite3Fts5IterPoslist() when the poslist needs to be buffered.
502 ** There is no way to tell if this is populated or not. 509 ** There is no way to tell if this is populated or not.
503 */ 510 */
504 struct Fts5IndexIter { 511 struct Fts5Iter {
512 Fts5IndexIter base; /* Base class containing output vars */
513
505 Fts5Index *pIndex; /* Index that owns this iterator */ 514 Fts5Index *pIndex; /* Index that owns this iterator */
506 Fts5Structure *pStruct; /* Database structure for this iterator */ 515 Fts5Structure *pStruct; /* Database structure for this iterator */
507 Fts5Buffer poslist; /* Buffer containing current poslist */ 516 Fts5Buffer poslist; /* Buffer containing current poslist */
517 Fts5Colset *pColset; /* Restrict matches to these columns */
518
519 /* Invoked to set output variables. */
520 void (*xSetOutputs)(Fts5Iter*, Fts5SegIter*);
508 521
509 int nSeg; /* Size of aSeg[] array */ 522 int nSeg; /* Size of aSeg[] array */
510 int bRev; /* True to iterate in reverse order */ 523 int bRev; /* True to iterate in reverse order */
511 u8 bSkipEmpty; /* True to skip deleted entries */ 524 u8 bSkipEmpty; /* True to skip deleted entries */
512 u8 bEof; /* True at EOF */
513 u8 bFiltered; /* True if column-filter already applied */
514 525
515 i64 iSwitchRowid; /* Firstest rowid of other than aFirst[1] */ 526 i64 iSwitchRowid; /* Firstest rowid of other than aFirst[1] */
516 Fts5CResult *aFirst; /* Current merge state (see above) */ 527 Fts5CResult *aFirst; /* Current merge state (see above) */
517 Fts5SegIter aSeg[1]; /* Array of segment iterators */ 528 Fts5SegIter aSeg[1]; /* Array of segment iterators */
518 }; 529 };
519 530
520 531
521 /* 532 /*
522 ** An instance of the following type is used to iterate through the contents 533 ** An instance of the following type is used to iterate through the contents
523 ** of a doclist-index record. 534 ** of a doclist-index record.
(...skipping 69 matching lines...) Expand 10 before | Expand all | Expand 10 after
593 ** +ve if pRight is smaller than pLeft. In other words: 604 ** +ve if pRight is smaller than pLeft. In other words:
594 ** 605 **
595 ** res = *pLeft - *pRight 606 ** res = *pLeft - *pRight
596 */ 607 */
597 static int fts5BufferCompare(Fts5Buffer *pLeft, Fts5Buffer *pRight){ 608 static int fts5BufferCompare(Fts5Buffer *pLeft, Fts5Buffer *pRight){
598 int nCmp = MIN(pLeft->n, pRight->n); 609 int nCmp = MIN(pLeft->n, pRight->n);
599 int res = memcmp(pLeft->p, pRight->p, nCmp); 610 int res = memcmp(pLeft->p, pRight->p, nCmp);
600 return (res==0 ? (pLeft->n - pRight->n) : res); 611 return (res==0 ? (pLeft->n - pRight->n) : res);
601 } 612 }
602 613
603 #ifdef SQLITE_DEBUG
604 static int fts5BlobCompare(
605 const u8 *pLeft, int nLeft,
606 const u8 *pRight, int nRight
607 ){
608 int nCmp = MIN(nLeft, nRight);
609 int res = memcmp(pLeft, pRight, nCmp);
610 return (res==0 ? (nLeft - nRight) : res);
611 }
612 #endif
613
614 static int fts5LeafFirstTermOff(Fts5Data *pLeaf){ 614 static int fts5LeafFirstTermOff(Fts5Data *pLeaf){
615 int ret; 615 int ret;
616 fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf], ret); 616 fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf], ret);
617 return ret; 617 return ret;
618 } 618 }
619 619
620 /* 620 /*
621 ** Close the read-only blob handle, if it is open. 621 ** Close the read-only blob handle, if it is open.
622 */ 622 */
623 static void fts5CloseReader(Fts5Index *p){ 623 static void fts5CloseReader(Fts5Index *p){
(...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after
703 } 703 }
704 704
705 /* 705 /*
706 ** Release a reference to data record returned by an earlier call to 706 ** Release a reference to data record returned by an earlier call to
707 ** fts5DataRead(). 707 ** fts5DataRead().
708 */ 708 */
709 static void fts5DataRelease(Fts5Data *pData){ 709 static void fts5DataRelease(Fts5Data *pData){
710 sqlite3_free(pData); 710 sqlite3_free(pData);
711 } 711 }
712 712
713 static Fts5Data *fts5LeafRead(Fts5Index *p, i64 iRowid){
714 Fts5Data *pRet = fts5DataRead(p, iRowid);
715 if( pRet ){
716 if( pRet->szLeaf>pRet->nn ){
717 p->rc = FTS5_CORRUPT;
718 fts5DataRelease(pRet);
719 pRet = 0;
720 }
721 }
722 return pRet;
723 }
724
713 static int fts5IndexPrepareStmt( 725 static int fts5IndexPrepareStmt(
714 Fts5Index *p, 726 Fts5Index *p,
715 sqlite3_stmt **ppStmt, 727 sqlite3_stmt **ppStmt,
716 char *zSql 728 char *zSql
717 ){ 729 ){
718 if( p->rc==SQLITE_OK ){ 730 if( p->rc==SQLITE_OK ){
719 if( zSql ){ 731 if( zSql ){
720 p->rc = sqlite3_prepare_v2(p->pConfig->db, zSql, -1, ppStmt, 0); 732 p->rc = sqlite3_prepare_v2(p->pConfig->db, zSql, -1, ppStmt, 0);
721 }else{ 733 }else{
722 p->rc = SQLITE_NOMEM; 734 p->rc = SQLITE_NOMEM;
(...skipping 139 matching lines...) Expand 10 before | Expand all | Expand 10 after
862 pRet = (Fts5Structure*)sqlite3Fts5MallocZero(&rc, nByte); 874 pRet = (Fts5Structure*)sqlite3Fts5MallocZero(&rc, nByte);
863 875
864 if( pRet ){ 876 if( pRet ){
865 pRet->nRef = 1; 877 pRet->nRef = 1;
866 pRet->nLevel = nLevel; 878 pRet->nLevel = nLevel;
867 pRet->nSegment = nSegment; 879 pRet->nSegment = nSegment;
868 i += sqlite3Fts5GetVarint(&pData[i], &pRet->nWriteCounter); 880 i += sqlite3Fts5GetVarint(&pData[i], &pRet->nWriteCounter);
869 881
870 for(iLvl=0; rc==SQLITE_OK && iLvl<nLevel; iLvl++){ 882 for(iLvl=0; rc==SQLITE_OK && iLvl<nLevel; iLvl++){
871 Fts5StructureLevel *pLvl = &pRet->aLevel[iLvl]; 883 Fts5StructureLevel *pLvl = &pRet->aLevel[iLvl];
872 int nTotal; 884 int nTotal = 0;
873 int iSeg; 885 int iSeg;
874 886
875 i += fts5GetVarint32(&pData[i], pLvl->nMerge); 887 if( i>=nData ){
876 i += fts5GetVarint32(&pData[i], nTotal); 888 rc = FTS5_CORRUPT;
877 assert( nTotal>=pLvl->nMerge ); 889 }else{
878 pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&rc, 890 i += fts5GetVarint32(&pData[i], pLvl->nMerge);
879 nTotal * sizeof(Fts5StructureSegment) 891 i += fts5GetVarint32(&pData[i], nTotal);
880 ); 892 assert( nTotal>=pLvl->nMerge );
893 pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&rc,
894 nTotal * sizeof(Fts5StructureSegment)
895 );
896 }
881 897
882 if( rc==SQLITE_OK ){ 898 if( rc==SQLITE_OK ){
883 pLvl->nSeg = nTotal; 899 pLvl->nSeg = nTotal;
884 for(iSeg=0; iSeg<nTotal; iSeg++){ 900 for(iSeg=0; iSeg<nTotal; iSeg++){
901 if( i>=nData ){
902 rc = FTS5_CORRUPT;
903 break;
904 }
885 i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].iSegid); 905 i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].iSegid);
886 i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoFirst); 906 i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoFirst);
887 i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoLast); 907 i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoLast);
888 } 908 }
889 }else{
890 fts5StructureRelease(pRet);
891 pRet = 0;
892 } 909 }
893 } 910 }
911 if( rc!=SQLITE_OK ){
912 fts5StructureRelease(pRet);
913 pRet = 0;
914 }
894 } 915 }
895 916
896 *ppOut = pRet; 917 *ppOut = pRet;
897 return rc; 918 return rc;
898 } 919 }
899 920
900 /* 921 /*
901 ** 922 **
902 */ 923 */
903 static void fts5StructureAddLevel(int *pRc, Fts5Structure **ppStruct){ 924 static void fts5StructureAddLevel(int *pRc, Fts5Structure **ppStruct){
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after
946 memmove(&aNew[nExtra], aNew, nMove); 967 memmove(&aNew[nExtra], aNew, nMove);
947 memset(aNew, 0, sizeof(Fts5StructureSegment) * nExtra); 968 memset(aNew, 0, sizeof(Fts5StructureSegment) * nExtra);
948 } 969 }
949 pLvl->aSeg = aNew; 970 pLvl->aSeg = aNew;
950 }else{ 971 }else{
951 *pRc = SQLITE_NOMEM; 972 *pRc = SQLITE_NOMEM;
952 } 973 }
953 } 974 }
954 } 975 }
955 976
977 static Fts5Structure *fts5StructureReadUncached(Fts5Index *p){
978 Fts5Structure *pRet = 0;
979 Fts5Config *pConfig = p->pConfig;
980 int iCookie; /* Configuration cookie */
981 Fts5Data *pData;
982
983 pData = fts5DataRead(p, FTS5_STRUCTURE_ROWID);
984 if( p->rc==SQLITE_OK ){
985 /* TODO: Do we need this if the leaf-index is appended? Probably... */
986 memset(&pData->p[pData->nn], 0, FTS5_DATA_PADDING);
987 p->rc = fts5StructureDecode(pData->p, pData->nn, &iCookie, &pRet);
988 if( p->rc==SQLITE_OK && pConfig->iCookie!=iCookie ){
989 p->rc = sqlite3Fts5ConfigLoad(pConfig, iCookie);
990 }
991 fts5DataRelease(pData);
992 if( p->rc!=SQLITE_OK ){
993 fts5StructureRelease(pRet);
994 pRet = 0;
995 }
996 }
997
998 return pRet;
999 }
1000
1001 static i64 fts5IndexDataVersion(Fts5Index *p){
1002 i64 iVersion = 0;
1003
1004 if( p->rc==SQLITE_OK ){
1005 if( p->pDataVersion==0 ){
1006 p->rc = fts5IndexPrepareStmt(p, &p->pDataVersion,
1007 sqlite3_mprintf("PRAGMA %Q.data_version", p->pConfig->zDb)
1008 );
1009 if( p->rc ) return 0;
1010 }
1011
1012 if( SQLITE_ROW==sqlite3_step(p->pDataVersion) ){
1013 iVersion = sqlite3_column_int64(p->pDataVersion, 0);
1014 }
1015 p->rc = sqlite3_reset(p->pDataVersion);
1016 }
1017
1018 return iVersion;
1019 }
1020
956 /* 1021 /*
957 ** Read, deserialize and return the structure record. 1022 ** Read, deserialize and return the structure record.
958 ** 1023 **
959 ** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array 1024 ** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array
960 ** are over-allocated as described for function fts5StructureDecode() 1025 ** are over-allocated as described for function fts5StructureDecode()
961 ** above. 1026 ** above.
962 ** 1027 **
963 ** If an error occurs, NULL is returned and an error code left in the 1028 ** If an error occurs, NULL is returned and an error code left in the
964 ** Fts5Index handle. If an error has already occurred when this function 1029 ** Fts5Index handle. If an error has already occurred when this function
965 ** is called, it is a no-op. 1030 ** is called, it is a no-op.
966 */ 1031 */
967 static Fts5Structure *fts5StructureRead(Fts5Index *p){ 1032 static Fts5Structure *fts5StructureRead(Fts5Index *p){
968 Fts5Config *pConfig = p->pConfig;
969 Fts5Structure *pRet = 0; /* Object to return */
970 int iCookie; /* Configuration cookie */
971 Fts5Data *pData;
972 1033
973 pData = fts5DataRead(p, FTS5_STRUCTURE_ROWID); 1034 if( p->pStruct==0 ){
974 if( p->rc ) return 0; 1035 p->iStructVersion = fts5IndexDataVersion(p);
975 /* TODO: Do we need this if the leaf-index is appended? Probably... */ 1036 if( p->rc==SQLITE_OK ){
976 memset(&pData->p[pData->nn], 0, FTS5_DATA_PADDING); 1037 p->pStruct = fts5StructureReadUncached(p);
977 p->rc = fts5StructureDecode(pData->p, pData->nn, &iCookie, &pRet); 1038 }
978 if( p->rc==SQLITE_OK && pConfig->iCookie!=iCookie ){
979 p->rc = sqlite3Fts5ConfigLoad(pConfig, iCookie);
980 } 1039 }
981 1040
982 fts5DataRelease(pData); 1041 #if 0
983 if( p->rc!=SQLITE_OK ){ 1042 else{
984 fts5StructureRelease(pRet); 1043 Fts5Structure *pTest = fts5StructureReadUncached(p);
985 pRet = 0; 1044 if( pTest ){
1045 int i, j;
1046 assert_nc( p->pStruct->nSegment==pTest->nSegment );
1047 assert_nc( p->pStruct->nLevel==pTest->nLevel );
1048 for(i=0; i<pTest->nLevel; i++){
1049 assert_nc( p->pStruct->aLevel[i].nMerge==pTest->aLevel[i].nMerge );
1050 assert_nc( p->pStruct->aLevel[i].nSeg==pTest->aLevel[i].nSeg );
1051 for(j=0; j<pTest->aLevel[i].nSeg; j++){
1052 Fts5StructureSegment *p1 = &pTest->aLevel[i].aSeg[j];
1053 Fts5StructureSegment *p2 = &p->pStruct->aLevel[i].aSeg[j];
1054 assert_nc( p1->iSegid==p2->iSegid );
1055 assert_nc( p1->pgnoFirst==p2->pgnoFirst );
1056 assert_nc( p1->pgnoLast==p2->pgnoLast );
1057 }
1058 }
1059 fts5StructureRelease(pTest);
1060 }
986 } 1061 }
987 return pRet; 1062 #endif
1063
1064 if( p->rc!=SQLITE_OK ) return 0;
1065 assert( p->iStructVersion!=0 );
1066 assert( p->pStruct!=0 );
1067 fts5StructureRef(p->pStruct);
1068 return p->pStruct;
1069 }
1070
1071 static void fts5StructureInvalidate(Fts5Index *p){
1072 if( p->pStruct ){
1073 fts5StructureRelease(p->pStruct);
1074 p->pStruct = 0;
1075 }
988 } 1076 }
989 1077
990 /* 1078 /*
991 ** Return the total number of segments in index structure pStruct. This 1079 ** Return the total number of segments in index structure pStruct. This
992 ** function is only ever used as part of assert() conditions. 1080 ** function is only ever used as part of assert() conditions.
993 */ 1081 */
994 #ifdef SQLITE_DEBUG 1082 #ifdef SQLITE_DEBUG
995 static int fts5StructureCountSegments(Fts5Structure *pStruct){ 1083 static int fts5StructureCountSegments(Fts5Structure *pStruct){
996 int nSegment = 0; /* Total number of segments */ 1084 int nSegment = 0; /* Total number of segments */
997 if( pStruct ){ 1085 if( pStruct ){
(...skipping 437 matching lines...) Expand 10 before | Expand all | Expand 10 after
1435 Fts5SegIter *pIter /* Iterator to advance to next page */ 1523 Fts5SegIter *pIter /* Iterator to advance to next page */
1436 ){ 1524 ){
1437 Fts5Data *pLeaf; 1525 Fts5Data *pLeaf;
1438 Fts5StructureSegment *pSeg = pIter->pSeg; 1526 Fts5StructureSegment *pSeg = pIter->pSeg;
1439 fts5DataRelease(pIter->pLeaf); 1527 fts5DataRelease(pIter->pLeaf);
1440 pIter->iLeafPgno++; 1528 pIter->iLeafPgno++;
1441 if( pIter->pNextLeaf ){ 1529 if( pIter->pNextLeaf ){
1442 pIter->pLeaf = pIter->pNextLeaf; 1530 pIter->pLeaf = pIter->pNextLeaf;
1443 pIter->pNextLeaf = 0; 1531 pIter->pNextLeaf = 0;
1444 }else if( pIter->iLeafPgno<=pSeg->pgnoLast ){ 1532 }else if( pIter->iLeafPgno<=pSeg->pgnoLast ){
1445 pIter->pLeaf = fts5DataRead(p, 1533 pIter->pLeaf = fts5LeafRead(p,
1446 FTS5_SEGMENT_ROWID(pSeg->iSegid, pIter->iLeafPgno) 1534 FTS5_SEGMENT_ROWID(pSeg->iSegid, pIter->iLeafPgno)
1447 ); 1535 );
1448 }else{ 1536 }else{
1449 pIter->pLeaf = 0; 1537 pIter->pLeaf = 0;
1450 } 1538 }
1451 pLeaf = pIter->pLeaf; 1539 pLeaf = pIter->pLeaf;
1452 1540
1453 if( pLeaf ){ 1541 if( pLeaf ){
1454 pIter->iPgidxOff = pLeaf->szLeaf; 1542 pIter->iPgidxOff = pLeaf->szLeaf;
1455 if( fts5LeafIsTermless(pLeaf) ){ 1543 if( fts5LeafIsTermless(pLeaf) ){
(...skipping 29 matching lines...) Expand all
1485 ** 1573 **
1486 ** Fts5SegIter.nPos 1574 ** Fts5SegIter.nPos
1487 ** Fts5SegIter.bDel 1575 ** Fts5SegIter.bDel
1488 ** 1576 **
1489 ** Leave Fts5SegIter.iLeafOffset pointing to the first byte of the 1577 ** Leave Fts5SegIter.iLeafOffset pointing to the first byte of the
1490 ** position list content (if any). 1578 ** position list content (if any).
1491 */ 1579 */
1492 static void fts5SegIterLoadNPos(Fts5Index *p, Fts5SegIter *pIter){ 1580 static void fts5SegIterLoadNPos(Fts5Index *p, Fts5SegIter *pIter){
1493 if( p->rc==SQLITE_OK ){ 1581 if( p->rc==SQLITE_OK ){
1494 int iOff = pIter->iLeafOffset; /* Offset to read at */ 1582 int iOff = pIter->iLeafOffset; /* Offset to read at */
1495 int nSz;
1496 ASSERT_SZLEAF_OK(pIter->pLeaf); 1583 ASSERT_SZLEAF_OK(pIter->pLeaf);
1497 fts5FastGetVarint32(pIter->pLeaf->p, iOff, nSz); 1584 if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){
1498 pIter->bDel = (nSz & 0x0001); 1585 int iEod = MIN(pIter->iEndofDoclist, pIter->pLeaf->szLeaf);
1499 pIter->nPos = nSz>>1; 1586 pIter->bDel = 0;
1587 pIter->nPos = 1;
1588 if( iOff<iEod && pIter->pLeaf->p[iOff]==0 ){
1589 pIter->bDel = 1;
1590 iOff++;
1591 if( iOff<iEod && pIter->pLeaf->p[iOff]==0 ){
1592 pIter->nPos = 1;
1593 iOff++;
1594 }else{
1595 pIter->nPos = 0;
1596 }
1597 }
1598 }else{
1599 int nSz;
1600 fts5FastGetVarint32(pIter->pLeaf->p, iOff, nSz);
1601 pIter->bDel = (nSz & 0x0001);
1602 pIter->nPos = nSz>>1;
1603 assert_nc( pIter->nPos>=0 );
1604 }
1500 pIter->iLeafOffset = iOff; 1605 pIter->iLeafOffset = iOff;
1501 assert_nc( pIter->nPos>=0 );
1502 } 1606 }
1503 } 1607 }
1504 1608
1505 static void fts5SegIterLoadRowid(Fts5Index *p, Fts5SegIter *pIter){ 1609 static void fts5SegIterLoadRowid(Fts5Index *p, Fts5SegIter *pIter){
1506 u8 *a = pIter->pLeaf->p; /* Buffer to read data from */ 1610 u8 *a = pIter->pLeaf->p; /* Buffer to read data from */
1507 int iOff = pIter->iLeafOffset; 1611 int iOff = pIter->iLeafOffset;
1508 1612
1509 ASSERT_SZLEAF_OK(pIter->pLeaf); 1613 ASSERT_SZLEAF_OK(pIter->pLeaf);
1510 if( iOff>=pIter->pLeaf->szLeaf ){ 1614 if( iOff>=pIter->pLeaf->szLeaf ){
1511 fts5SegIterNextPage(p, pIter); 1615 fts5SegIterNextPage(p, pIter);
(...skipping 22 matching lines...) Expand all
1534 ** accordingly and leaves (Fts5SegIter.iLeafOffset) set to the content of 1638 ** accordingly and leaves (Fts5SegIter.iLeafOffset) set to the content of
1535 ** the first position list. The position list belonging to document 1639 ** the first position list. The position list belonging to document
1536 ** (Fts5SegIter.iRowid). 1640 ** (Fts5SegIter.iRowid).
1537 */ 1641 */
1538 static void fts5SegIterLoadTerm(Fts5Index *p, Fts5SegIter *pIter, int nKeep){ 1642 static void fts5SegIterLoadTerm(Fts5Index *p, Fts5SegIter *pIter, int nKeep){
1539 u8 *a = pIter->pLeaf->p; /* Buffer to read data from */ 1643 u8 *a = pIter->pLeaf->p; /* Buffer to read data from */
1540 int iOff = pIter->iLeafOffset; /* Offset to read at */ 1644 int iOff = pIter->iLeafOffset; /* Offset to read at */
1541 int nNew; /* Bytes of new data */ 1645 int nNew; /* Bytes of new data */
1542 1646
1543 iOff += fts5GetVarint32(&a[iOff], nNew); 1647 iOff += fts5GetVarint32(&a[iOff], nNew);
1648 if( iOff+nNew>pIter->pLeaf->nn ){
1649 p->rc = FTS5_CORRUPT;
1650 return;
1651 }
1544 pIter->term.n = nKeep; 1652 pIter->term.n = nKeep;
1545 fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]); 1653 fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);
1546 iOff += nNew; 1654 iOff += nNew;
1547 pIter->iTermLeafOffset = iOff; 1655 pIter->iTermLeafOffset = iOff;
1548 pIter->iTermLeafPgno = pIter->iLeafPgno; 1656 pIter->iTermLeafPgno = pIter->iLeafPgno;
1549 pIter->iLeafOffset = iOff; 1657 pIter->iLeafOffset = iOff;
1550 1658
1551 if( pIter->iPgidxOff>=pIter->pLeaf->nn ){ 1659 if( pIter->iPgidxOff>=pIter->pLeaf->nn ){
1552 pIter->iEndofDoclist = pIter->pLeaf->nn+1; 1660 pIter->iEndofDoclist = pIter->pLeaf->nn+1;
1553 }else{ 1661 }else{
1554 int nExtra; 1662 int nExtra;
1555 pIter->iPgidxOff += fts5GetVarint32(&a[pIter->iPgidxOff], nExtra); 1663 pIter->iPgidxOff += fts5GetVarint32(&a[pIter->iPgidxOff], nExtra);
1556 pIter->iEndofDoclist += nExtra; 1664 pIter->iEndofDoclist += nExtra;
1557 } 1665 }
1558 1666
1559 fts5SegIterLoadRowid(p, pIter); 1667 fts5SegIterLoadRowid(p, pIter);
1560 } 1668 }
1561 1669
1670 static void fts5SegIterNext(Fts5Index*, Fts5SegIter*, int*);
1671 static void fts5SegIterNext_Reverse(Fts5Index*, Fts5SegIter*, int*);
1672 static void fts5SegIterNext_None(Fts5Index*, Fts5SegIter*, int*);
1673
1674 static void fts5SegIterSetNext(Fts5Index *p, Fts5SegIter *pIter){
1675 if( pIter->flags & FTS5_SEGITER_REVERSE ){
1676 pIter->xNext = fts5SegIterNext_Reverse;
1677 }else if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){
1678 pIter->xNext = fts5SegIterNext_None;
1679 }else{
1680 pIter->xNext = fts5SegIterNext;
1681 }
1682 }
1683
1562 /* 1684 /*
1563 ** Initialize the iterator object pIter to iterate through the entries in 1685 ** Initialize the iterator object pIter to iterate through the entries in
1564 ** segment pSeg. The iterator is left pointing to the first entry when 1686 ** segment pSeg. The iterator is left pointing to the first entry when
1565 ** this function returns. 1687 ** this function returns.
1566 ** 1688 **
1567 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If 1689 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
1568 ** an error has already occurred when this function is called, it is a no-op. 1690 ** an error has already occurred when this function is called, it is a no-op.
1569 */ 1691 */
1570 static void fts5SegIterInit( 1692 static void fts5SegIterInit(
1571 Fts5Index *p, /* FTS index object */ 1693 Fts5Index *p, /* FTS index object */
1572 Fts5StructureSegment *pSeg, /* Description of segment */ 1694 Fts5StructureSegment *pSeg, /* Description of segment */
1573 Fts5SegIter *pIter /* Object to populate */ 1695 Fts5SegIter *pIter /* Object to populate */
1574 ){ 1696 ){
1575 if( pSeg->pgnoFirst==0 ){ 1697 if( pSeg->pgnoFirst==0 ){
1576 /* This happens if the segment is being used as an input to an incremental 1698 /* This happens if the segment is being used as an input to an incremental
1577 ** merge and all data has already been "trimmed". See function 1699 ** merge and all data has already been "trimmed". See function
1578 ** fts5TrimSegments() for details. In this case leave the iterator empty. 1700 ** fts5TrimSegments() for details. In this case leave the iterator empty.
1579 ** The caller will see the (pIter->pLeaf==0) and assume the iterator is 1701 ** The caller will see the (pIter->pLeaf==0) and assume the iterator is
1580 ** at EOF already. */ 1702 ** at EOF already. */
1581 assert( pIter->pLeaf==0 ); 1703 assert( pIter->pLeaf==0 );
1582 return; 1704 return;
1583 } 1705 }
1584 1706
1585 if( p->rc==SQLITE_OK ){ 1707 if( p->rc==SQLITE_OK ){
1586 memset(pIter, 0, sizeof(*pIter)); 1708 memset(pIter, 0, sizeof(*pIter));
1709 fts5SegIterSetNext(p, pIter);
1587 pIter->pSeg = pSeg; 1710 pIter->pSeg = pSeg;
1588 pIter->iLeafPgno = pSeg->pgnoFirst-1; 1711 pIter->iLeafPgno = pSeg->pgnoFirst-1;
1589 fts5SegIterNextPage(p, pIter); 1712 fts5SegIterNextPage(p, pIter);
1590 } 1713 }
1591 1714
1592 if( p->rc==SQLITE_OK ){ 1715 if( p->rc==SQLITE_OK ){
1593 pIter->iLeafOffset = 4; 1716 pIter->iLeafOffset = 4;
1594 assert_nc( pIter->pLeaf->nn>4 ); 1717 assert_nc( pIter->pLeaf->nn>4 );
1595 assert( fts5LeafFirstTermOff(pIter->pLeaf)==4 ); 1718 assert( fts5LeafFirstTermOff(pIter->pLeaf)==4 );
1596 pIter->iPgidxOff = pIter->pLeaf->szLeaf+1; 1719 pIter->iPgidxOff = pIter->pLeaf->szLeaf+1;
(...skipping 11 matching lines...) Expand all
1608 ** the position-list size field for the first relevant rowid on the page. 1731 ** the position-list size field for the first relevant rowid on the page.
1609 ** Fts5SegIter.rowid is set, but nPos and bDel are not. 1732 ** Fts5SegIter.rowid is set, but nPos and bDel are not.
1610 ** 1733 **
1611 ** This function advances the iterator so that it points to the last 1734 ** This function advances the iterator so that it points to the last
1612 ** relevant rowid on the page and, if necessary, initializes the 1735 ** relevant rowid on the page and, if necessary, initializes the
1613 ** aRowidOffset[] and iRowidOffset variables. At this point the iterator 1736 ** aRowidOffset[] and iRowidOffset variables. At this point the iterator
1614 ** is in its regular state - Fts5SegIter.iLeafOffset points to the first 1737 ** is in its regular state - Fts5SegIter.iLeafOffset points to the first
1615 ** byte of the position list content associated with said rowid. 1738 ** byte of the position list content associated with said rowid.
1616 */ 1739 */
1617 static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){ 1740 static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){
1741 int eDetail = p->pConfig->eDetail;
1618 int n = pIter->pLeaf->szLeaf; 1742 int n = pIter->pLeaf->szLeaf;
1619 int i = pIter->iLeafOffset; 1743 int i = pIter->iLeafOffset;
1620 u8 *a = pIter->pLeaf->p; 1744 u8 *a = pIter->pLeaf->p;
1621 int iRowidOffset = 0; 1745 int iRowidOffset = 0;
1622 1746
1623 if( n>pIter->iEndofDoclist ){ 1747 if( n>pIter->iEndofDoclist ){
1624 n = pIter->iEndofDoclist; 1748 n = pIter->iEndofDoclist;
1625 } 1749 }
1626 1750
1627 ASSERT_SZLEAF_OK(pIter->pLeaf); 1751 ASSERT_SZLEAF_OK(pIter->pLeaf);
1628 while( 1 ){ 1752 while( 1 ){
1629 i64 iDelta = 0; 1753 i64 iDelta = 0;
1630 int nPos;
1631 int bDummy;
1632 1754
1633 i += fts5GetPoslistSize(&a[i], &nPos, &bDummy); 1755 if( eDetail==FTS5_DETAIL_NONE ){
1634 i += nPos; 1756 /* todo */
1757 if( i<n && a[i]==0 ){
1758 i++;
1759 if( i<n && a[i]==0 ) i++;
1760 }
1761 }else{
1762 int nPos;
1763 int bDummy;
1764 i += fts5GetPoslistSize(&a[i], &nPos, &bDummy);
1765 i += nPos;
1766 }
1635 if( i>=n ) break; 1767 if( i>=n ) break;
1636 i += fts5GetVarint(&a[i], (u64*)&iDelta); 1768 i += fts5GetVarint(&a[i], (u64*)&iDelta);
1637 pIter->iRowid += iDelta; 1769 pIter->iRowid += iDelta;
1638 1770
1771 /* If necessary, grow the pIter->aRowidOffset[] array. */
1639 if( iRowidOffset>=pIter->nRowidOffset ){ 1772 if( iRowidOffset>=pIter->nRowidOffset ){
1640 int nNew = pIter->nRowidOffset + 8; 1773 int nNew = pIter->nRowidOffset + 8;
1641 int *aNew = (int*)sqlite3_realloc(pIter->aRowidOffset, nNew*sizeof(int)); 1774 int *aNew = (int*)sqlite3_realloc(pIter->aRowidOffset, nNew*sizeof(int));
1642 if( aNew==0 ){ 1775 if( aNew==0 ){
1643 p->rc = SQLITE_NOMEM; 1776 p->rc = SQLITE_NOMEM;
1644 break; 1777 break;
1645 } 1778 }
1646 pIter->aRowidOffset = aNew; 1779 pIter->aRowidOffset = aNew;
1647 pIter->nRowidOffset = nNew; 1780 pIter->nRowidOffset = nNew;
1648 } 1781 }
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after
1702 pIter->iEndofDoclist = pIter->pLeaf->nn+1; 1835 pIter->iEndofDoclist = pIter->pLeaf->nn+1;
1703 fts5SegIterReverseInitPage(p, pIter); 1836 fts5SegIterReverseInitPage(p, pIter);
1704 } 1837 }
1705 } 1838 }
1706 1839
1707 /* 1840 /*
1708 ** Return true if the iterator passed as the second argument currently 1841 ** Return true if the iterator passed as the second argument currently
1709 ** points to a delete marker. A delete marker is an entry with a 0 byte 1842 ** points to a delete marker. A delete marker is an entry with a 0 byte
1710 ** position-list. 1843 ** position-list.
1711 */ 1844 */
1712 static int fts5MultiIterIsEmpty(Fts5Index *p, Fts5IndexIter *pIter){ 1845 static int fts5MultiIterIsEmpty(Fts5Index *p, Fts5Iter *pIter){
1713 Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst]; 1846 Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst];
1714 return (p->rc==SQLITE_OK && pSeg->pLeaf && pSeg->nPos==0); 1847 return (p->rc==SQLITE_OK && pSeg->pLeaf && pSeg->nPos==0);
1715 } 1848 }
1716 1849
1717 /* 1850 /*
1851 ** Advance iterator pIter to the next entry.
1852 **
1853 ** This version of fts5SegIterNext() is only used by reverse iterators.
1854 */
1855 static void fts5SegIterNext_Reverse(
1856 Fts5Index *p, /* FTS5 backend object */
1857 Fts5SegIter *pIter, /* Iterator to advance */
1858 int *pbUnused /* Unused */
1859 ){
1860 assert( pIter->flags & FTS5_SEGITER_REVERSE );
1861 assert( pIter->pNextLeaf==0 );
1862 UNUSED_PARAM(pbUnused);
1863
1864 if( pIter->iRowidOffset>0 ){
1865 u8 *a = pIter->pLeaf->p;
1866 int iOff;
1867 i64 iDelta;
1868
1869 pIter->iRowidOffset--;
1870 pIter->iLeafOffset = pIter->aRowidOffset[pIter->iRowidOffset];
1871 fts5SegIterLoadNPos(p, pIter);
1872 iOff = pIter->iLeafOffset;
1873 if( p->pConfig->eDetail!=FTS5_DETAIL_NONE ){
1874 iOff += pIter->nPos;
1875 }
1876 fts5GetVarint(&a[iOff], (u64*)&iDelta);
1877 pIter->iRowid -= iDelta;
1878 }else{
1879 fts5SegIterReverseNewPage(p, pIter);
1880 }
1881 }
1882
1883 /*
1884 ** Advance iterator pIter to the next entry.
1885 **
1886 ** This version of fts5SegIterNext() is only used if detail=none and the
1887 ** iterator is not a reverse direction iterator.
1888 */
1889 static void fts5SegIterNext_None(
1890 Fts5Index *p, /* FTS5 backend object */
1891 Fts5SegIter *pIter, /* Iterator to advance */
1892 int *pbNewTerm /* OUT: Set for new term */
1893 ){
1894 int iOff;
1895
1896 assert( p->rc==SQLITE_OK );
1897 assert( (pIter->flags & FTS5_SEGITER_REVERSE)==0 );
1898 assert( p->pConfig->eDetail==FTS5_DETAIL_NONE );
1899
1900 ASSERT_SZLEAF_OK(pIter->pLeaf);
1901 iOff = pIter->iLeafOffset;
1902
1903 /* Next entry is on the next page */
1904 if( pIter->pSeg && iOff>=pIter->pLeaf->szLeaf ){
1905 fts5SegIterNextPage(p, pIter);
1906 if( p->rc || pIter->pLeaf==0 ) return;
1907 pIter->iRowid = 0;
1908 iOff = 4;
1909 }
1910
1911 if( iOff<pIter->iEndofDoclist ){
1912 /* Next entry is on the current page */
1913 i64 iDelta;
1914 iOff += sqlite3Fts5GetVarint(&pIter->pLeaf->p[iOff], (u64*)&iDelta);
1915 pIter->iLeafOffset = iOff;
1916 pIter->iRowid += iDelta;
1917 }else if( (pIter->flags & FTS5_SEGITER_ONETERM)==0 ){
1918 if( pIter->pSeg ){
1919 int nKeep = 0;
1920 if( iOff!=fts5LeafFirstTermOff(pIter->pLeaf) ){
1921 iOff += fts5GetVarint32(&pIter->pLeaf->p[iOff], nKeep);
1922 }
1923 pIter->iLeafOffset = iOff;
1924 fts5SegIterLoadTerm(p, pIter, nKeep);
1925 }else{
1926 const u8 *pList = 0;
1927 const char *zTerm = 0;
1928 int nList;
1929 sqlite3Fts5HashScanNext(p->pHash);
1930 sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList);
1931 if( pList==0 ) goto next_none_eof;
1932 pIter->pLeaf->p = (u8*)pList;
1933 pIter->pLeaf->nn = nList;
1934 pIter->pLeaf->szLeaf = nList;
1935 pIter->iEndofDoclist = nList;
1936 sqlite3Fts5BufferSet(&p->rc,&pIter->term, (int)strlen(zTerm), (u8*)zTerm);
1937 pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid);
1938 }
1939
1940 if( pbNewTerm ) *pbNewTerm = 1;
1941 }else{
1942 goto next_none_eof;
1943 }
1944
1945 fts5SegIterLoadNPos(p, pIter);
1946
1947 return;
1948 next_none_eof:
1949 fts5DataRelease(pIter->pLeaf);
1950 pIter->pLeaf = 0;
1951 }
1952
1953
1954 /*
1718 ** Advance iterator pIter to the next entry. 1955 ** Advance iterator pIter to the next entry.
1719 ** 1956 **
1720 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. It 1957 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. It
1721 ** is not considered an error if the iterator reaches EOF. If an error has 1958 ** is not considered an error if the iterator reaches EOF. If an error has
1722 ** already occurred when this function is called, it is a no-op. 1959 ** already occurred when this function is called, it is a no-op.
1723 */ 1960 */
1724 static void fts5SegIterNext( 1961 static void fts5SegIterNext(
1725 Fts5Index *p, /* FTS5 backend object */ 1962 Fts5Index *p, /* FTS5 backend object */
1726 Fts5SegIter *pIter, /* Iterator to advance */ 1963 Fts5SegIter *pIter, /* Iterator to advance */
1727 int *pbNewTerm /* OUT: Set for new term */ 1964 int *pbNewTerm /* OUT: Set for new term */
1728 ){ 1965 ){
1966 Fts5Data *pLeaf = pIter->pLeaf;
1967 int iOff;
1968 int bNewTerm = 0;
1969 int nKeep = 0;
1970 u8 *a;
1971 int n;
1972
1729 assert( pbNewTerm==0 || *pbNewTerm==0 ); 1973 assert( pbNewTerm==0 || *pbNewTerm==0 );
1730 if( p->rc==SQLITE_OK ){ 1974 assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE );
1731 if( pIter->flags & FTS5_SEGITER_REVERSE ){
1732 assert( pIter->pNextLeaf==0 );
1733 if( pIter->iRowidOffset>0 ){
1734 u8 *a = pIter->pLeaf->p;
1735 int iOff;
1736 int nPos;
1737 int bDummy;
1738 i64 iDelta;
1739 1975
1740 pIter->iRowidOffset--; 1976 /* Search for the end of the position list within the current page. */
1741 pIter->iLeafOffset = iOff = pIter->aRowidOffset[pIter->iRowidOffset]; 1977 a = pLeaf->p;
1742 iOff += fts5GetPoslistSize(&a[iOff], &nPos, &bDummy); 1978 n = pLeaf->szLeaf;
1743 iOff += nPos; 1979
1744 fts5GetVarint(&a[iOff], (u64*)&iDelta); 1980 ASSERT_SZLEAF_OK(pLeaf);
1745 pIter->iRowid -= iDelta; 1981 iOff = pIter->iLeafOffset + pIter->nPos;
1746 fts5SegIterLoadNPos(p, pIter); 1982
1747 }else{ 1983 if( iOff<n ){
1748 fts5SegIterReverseNewPage(p, pIter); 1984 /* The next entry is on the current page. */
1985 assert_nc( iOff<=pIter->iEndofDoclist );
1986 if( iOff>=pIter->iEndofDoclist ){
1987 bNewTerm = 1;
1988 if( iOff!=fts5LeafFirstTermOff(pLeaf) ){
1989 iOff += fts5GetVarint32(&a[iOff], nKeep);
1749 } 1990 }
1750 }else{ 1991 }else{
1751 Fts5Data *pLeaf = pIter->pLeaf; 1992 u64 iDelta;
1752 int iOff; 1993 iOff += sqlite3Fts5GetVarint(&a[iOff], &iDelta);
1753 int bNewTerm = 0; 1994 pIter->iRowid += iDelta;
1754 int nKeep = 0; 1995 assert_nc( iDelta>0 );
1996 }
1997 pIter->iLeafOffset = iOff;
1755 1998
1756 /* Search for the end of the position list within the current page. */ 1999 }else if( pIter->pSeg==0 ){
1757 u8 *a = pLeaf->p; 2000 const u8 *pList = 0;
1758 int n = pLeaf->szLeaf; 2001 const char *zTerm = 0;
1759 2002 int nList = 0;
2003 assert( (pIter->flags & FTS5_SEGITER_ONETERM) || pbNewTerm );
2004 if( 0==(pIter->flags & FTS5_SEGITER_ONETERM) ){
2005 sqlite3Fts5HashScanNext(p->pHash);
2006 sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList);
2007 }
2008 if( pList==0 ){
2009 fts5DataRelease(pIter->pLeaf);
2010 pIter->pLeaf = 0;
2011 }else{
2012 pIter->pLeaf->p = (u8*)pList;
2013 pIter->pLeaf->nn = nList;
2014 pIter->pLeaf->szLeaf = nList;
2015 pIter->iEndofDoclist = nList+1;
2016 sqlite3Fts5BufferSet(&p->rc, &pIter->term, (int)strlen(zTerm),
2017 (u8*)zTerm);
2018 pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid);
2019 *pbNewTerm = 1;
2020 }
2021 }else{
2022 iOff = 0;
2023 /* Next entry is not on the current page */
2024 while( iOff==0 ){
2025 fts5SegIterNextPage(p, pIter);
2026 pLeaf = pIter->pLeaf;
2027 if( pLeaf==0 ) break;
1760 ASSERT_SZLEAF_OK(pLeaf); 2028 ASSERT_SZLEAF_OK(pLeaf);
1761 iOff = pIter->iLeafOffset + pIter->nPos; 2029 if( (iOff = fts5LeafFirstRowidOff(pLeaf)) && iOff<pLeaf->szLeaf ){
1762 2030 iOff += sqlite3Fts5GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid);
1763 if( iOff<n ){
1764 /* The next entry is on the current page. */
1765 assert_nc( iOff<=pIter->iEndofDoclist );
1766 if( iOff>=pIter->iEndofDoclist ){
1767 bNewTerm = 1;
1768 if( iOff!=fts5LeafFirstTermOff(pLeaf) ){
1769 iOff += fts5GetVarint32(&a[iOff], nKeep);
1770 }
1771 }else{
1772 u64 iDelta;
1773 iOff += sqlite3Fts5GetVarint(&a[iOff], &iDelta);
1774 pIter->iRowid += iDelta;
1775 assert_nc( iDelta>0 );
1776 }
1777 pIter->iLeafOffset = iOff; 2031 pIter->iLeafOffset = iOff;
1778 2032
1779 }else if( pIter->pSeg==0 ){ 2033 if( pLeaf->nn>pLeaf->szLeaf ){
1780 const u8 *pList = 0; 2034 pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(
1781 const char *zTerm = 0; 2035 &pLeaf->p[pLeaf->szLeaf], pIter->iEndofDoclist
1782 int nList = 0; 2036 );
1783 assert( (pIter->flags & FTS5_SEGITER_ONETERM) || pbNewTerm );
1784 if( 0==(pIter->flags & FTS5_SEGITER_ONETERM) ){
1785 sqlite3Fts5HashScanNext(p->pHash);
1786 sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList);
1787 }
1788 if( pList==0 ){
1789 fts5DataRelease(pIter->pLeaf);
1790 pIter->pLeaf = 0;
1791 }else{
1792 pIter->pLeaf->p = (u8*)pList;
1793 pIter->pLeaf->nn = nList;
1794 pIter->pLeaf->szLeaf = nList;
1795 pIter->iEndofDoclist = nList+1;
1796 sqlite3Fts5BufferSet(&p->rc, &pIter->term, (int)strlen(zTerm),
1797 (u8*)zTerm);
1798 pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid);
1799 *pbNewTerm = 1;
1800 }
1801 }else{
1802 iOff = 0;
1803 /* Next entry is not on the current page */
1804 while( iOff==0 ){
1805 fts5SegIterNextPage(p, pIter);
1806 pLeaf = pIter->pLeaf;
1807 if( pLeaf==0 ) break;
1808 ASSERT_SZLEAF_OK(pLeaf);
1809 if( (iOff = fts5LeafFirstRowidOff(pLeaf)) && iOff<pLeaf->szLeaf ){
1810 iOff += sqlite3Fts5GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid);
1811 pIter->iLeafOffset = iOff;
1812
1813 if( pLeaf->nn>pLeaf->szLeaf ){
1814 pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(
1815 &pLeaf->p[pLeaf->szLeaf], pIter->iEndofDoclist
1816 );
1817 }
1818
1819 }
1820 else if( pLeaf->nn>pLeaf->szLeaf ){
1821 pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(
1822 &pLeaf->p[pLeaf->szLeaf], iOff
1823 );
1824 pIter->iLeafOffset = iOff;
1825 pIter->iEndofDoclist = iOff;
1826 bNewTerm = 1;
1827 }
1828 if( iOff>=pLeaf->szLeaf ){
1829 p->rc = FTS5_CORRUPT;
1830 return;
1831 }
1832 } 2037 }
1833 } 2038 }
2039 else if( pLeaf->nn>pLeaf->szLeaf ){
2040 pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(
2041 &pLeaf->p[pLeaf->szLeaf], iOff
2042 );
2043 pIter->iLeafOffset = iOff;
2044 pIter->iEndofDoclist = iOff;
2045 bNewTerm = 1;
2046 }
2047 assert_nc( iOff<pLeaf->szLeaf );
2048 if( iOff>pLeaf->szLeaf ){
2049 p->rc = FTS5_CORRUPT;
2050 return;
2051 }
2052 }
2053 }
1834 2054
1835 /* Check if the iterator is now at EOF. If so, return early. */ 2055 /* Check if the iterator is now at EOF. If so, return early. */
1836 if( pIter->pLeaf ){ 2056 if( pIter->pLeaf ){
1837 if( bNewTerm ){ 2057 if( bNewTerm ){
1838 if( pIter->flags & FTS5_SEGITER_ONETERM ){ 2058 if( pIter->flags & FTS5_SEGITER_ONETERM ){
1839 fts5DataRelease(pIter->pLeaf); 2059 fts5DataRelease(pIter->pLeaf);
1840 pIter->pLeaf = 0; 2060 pIter->pLeaf = 0;
1841 }else{ 2061 }else{
1842 fts5SegIterLoadTerm(p, pIter, nKeep); 2062 fts5SegIterLoadTerm(p, pIter, nKeep);
1843 fts5SegIterLoadNPos(p, pIter); 2063 fts5SegIterLoadNPos(p, pIter);
1844 if( pbNewTerm ) *pbNewTerm = 1; 2064 if( pbNewTerm ) *pbNewTerm = 1;
1845 }
1846 }else{
1847 /* The following could be done by calling fts5SegIterLoadNPos(). But
1848 ** this block is particularly performance critical, so equivalent
1849 ** code is inlined. */
1850 int nSz;
1851 assert( p->rc==SQLITE_OK );
1852 fts5FastGetVarint32(pIter->pLeaf->p, pIter->iLeafOffset, nSz);
1853 pIter->bDel = (nSz & 0x0001);
1854 pIter->nPos = nSz>>1;
1855 assert_nc( pIter->nPos>=0 );
1856 }
1857 } 2065 }
2066 }else{
2067 /* The following could be done by calling fts5SegIterLoadNPos(). But
2068 ** this block is particularly performance critical, so equivalent
2069 ** code is inlined.
2070 **
2071 ** Later: Switched back to fts5SegIterLoadNPos() because it supports
2072 ** detail=none mode. Not ideal.
2073 */
2074 int nSz;
2075 assert( p->rc==SQLITE_OK );
2076 assert( pIter->iLeafOffset<=pIter->pLeaf->nn );
2077 fts5FastGetVarint32(pIter->pLeaf->p, pIter->iLeafOffset, nSz);
2078 pIter->bDel = (nSz & 0x0001);
2079 pIter->nPos = nSz>>1;
2080 assert_nc( pIter->nPos>=0 );
1858 } 2081 }
1859 } 2082 }
1860 } 2083 }
1861 2084
1862 #define SWAPVAL(T, a, b) { T tmp; tmp=a; a=b; b=tmp; } 2085 #define SWAPVAL(T, a, b) { T tmp; tmp=a; a=b; b=tmp; }
1863 2086
2087 #define fts5IndexSkipVarint(a, iOff) { \
2088 int iEnd = iOff+9; \
2089 while( (a[iOff++] & 0x80) && iOff<iEnd ); \
2090 }
2091
1864 /* 2092 /*
1865 ** Iterator pIter currently points to the first rowid in a doclist. This 2093 ** Iterator pIter currently points to the first rowid in a doclist. This
1866 ** function sets the iterator up so that iterates in reverse order through 2094 ** function sets the iterator up so that iterates in reverse order through
1867 ** the doclist. 2095 ** the doclist.
1868 */ 2096 */
1869 static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){ 2097 static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){
1870 Fts5DlidxIter *pDlidx = pIter->pDlidx; 2098 Fts5DlidxIter *pDlidx = pIter->pDlidx;
1871 Fts5Data *pLast = 0; 2099 Fts5Data *pLast = 0;
1872 int pgnoLast = 0; 2100 int pgnoLast = 0;
1873 2101
1874 if( pDlidx ){ 2102 if( pDlidx ){
1875 int iSegid = pIter->pSeg->iSegid; 2103 int iSegid = pIter->pSeg->iSegid;
1876 pgnoLast = fts5DlidxIterPgno(pDlidx); 2104 pgnoLast = fts5DlidxIterPgno(pDlidx);
1877 pLast = fts5DataRead(p, FTS5_SEGMENT_ROWID(iSegid, pgnoLast)); 2105 pLast = fts5DataRead(p, FTS5_SEGMENT_ROWID(iSegid, pgnoLast));
1878 }else{ 2106 }else{
1879 Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */ 2107 Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */
1880 2108
1881 /* Currently, Fts5SegIter.iLeafOffset points to the first byte of 2109 /* Currently, Fts5SegIter.iLeafOffset points to the first byte of
1882 ** position-list content for the current rowid. Back it up so that it 2110 ** position-list content for the current rowid. Back it up so that it
1883 ** points to the start of the position-list size field. */ 2111 ** points to the start of the position-list size field. */
1884 pIter->iLeafOffset -= sqlite3Fts5GetVarintLen(pIter->nPos*2+pIter->bDel); 2112 int iPoslist;
2113 if( pIter->iTermLeafPgno==pIter->iLeafPgno ){
2114 iPoslist = pIter->iTermLeafOffset;
2115 }else{
2116 iPoslist = 4;
2117 }
2118 fts5IndexSkipVarint(pLeaf->p, iPoslist);
2119 pIter->iLeafOffset = iPoslist;
1885 2120
1886 /* If this condition is true then the largest rowid for the current 2121 /* If this condition is true then the largest rowid for the current
1887 ** term may not be stored on the current page. So search forward to 2122 ** term may not be stored on the current page. So search forward to
1888 ** see where said rowid really is. */ 2123 ** see where said rowid really is. */
1889 if( pIter->iEndofDoclist>=pLeaf->szLeaf ){ 2124 if( pIter->iEndofDoclist>=pLeaf->szLeaf ){
1890 int pgno; 2125 int pgno;
1891 Fts5StructureSegment *pSeg = pIter->pSeg; 2126 Fts5StructureSegment *pSeg = pIter->pSeg;
1892 2127
1893 /* The last rowid in the doclist may not be on the current page. Search 2128 /* The last rowid in the doclist may not be on the current page. Search
1894 ** forward to find the page containing the last rowid. */ 2129 ** forward to find the page containing the last rowid. */
(...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after
1958 ** term. */ 2193 ** term. */
1959 if( pIter->iTermLeafPgno==pIter->iLeafPgno 2194 if( pIter->iTermLeafPgno==pIter->iLeafPgno
1960 && pIter->iEndofDoclist<pLeaf->szLeaf 2195 && pIter->iEndofDoclist<pLeaf->szLeaf
1961 ){ 2196 ){
1962 return; 2197 return;
1963 } 2198 }
1964 2199
1965 pIter->pDlidx = fts5DlidxIterInit(p, bRev, iSeg, pIter->iTermLeafPgno); 2200 pIter->pDlidx = fts5DlidxIterInit(p, bRev, iSeg, pIter->iTermLeafPgno);
1966 } 2201 }
1967 2202
1968 #define fts5IndexSkipVarint(a, iOff) { \
1969 int iEnd = iOff+9; \
1970 while( (a[iOff++] & 0x80) && iOff<iEnd ); \
1971 }
1972
1973 /* 2203 /*
1974 ** The iterator object passed as the second argument currently contains 2204 ** The iterator object passed as the second argument currently contains
1975 ** no valid values except for the Fts5SegIter.pLeaf member variable. This 2205 ** no valid values except for the Fts5SegIter.pLeaf member variable. This
1976 ** function searches the leaf page for a term matching (pTerm/nTerm). 2206 ** function searches the leaf page for a term matching (pTerm/nTerm).
1977 ** 2207 **
1978 ** If the specified term is found on the page, then the iterator is left 2208 ** If the specified term is found on the page, then the iterator is left
1979 ** pointing to it. If argument bGe is zero and the term is not found, 2209 ** pointing to it. If argument bGe is zero and the term is not found,
1980 ** the iterator is left pointing at EOF. 2210 ** the iterator is left pointing at EOF.
1981 ** 2211 **
1982 ** If bGe is non-zero and the specified term is not found, then the 2212 ** If bGe is non-zero and the specified term is not found, then the
(...skipping 17 matching lines...) Expand all
2000 int nNew = 0; 2230 int nNew = 0;
2001 int iTermOff; 2231 int iTermOff;
2002 int iPgidx; /* Current offset in pgidx */ 2232 int iPgidx; /* Current offset in pgidx */
2003 int bEndOfPage = 0; 2233 int bEndOfPage = 0;
2004 2234
2005 assert( p->rc==SQLITE_OK ); 2235 assert( p->rc==SQLITE_OK );
2006 2236
2007 iPgidx = szLeaf; 2237 iPgidx = szLeaf;
2008 iPgidx += fts5GetVarint32(&a[iPgidx], iTermOff); 2238 iPgidx += fts5GetVarint32(&a[iPgidx], iTermOff);
2009 iOff = iTermOff; 2239 iOff = iTermOff;
2240 if( iOff>n ){
2241 p->rc = FTS5_CORRUPT;
2242 return;
2243 }
2010 2244
2011 while( 1 ){ 2245 while( 1 ){
2012 2246
2013 /* Figure out how many new bytes are in this term */ 2247 /* Figure out how many new bytes are in this term */
2014 fts5FastGetVarint32(a, iOff, nNew); 2248 fts5FastGetVarint32(a, iOff, nNew);
2015 if( nKeep<nMatch ){ 2249 if( nKeep<nMatch ){
2016 goto search_failed; 2250 goto search_failed;
2017 } 2251 }
2018 2252
2019 assert( nKeep>=nMatch ); 2253 assert( nKeep>=nMatch );
(...skipping 19 matching lines...) Expand all
2039 2273
2040 if( iPgidx>=n ){ 2274 if( iPgidx>=n ){
2041 bEndOfPage = 1; 2275 bEndOfPage = 1;
2042 break; 2276 break;
2043 } 2277 }
2044 2278
2045 iPgidx += fts5GetVarint32(&a[iPgidx], nKeep); 2279 iPgidx += fts5GetVarint32(&a[iPgidx], nKeep);
2046 iTermOff += nKeep; 2280 iTermOff += nKeep;
2047 iOff = iTermOff; 2281 iOff = iTermOff;
2048 2282
2283 if( iOff>=n ){
2284 p->rc = FTS5_CORRUPT;
2285 return;
2286 }
2287
2049 /* Read the nKeep field of the next term. */ 2288 /* Read the nKeep field of the next term. */
2050 fts5FastGetVarint32(a, iOff, nKeep); 2289 fts5FastGetVarint32(a, iOff, nKeep);
2051 } 2290 }
2052 2291
2053 search_failed: 2292 search_failed:
2054 if( bGe==0 ){ 2293 if( bGe==0 ){
2055 fts5DataRelease(pIter->pLeaf); 2294 fts5DataRelease(pIter->pLeaf);
2056 pIter->pLeaf = 0; 2295 pIter->pLeaf = 0;
2057 return; 2296 return;
2058 }else if( bEndOfPage ){ 2297 }else if( bEndOfPage ){
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
2091 int nExtra; 2330 int nExtra;
2092 iPgidx += fts5GetVarint32(&a[iPgidx], nExtra); 2331 iPgidx += fts5GetVarint32(&a[iPgidx], nExtra);
2093 pIter->iEndofDoclist = iTermOff + nExtra; 2332 pIter->iEndofDoclist = iTermOff + nExtra;
2094 } 2333 }
2095 pIter->iPgidxOff = iPgidx; 2334 pIter->iPgidxOff = iPgidx;
2096 2335
2097 fts5SegIterLoadRowid(p, pIter); 2336 fts5SegIterLoadRowid(p, pIter);
2098 fts5SegIterLoadNPos(p, pIter); 2337 fts5SegIterLoadNPos(p, pIter);
2099 } 2338 }
2100 2339
2340 static sqlite3_stmt *fts5IdxSelectStmt(Fts5Index *p){
2341 if( p->pIdxSelect==0 ){
2342 Fts5Config *pConfig = p->pConfig;
2343 fts5IndexPrepareStmt(p, &p->pIdxSelect, sqlite3_mprintf(
2344 "SELECT pgno FROM '%q'.'%q_idx' WHERE "
2345 "segid=? AND term<=? ORDER BY term DESC LIMIT 1",
2346 pConfig->zDb, pConfig->zName
2347 ));
2348 }
2349 return p->pIdxSelect;
2350 }
2351
2101 /* 2352 /*
2102 ** Initialize the object pIter to point to term pTerm/nTerm within segment 2353 ** Initialize the object pIter to point to term pTerm/nTerm within segment
2103 ** pSeg. If there is no such term in the index, the iterator is set to EOF. 2354 ** pSeg. If there is no such term in the index, the iterator is set to EOF.
2104 ** 2355 **
2105 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If 2356 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
2106 ** an error has already occurred when this function is called, it is a no-op. 2357 ** an error has already occurred when this function is called, it is a no-op.
2107 */ 2358 */
2108 static void fts5SegIterSeekInit( 2359 static void fts5SegIterSeekInit(
2109 Fts5Index *p, /* FTS5 backend */ 2360 Fts5Index *p, /* FTS5 backend */
2110 Fts5Buffer *pBuf, /* Buffer to use for loading pages */
2111 const u8 *pTerm, int nTerm, /* Term to seek to */ 2361 const u8 *pTerm, int nTerm, /* Term to seek to */
2112 int flags, /* Mask of FTS5INDEX_XXX flags */ 2362 int flags, /* Mask of FTS5INDEX_XXX flags */
2113 Fts5StructureSegment *pSeg, /* Description of segment */ 2363 Fts5StructureSegment *pSeg, /* Description of segment */
2114 Fts5SegIter *pIter /* Object to populate */ 2364 Fts5SegIter *pIter /* Object to populate */
2115 ){ 2365 ){
2116 int iPg = 1; 2366 int iPg = 1;
2117 int bGe = (flags & FTS5INDEX_QUERY_SCAN); 2367 int bGe = (flags & FTS5INDEX_QUERY_SCAN);
2118 int bDlidx = 0; /* True if there is a doclist-index */ 2368 int bDlidx = 0; /* True if there is a doclist-index */
2119 2369 sqlite3_stmt *pIdxSelect = 0;
2120 static int nCall = 0;
2121 nCall++;
2122 2370
2123 assert( bGe==0 || (flags & FTS5INDEX_QUERY_DESC)==0 ); 2371 assert( bGe==0 || (flags & FTS5INDEX_QUERY_DESC)==0 );
2124 assert( pTerm && nTerm ); 2372 assert( pTerm && nTerm );
2125 memset(pIter, 0, sizeof(*pIter)); 2373 memset(pIter, 0, sizeof(*pIter));
2126 pIter->pSeg = pSeg; 2374 pIter->pSeg = pSeg;
2127 2375
2128 /* This block sets stack variable iPg to the leaf page number that may 2376 /* This block sets stack variable iPg to the leaf page number that may
2129 ** contain term (pTerm/nTerm), if it is present in the segment. */ 2377 ** contain term (pTerm/nTerm), if it is present in the segment. */
2130 if( p->pIdxSelect==0 ){ 2378 pIdxSelect = fts5IdxSelectStmt(p);
2131 Fts5Config *pConfig = p->pConfig;
2132 fts5IndexPrepareStmt(p, &p->pIdxSelect, sqlite3_mprintf(
2133 "SELECT pgno FROM '%q'.'%q_idx' WHERE "
2134 "segid=? AND term<=? ORDER BY term DESC LIMIT 1",
2135 pConfig->zDb, pConfig->zName
2136 ));
2137 }
2138 if( p->rc ) return; 2379 if( p->rc ) return;
2139 sqlite3_bind_int(p->pIdxSelect, 1, pSeg->iSegid); 2380 sqlite3_bind_int(pIdxSelect, 1, pSeg->iSegid);
2140 sqlite3_bind_blob(p->pIdxSelect, 2, pTerm, nTerm, SQLITE_STATIC); 2381 sqlite3_bind_blob(pIdxSelect, 2, pTerm, nTerm, SQLITE_STATIC);
2141 if( SQLITE_ROW==sqlite3_step(p->pIdxSelect) ){ 2382 if( SQLITE_ROW==sqlite3_step(pIdxSelect) ){
2142 i64 val = sqlite3_column_int(p->pIdxSelect, 0); 2383 i64 val = sqlite3_column_int(pIdxSelect, 0);
2143 iPg = (int)(val>>1); 2384 iPg = (int)(val>>1);
2144 bDlidx = (val & 0x0001); 2385 bDlidx = (val & 0x0001);
2145 } 2386 }
2146 p->rc = sqlite3_reset(p->pIdxSelect); 2387 p->rc = sqlite3_reset(pIdxSelect);
2147 2388
2148 if( iPg<pSeg->pgnoFirst ){ 2389 if( iPg<pSeg->pgnoFirst ){
2149 iPg = pSeg->pgnoFirst; 2390 iPg = pSeg->pgnoFirst;
2150 bDlidx = 0; 2391 bDlidx = 0;
2151 } 2392 }
2152 2393
2153 pIter->iLeafPgno = iPg - 1; 2394 pIter->iLeafPgno = iPg - 1;
2154 fts5SegIterNextPage(p, pIter); 2395 fts5SegIterNextPage(p, pIter);
2155 2396
2156 if( pIter->pLeaf ){ 2397 if( pIter->pLeaf ){
2157 fts5LeafSeek(p, bGe, pIter, pTerm, nTerm); 2398 fts5LeafSeek(p, bGe, pIter, pTerm, nTerm);
2158 } 2399 }
2159 2400
2160 if( p->rc==SQLITE_OK && bGe==0 ){ 2401 if( p->rc==SQLITE_OK && bGe==0 ){
2161 pIter->flags |= FTS5_SEGITER_ONETERM; 2402 pIter->flags |= FTS5_SEGITER_ONETERM;
2162 if( pIter->pLeaf ){ 2403 if( pIter->pLeaf ){
2163 if( flags & FTS5INDEX_QUERY_DESC ){ 2404 if( flags & FTS5INDEX_QUERY_DESC ){
2164 pIter->flags |= FTS5_SEGITER_REVERSE; 2405 pIter->flags |= FTS5_SEGITER_REVERSE;
2165 } 2406 }
2166 if( bDlidx ){ 2407 if( bDlidx ){
2167 fts5SegIterLoadDlidx(p, pIter); 2408 fts5SegIterLoadDlidx(p, pIter);
2168 } 2409 }
2169 if( flags & FTS5INDEX_QUERY_DESC ){ 2410 if( flags & FTS5INDEX_QUERY_DESC ){
2170 fts5SegIterReverse(p, pIter); 2411 fts5SegIterReverse(p, pIter);
2171 } 2412 }
2172 } 2413 }
2173 } 2414 }
2174 2415
2416 fts5SegIterSetNext(p, pIter);
2417
2175 /* Either: 2418 /* Either:
2176 ** 2419 **
2177 ** 1) an error has occurred, or 2420 ** 1) an error has occurred, or
2178 ** 2) the iterator points to EOF, or 2421 ** 2) the iterator points to EOF, or
2179 ** 3) the iterator points to an entry with term (pTerm/nTerm), or 2422 ** 3) the iterator points to an entry with term (pTerm/nTerm), or
2180 ** 4) the FTS5INDEX_QUERY_SCAN flag was set and the iterator points 2423 ** 4) the FTS5INDEX_QUERY_SCAN flag was set and the iterator points
2181 ** to an entry with a term greater than or equal to (pTerm/nTerm). 2424 ** to an entry with a term greater than or equal to (pTerm/nTerm).
2182 */ 2425 */
2183 assert( p->rc!=SQLITE_OK /* 1 */ 2426 assert( p->rc!=SQLITE_OK /* 1 */
2184 || pIter->pLeaf==0 /* 2 */ 2427 || pIter->pLeaf==0 /* 2 */
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
2222 2465
2223 if( pList ){ 2466 if( pList ){
2224 Fts5Data *pLeaf; 2467 Fts5Data *pLeaf;
2225 sqlite3Fts5BufferSet(&p->rc, &pIter->term, n, z); 2468 sqlite3Fts5BufferSet(&p->rc, &pIter->term, n, z);
2226 pLeaf = fts5IdxMalloc(p, sizeof(Fts5Data)); 2469 pLeaf = fts5IdxMalloc(p, sizeof(Fts5Data));
2227 if( pLeaf==0 ) return; 2470 if( pLeaf==0 ) return;
2228 pLeaf->p = (u8*)pList; 2471 pLeaf->p = (u8*)pList;
2229 pLeaf->nn = pLeaf->szLeaf = nList; 2472 pLeaf->nn = pLeaf->szLeaf = nList;
2230 pIter->pLeaf = pLeaf; 2473 pIter->pLeaf = pLeaf;
2231 pIter->iLeafOffset = fts5GetVarint(pLeaf->p, (u64*)&pIter->iRowid); 2474 pIter->iLeafOffset = fts5GetVarint(pLeaf->p, (u64*)&pIter->iRowid);
2232 pIter->iEndofDoclist = pLeaf->nn+1; 2475 pIter->iEndofDoclist = pLeaf->nn;
2233 2476
2234 if( flags & FTS5INDEX_QUERY_DESC ){ 2477 if( flags & FTS5INDEX_QUERY_DESC ){
2235 pIter->flags |= FTS5_SEGITER_REVERSE; 2478 pIter->flags |= FTS5_SEGITER_REVERSE;
2236 fts5SegIterReverseInitPage(p, pIter); 2479 fts5SegIterReverseInitPage(p, pIter);
2237 }else{ 2480 }else{
2238 fts5SegIterLoadNPos(p, pIter); 2481 fts5SegIterLoadNPos(p, pIter);
2239 } 2482 }
2240 } 2483 }
2484
2485 fts5SegIterSetNext(p, pIter);
2241 } 2486 }
2242 2487
2243 /* 2488 /*
2244 ** Zero the iterator passed as the only argument. 2489 ** Zero the iterator passed as the only argument.
2245 */ 2490 */
2246 static void fts5SegIterClear(Fts5SegIter *pIter){ 2491 static void fts5SegIterClear(Fts5SegIter *pIter){
2247 fts5BufferFree(&pIter->term); 2492 fts5BufferFree(&pIter->term);
2248 fts5DataRelease(pIter->pLeaf); 2493 fts5DataRelease(pIter->pLeaf);
2249 fts5DataRelease(pIter->pNextLeaf); 2494 fts5DataRelease(pIter->pNextLeaf);
2250 fts5DlidxIterFree(pIter->pDlidx); 2495 fts5DlidxIterFree(pIter->pDlidx);
2251 sqlite3_free(pIter->aRowidOffset); 2496 sqlite3_free(pIter->aRowidOffset);
2252 memset(pIter, 0, sizeof(Fts5SegIter)); 2497 memset(pIter, 0, sizeof(Fts5SegIter));
2253 } 2498 }
2254 2499
2255 #ifdef SQLITE_DEBUG 2500 #ifdef SQLITE_DEBUG
2256 2501
2257 /* 2502 /*
2258 ** This function is used as part of the big assert() procedure implemented by 2503 ** This function is used as part of the big assert() procedure implemented by
2259 ** fts5AssertMultiIterSetup(). It ensures that the result currently stored 2504 ** fts5AssertMultiIterSetup(). It ensures that the result currently stored
2260 ** in *pRes is the correct result of comparing the current positions of the 2505 ** in *pRes is the correct result of comparing the current positions of the
2261 ** two iterators. 2506 ** two iterators.
2262 */ 2507 */
2263 static void fts5AssertComparisonResult( 2508 static void fts5AssertComparisonResult(
2264 Fts5IndexIter *pIter, 2509 Fts5Iter *pIter,
2265 Fts5SegIter *p1, 2510 Fts5SegIter *p1,
2266 Fts5SegIter *p2, 2511 Fts5SegIter *p2,
2267 Fts5CResult *pRes 2512 Fts5CResult *pRes
2268 ){ 2513 ){
2269 int i1 = p1 - pIter->aSeg; 2514 int i1 = p1 - pIter->aSeg;
2270 int i2 = p2 - pIter->aSeg; 2515 int i2 = p2 - pIter->aSeg;
2271 2516
2272 if( p1->pLeaf || p2->pLeaf ){ 2517 if( p1->pLeaf || p2->pLeaf ){
2273 if( p1->pLeaf==0 ){ 2518 if( p1->pLeaf==0 ){
2274 assert( pRes->iFirst==i2 ); 2519 assert( pRes->iFirst==i2 );
(...skipping 20 matching lines...) Expand all
2295 } 2540 }
2296 } 2541 }
2297 } 2542 }
2298 2543
2299 /* 2544 /*
2300 ** This function is a no-op unless SQLITE_DEBUG is defined when this module 2545 ** This function is a no-op unless SQLITE_DEBUG is defined when this module
2301 ** is compiled. In that case, this function is essentially an assert() 2546 ** is compiled. In that case, this function is essentially an assert()
2302 ** statement used to verify that the contents of the pIter->aFirst[] array 2547 ** statement used to verify that the contents of the pIter->aFirst[] array
2303 ** are correct. 2548 ** are correct.
2304 */ 2549 */
2305 static void fts5AssertMultiIterSetup(Fts5Index *p, Fts5IndexIter *pIter){ 2550 static void fts5AssertMultiIterSetup(Fts5Index *p, Fts5Iter *pIter){
2306 if( p->rc==SQLITE_OK ){ 2551 if( p->rc==SQLITE_OK ){
2307 Fts5SegIter *pFirst = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; 2552 Fts5SegIter *pFirst = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
2308 int i; 2553 int i;
2309 2554
2310 assert( (pFirst->pLeaf==0)==pIter->bEof ); 2555 assert( (pFirst->pLeaf==0)==pIter->base.bEof );
2311 2556
2312 /* Check that pIter->iSwitchRowid is set correctly. */ 2557 /* Check that pIter->iSwitchRowid is set correctly. */
2313 for(i=0; i<pIter->nSeg; i++){ 2558 for(i=0; i<pIter->nSeg; i++){
2314 Fts5SegIter *p1 = &pIter->aSeg[i]; 2559 Fts5SegIter *p1 = &pIter->aSeg[i];
2315 assert( p1==pFirst 2560 assert( p1==pFirst
2316 || p1->pLeaf==0 2561 || p1->pLeaf==0
2317 || fts5BufferCompare(&pFirst->term, &p1->term) 2562 || fts5BufferCompare(&pFirst->term, &p1->term)
2318 || p1->iRowid==pIter->iSwitchRowid 2563 || p1->iRowid==pIter->iSwitchRowid
2319 || (p1->iRowid<pIter->iSwitchRowid)==pIter->bRev 2564 || (p1->iRowid<pIter->iSwitchRowid)==pIter->bRev
2320 ); 2565 );
(...skipping 19 matching lines...) Expand all
2340 #endif 2585 #endif
2341 2586
2342 /* 2587 /*
2343 ** Do the comparison necessary to populate pIter->aFirst[iOut]. 2588 ** Do the comparison necessary to populate pIter->aFirst[iOut].
2344 ** 2589 **
2345 ** If the returned value is non-zero, then it is the index of an entry 2590 ** If the returned value is non-zero, then it is the index of an entry
2346 ** in the pIter->aSeg[] array that is (a) not at EOF, and (b) pointing 2591 ** in the pIter->aSeg[] array that is (a) not at EOF, and (b) pointing
2347 ** to a key that is a duplicate of another, higher priority, 2592 ** to a key that is a duplicate of another, higher priority,
2348 ** segment-iterator in the pSeg->aSeg[] array. 2593 ** segment-iterator in the pSeg->aSeg[] array.
2349 */ 2594 */
2350 static int fts5MultiIterDoCompare(Fts5IndexIter *pIter, int iOut){ 2595 static int fts5MultiIterDoCompare(Fts5Iter *pIter, int iOut){
2351 int i1; /* Index of left-hand Fts5SegIter */ 2596 int i1; /* Index of left-hand Fts5SegIter */
2352 int i2; /* Index of right-hand Fts5SegIter */ 2597 int i2; /* Index of right-hand Fts5SegIter */
2353 int iRes; 2598 int iRes;
2354 Fts5SegIter *p1; /* Left-hand Fts5SegIter */ 2599 Fts5SegIter *p1; /* Left-hand Fts5SegIter */
2355 Fts5SegIter *p2; /* Right-hand Fts5SegIter */ 2600 Fts5SegIter *p2; /* Right-hand Fts5SegIter */
2356 Fts5CResult *pRes = &pIter->aFirst[iOut]; 2601 Fts5CResult *pRes = &pIter->aFirst[iOut];
2357 2602
2358 assert( iOut<pIter->nSeg && iOut>0 ); 2603 assert( iOut<pIter->nSeg && iOut>0 );
2359 assert( pIter->bRev==0 || pIter->bRev==1 ); 2604 assert( pIter->bRev==0 || pIter->bRev==1 );
2360 2605
(...skipping 113 matching lines...) Expand 10 before | Expand all | Expand 10 after
2474 assert( fts5DlidxIterEof(p, pDlidx) || iLeafPgno<=pIter->iLeafPgno ); 2719 assert( fts5DlidxIterEof(p, pDlidx) || iLeafPgno<=pIter->iLeafPgno );
2475 2720
2476 if( iLeafPgno<pIter->iLeafPgno ){ 2721 if( iLeafPgno<pIter->iLeafPgno ){
2477 pIter->iLeafPgno = iLeafPgno+1; 2722 pIter->iLeafPgno = iLeafPgno+1;
2478 fts5SegIterReverseNewPage(p, pIter); 2723 fts5SegIterReverseNewPage(p, pIter);
2479 bMove = 0; 2724 bMove = 0;
2480 } 2725 }
2481 } 2726 }
2482 2727
2483 do{ 2728 do{
2484 if( bMove ) fts5SegIterNext(p, pIter, 0); 2729 if( bMove && p->rc==SQLITE_OK ) pIter->xNext(p, pIter, 0);
2485 if( pIter->pLeaf==0 ) break; 2730 if( pIter->pLeaf==0 ) break;
2486 if( bRev==0 && pIter->iRowid>=iMatch ) break; 2731 if( bRev==0 && pIter->iRowid>=iMatch ) break;
2487 if( bRev!=0 && pIter->iRowid<=iMatch ) break; 2732 if( bRev!=0 && pIter->iRowid<=iMatch ) break;
2488 bMove = 1; 2733 bMove = 1;
2489 }while( p->rc==SQLITE_OK ); 2734 }while( p->rc==SQLITE_OK );
2490 } 2735 }
2491 2736
2492 2737
2493 /* 2738 /*
2494 ** Free the iterator object passed as the second argument. 2739 ** Free the iterator object passed as the second argument.
2495 */ 2740 */
2496 static void fts5MultiIterFree(Fts5Index *p, Fts5IndexIter *pIter){ 2741 static void fts5MultiIterFree(Fts5Iter *pIter){
2497 if( pIter ){ 2742 if( pIter ){
2498 int i; 2743 int i;
2499 for(i=0; i<pIter->nSeg; i++){ 2744 for(i=0; i<pIter->nSeg; i++){
2500 fts5SegIterClear(&pIter->aSeg[i]); 2745 fts5SegIterClear(&pIter->aSeg[i]);
2501 } 2746 }
2502 fts5StructureRelease(pIter->pStruct); 2747 fts5StructureRelease(pIter->pStruct);
2503 fts5BufferFree(&pIter->poslist); 2748 fts5BufferFree(&pIter->poslist);
2504 sqlite3_free(pIter); 2749 sqlite3_free(pIter);
2505 } 2750 }
2506 } 2751 }
2507 2752
2508 static void fts5MultiIterAdvanced( 2753 static void fts5MultiIterAdvanced(
2509 Fts5Index *p, /* FTS5 backend to iterate within */ 2754 Fts5Index *p, /* FTS5 backend to iterate within */
2510 Fts5IndexIter *pIter, /* Iterator to update aFirst[] array for */ 2755 Fts5Iter *pIter, /* Iterator to update aFirst[] array for */
2511 int iChanged, /* Index of sub-iterator just advanced */ 2756 int iChanged, /* Index of sub-iterator just advanced */
2512 int iMinset /* Minimum entry in aFirst[] to set */ 2757 int iMinset /* Minimum entry in aFirst[] to set */
2513 ){ 2758 ){
2514 int i; 2759 int i;
2515 for(i=(pIter->nSeg+iChanged)/2; i>=iMinset && p->rc==SQLITE_OK; i=i/2){ 2760 for(i=(pIter->nSeg+iChanged)/2; i>=iMinset && p->rc==SQLITE_OK; i=i/2){
2516 int iEq; 2761 int iEq;
2517 if( (iEq = fts5MultiIterDoCompare(pIter, i)) ){ 2762 if( (iEq = fts5MultiIterDoCompare(pIter, i)) ){
2518 fts5SegIterNext(p, &pIter->aSeg[iEq], 0); 2763 Fts5SegIter *pSeg = &pIter->aSeg[iEq];
2764 assert( p->rc==SQLITE_OK );
2765 pSeg->xNext(p, pSeg, 0);
2519 i = pIter->nSeg + iEq; 2766 i = pIter->nSeg + iEq;
2520 } 2767 }
2521 } 2768 }
2522 } 2769 }
2523 2770
2524 /* 2771 /*
2525 ** Sub-iterator iChanged of iterator pIter has just been advanced. It still 2772 ** Sub-iterator iChanged of iterator pIter has just been advanced. It still
2526 ** points to the same term though - just a different rowid. This function 2773 ** points to the same term though - just a different rowid. This function
2527 ** attempts to update the contents of the pIter->aFirst[] accordingly. 2774 ** attempts to update the contents of the pIter->aFirst[] accordingly.
2528 ** If it does so successfully, 0 is returned. Otherwise 1. 2775 ** If it does so successfully, 0 is returned. Otherwise 1.
2529 ** 2776 **
2530 ** If non-zero is returned, the caller should call fts5MultiIterAdvanced() 2777 ** If non-zero is returned, the caller should call fts5MultiIterAdvanced()
2531 ** on the iterator instead. That function does the same as this one, except 2778 ** on the iterator instead. That function does the same as this one, except
2532 ** that it deals with more complicated cases as well. 2779 ** that it deals with more complicated cases as well.
2533 */ 2780 */
2534 static int fts5MultiIterAdvanceRowid( 2781 static int fts5MultiIterAdvanceRowid(
2535 Fts5Index *p, /* FTS5 backend to iterate within */ 2782 Fts5Iter *pIter, /* Iterator to update aFirst[] array for */
2536 Fts5IndexIter *pIter, /* Iterator to update aFirst[] array for */ 2783 int iChanged, /* Index of sub-iterator just advanced */
2537 int iChanged /* Index of sub-iterator just advanced */ 2784 Fts5SegIter **ppFirst
2538 ){ 2785 ){
2539 Fts5SegIter *pNew = &pIter->aSeg[iChanged]; 2786 Fts5SegIter *pNew = &pIter->aSeg[iChanged];
2540 2787
2541 if( pNew->iRowid==pIter->iSwitchRowid 2788 if( pNew->iRowid==pIter->iSwitchRowid
2542 || (pNew->iRowid<pIter->iSwitchRowid)==pIter->bRev 2789 || (pNew->iRowid<pIter->iSwitchRowid)==pIter->bRev
2543 ){ 2790 ){
2544 int i; 2791 int i;
2545 Fts5SegIter *pOther = &pIter->aSeg[iChanged ^ 0x0001]; 2792 Fts5SegIter *pOther = &pIter->aSeg[iChanged ^ 0x0001];
2546 pIter->iSwitchRowid = pIter->bRev ? SMALLEST_INT64 : LARGEST_INT64; 2793 pIter->iSwitchRowid = pIter->bRev ? SMALLEST_INT64 : LARGEST_INT64;
2547 for(i=(pIter->nSeg+iChanged)/2; 1; i=i/2){ 2794 for(i=(pIter->nSeg+iChanged)/2; 1; i=i/2){
(...skipping 12 matching lines...) Expand all
2560 pIter->iSwitchRowid = pOther->iRowid; 2807 pIter->iSwitchRowid = pOther->iRowid;
2561 } 2808 }
2562 } 2809 }
2563 pRes->iFirst = (u16)(pNew - pIter->aSeg); 2810 pRes->iFirst = (u16)(pNew - pIter->aSeg);
2564 if( i==1 ) break; 2811 if( i==1 ) break;
2565 2812
2566 pOther = &pIter->aSeg[ pIter->aFirst[i ^ 0x0001].iFirst ]; 2813 pOther = &pIter->aSeg[ pIter->aFirst[i ^ 0x0001].iFirst ];
2567 } 2814 }
2568 } 2815 }
2569 2816
2817 *ppFirst = pNew;
2570 return 0; 2818 return 0;
2571 } 2819 }
2572 2820
2573 /* 2821 /*
2574 ** Set the pIter->bEof variable based on the state of the sub-iterators. 2822 ** Set the pIter->bEof variable based on the state of the sub-iterators.
2575 */ 2823 */
2576 static void fts5MultiIterSetEof(Fts5IndexIter *pIter){ 2824 static void fts5MultiIterSetEof(Fts5Iter *pIter){
2577 Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; 2825 Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
2578 pIter->bEof = pSeg->pLeaf==0; 2826 pIter->base.bEof = pSeg->pLeaf==0;
2579 pIter->iSwitchRowid = pSeg->iRowid; 2827 pIter->iSwitchRowid = pSeg->iRowid;
2580 } 2828 }
2581 2829
2582 /* 2830 /*
2583 ** Move the iterator to the next entry. 2831 ** Move the iterator to the next entry.
2584 ** 2832 **
2585 ** If an error occurs, an error code is left in Fts5Index.rc. It is not 2833 ** If an error occurs, an error code is left in Fts5Index.rc. It is not
2586 ** considered an error if the iterator reaches EOF, or if it is already at 2834 ** considered an error if the iterator reaches EOF, or if it is already at
2587 ** EOF when this function is called. 2835 ** EOF when this function is called.
2588 */ 2836 */
2589 static void fts5MultiIterNext( 2837 static void fts5MultiIterNext(
2590 Fts5Index *p, 2838 Fts5Index *p,
2591 Fts5IndexIter *pIter, 2839 Fts5Iter *pIter,
2592 int bFrom, /* True if argument iFrom is valid */ 2840 int bFrom, /* True if argument iFrom is valid */
2593 i64 iFrom /* Advance at least as far as this */ 2841 i64 iFrom /* Advance at least as far as this */
2594 ){ 2842 ){
2595 if( p->rc==SQLITE_OK ){ 2843 int bUseFrom = bFrom;
2596 int bUseFrom = bFrom; 2844 assert( pIter->base.bEof==0 );
2597 do { 2845 while( p->rc==SQLITE_OK ){
2598 int iFirst = pIter->aFirst[1].iFirst; 2846 int iFirst = pIter->aFirst[1].iFirst;
2599 int bNewTerm = 0; 2847 int bNewTerm = 0;
2600 Fts5SegIter *pSeg = &pIter->aSeg[iFirst]; 2848 Fts5SegIter *pSeg = &pIter->aSeg[iFirst];
2601 assert( p->rc==SQLITE_OK ); 2849 assert( p->rc==SQLITE_OK );
2602 if( bUseFrom && pSeg->pDlidx ){ 2850 if( bUseFrom && pSeg->pDlidx ){
2603 fts5SegIterNextFrom(p, pSeg, iFrom); 2851 fts5SegIterNextFrom(p, pSeg, iFrom);
2604 }else{ 2852 }else{
2605 fts5SegIterNext(p, pSeg, &bNewTerm); 2853 pSeg->xNext(p, pSeg, &bNewTerm);
2606 } 2854 }
2607 2855
2608 if( pSeg->pLeaf==0 || bNewTerm 2856 if( pSeg->pLeaf==0 || bNewTerm
2609 || fts5MultiIterAdvanceRowid(p, pIter, iFirst) 2857 || fts5MultiIterAdvanceRowid(pIter, iFirst, &pSeg)
2610 ){ 2858 ){
2611 fts5MultiIterAdvanced(p, pIter, iFirst, 1); 2859 fts5MultiIterAdvanced(p, pIter, iFirst, 1);
2612 fts5MultiIterSetEof(pIter); 2860 fts5MultiIterSetEof(pIter);
2613 } 2861 pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst];
2614 fts5AssertMultiIterSetup(p, pIter); 2862 if( pSeg->pLeaf==0 ) return;
2863 }
2615 2864
2616 bUseFrom = 0; 2865 fts5AssertMultiIterSetup(p, pIter);
2617 }while( pIter->bSkipEmpty && fts5MultiIterIsEmpty(p, pIter) ); 2866 assert( pSeg==&pIter->aSeg[pIter->aFirst[1].iFirst] && pSeg->pLeaf );
2867 if( pIter->bSkipEmpty==0 || pSeg->nPos ){
2868 pIter->xSetOutputs(pIter, pSeg);
2869 return;
2870 }
2871 bUseFrom = 0;
2618 } 2872 }
2619 } 2873 }
2620 2874
2621 static void fts5MultiIterNext2( 2875 static void fts5MultiIterNext2(
2622 Fts5Index *p, 2876 Fts5Index *p,
2623 Fts5IndexIter *pIter, 2877 Fts5Iter *pIter,
2624 int *pbNewTerm /* OUT: True if *might* be new term */ 2878 int *pbNewTerm /* OUT: True if *might* be new term */
2625 ){ 2879 ){
2626 assert( pIter->bSkipEmpty ); 2880 assert( pIter->bSkipEmpty );
2627 if( p->rc==SQLITE_OK ){ 2881 if( p->rc==SQLITE_OK ){
2628 do { 2882 do {
2629 int iFirst = pIter->aFirst[1].iFirst; 2883 int iFirst = pIter->aFirst[1].iFirst;
2630 Fts5SegIter *pSeg = &pIter->aSeg[iFirst]; 2884 Fts5SegIter *pSeg = &pIter->aSeg[iFirst];
2631 int bNewTerm = 0; 2885 int bNewTerm = 0;
2632 2886
2633 fts5SegIterNext(p, pSeg, &bNewTerm); 2887 assert( p->rc==SQLITE_OK );
2888 pSeg->xNext(p, pSeg, &bNewTerm);
2634 if( pSeg->pLeaf==0 || bNewTerm 2889 if( pSeg->pLeaf==0 || bNewTerm
2635 || fts5MultiIterAdvanceRowid(p, pIter, iFirst) 2890 || fts5MultiIterAdvanceRowid(pIter, iFirst, &pSeg)
2636 ){ 2891 ){
2637 fts5MultiIterAdvanced(p, pIter, iFirst, 1); 2892 fts5MultiIterAdvanced(p, pIter, iFirst, 1);
2638 fts5MultiIterSetEof(pIter); 2893 fts5MultiIterSetEof(pIter);
2639 *pbNewTerm = 1; 2894 *pbNewTerm = 1;
2640 }else{ 2895 }else{
2641 *pbNewTerm = 0; 2896 *pbNewTerm = 0;
2642 } 2897 }
2643 fts5AssertMultiIterSetup(p, pIter); 2898 fts5AssertMultiIterSetup(p, pIter);
2644 2899
2645 }while( fts5MultiIterIsEmpty(p, pIter) ); 2900 }while( fts5MultiIterIsEmpty(p, pIter) );
2646 } 2901 }
2647 } 2902 }
2648 2903
2904 static void fts5IterSetOutputs_Noop(Fts5Iter *pUnused1, Fts5SegIter *pUnused2){
2905 UNUSED_PARAM2(pUnused1, pUnused2);
2906 }
2649 2907
2650 static Fts5IndexIter *fts5MultiIterAlloc( 2908 static Fts5Iter *fts5MultiIterAlloc(
2651 Fts5Index *p, /* FTS5 backend to iterate within */ 2909 Fts5Index *p, /* FTS5 backend to iterate within */
2652 int nSeg 2910 int nSeg
2653 ){ 2911 ){
2654 Fts5IndexIter *pNew; 2912 Fts5Iter *pNew;
2655 int nSlot; /* Power of two >= nSeg */ 2913 int nSlot; /* Power of two >= nSeg */
2656 2914
2657 for(nSlot=2; nSlot<nSeg; nSlot=nSlot*2); 2915 for(nSlot=2; nSlot<nSeg; nSlot=nSlot*2);
2658 pNew = fts5IdxMalloc(p, 2916 pNew = fts5IdxMalloc(p,
2659 sizeof(Fts5IndexIter) + /* pNew */ 2917 sizeof(Fts5Iter) + /* pNew */
2660 sizeof(Fts5SegIter) * (nSlot-1) + /* pNew->aSeg[] */ 2918 sizeof(Fts5SegIter) * (nSlot-1) + /* pNew->aSeg[] */
2661 sizeof(Fts5CResult) * nSlot /* pNew->aFirst[] */ 2919 sizeof(Fts5CResult) * nSlot /* pNew->aFirst[] */
2662 ); 2920 );
2663 if( pNew ){ 2921 if( pNew ){
2664 pNew->nSeg = nSlot; 2922 pNew->nSeg = nSlot;
2665 pNew->aFirst = (Fts5CResult*)&pNew->aSeg[nSlot]; 2923 pNew->aFirst = (Fts5CResult*)&pNew->aSeg[nSlot];
2666 pNew->pIndex = p; 2924 pNew->pIndex = p;
2925 pNew->xSetOutputs = fts5IterSetOutputs_Noop;
2667 } 2926 }
2668 return pNew; 2927 return pNew;
2669 } 2928 }
2670 2929
2671 /* 2930 static void fts5PoslistCallback(
2672 ** Allocate a new Fts5IndexIter object. 2931 Fts5Index *pUnused,
2932 void *pContext,
2933 const u8 *pChunk, int nChunk
2934 ){
2935 UNUSED_PARAM(pUnused);
2936 assert_nc( nChunk>=0 );
2937 if( nChunk>0 ){
2938 fts5BufferSafeAppendBlob((Fts5Buffer*)pContext, pChunk, nChunk);
2939 }
2940 }
2941
2942 typedef struct PoslistCallbackCtx PoslistCallbackCtx;
2943 struct PoslistCallbackCtx {
2944 Fts5Buffer *pBuf; /* Append to this buffer */
2945 Fts5Colset *pColset; /* Restrict matches to this column */
2946 int eState; /* See above */
2947 };
2948
2949 typedef struct PoslistOffsetsCtx PoslistOffsetsCtx;
2950 struct PoslistOffsetsCtx {
2951 Fts5Buffer *pBuf; /* Append to this buffer */
2952 Fts5Colset *pColset; /* Restrict matches to this column */
2953 int iRead;
2954 int iWrite;
2955 };
2956
2957 /*
2958 ** TODO: Make this more efficient!
2959 */
2960 static int fts5IndexColsetTest(Fts5Colset *pColset, int iCol){
2961 int i;
2962 for(i=0; i<pColset->nCol; i++){
2963 if( pColset->aiCol[i]==iCol ) return 1;
2964 }
2965 return 0;
2966 }
2967
2968 static void fts5PoslistOffsetsCallback(
2969 Fts5Index *pUnused,
2970 void *pContext,
2971 const u8 *pChunk, int nChunk
2972 ){
2973 PoslistOffsetsCtx *pCtx = (PoslistOffsetsCtx*)pContext;
2974 UNUSED_PARAM(pUnused);
2975 assert_nc( nChunk>=0 );
2976 if( nChunk>0 ){
2977 int i = 0;
2978 while( i<nChunk ){
2979 int iVal;
2980 i += fts5GetVarint32(&pChunk[i], iVal);
2981 iVal += pCtx->iRead - 2;
2982 pCtx->iRead = iVal;
2983 if( fts5IndexColsetTest(pCtx->pColset, iVal) ){
2984 fts5BufferSafeAppendVarint(pCtx->pBuf, iVal + 2 - pCtx->iWrite);
2985 pCtx->iWrite = iVal;
2986 }
2987 }
2988 }
2989 }
2990
2991 static void fts5PoslistFilterCallback(
2992 Fts5Index *pUnused,
2993 void *pContext,
2994 const u8 *pChunk, int nChunk
2995 ){
2996 PoslistCallbackCtx *pCtx = (PoslistCallbackCtx*)pContext;
2997 UNUSED_PARAM(pUnused);
2998 assert_nc( nChunk>=0 );
2999 if( nChunk>0 ){
3000 /* Search through to find the first varint with value 1. This is the
3001 ** start of the next columns hits. */
3002 int i = 0;
3003 int iStart = 0;
3004
3005 if( pCtx->eState==2 ){
3006 int iCol;
3007 fts5FastGetVarint32(pChunk, i, iCol);
3008 if( fts5IndexColsetTest(pCtx->pColset, iCol) ){
3009 pCtx->eState = 1;
3010 fts5BufferSafeAppendVarint(pCtx->pBuf, 1);
3011 }else{
3012 pCtx->eState = 0;
3013 }
3014 }
3015
3016 do {
3017 while( i<nChunk && pChunk[i]!=0x01 ){
3018 while( pChunk[i] & 0x80 ) i++;
3019 i++;
3020 }
3021 if( pCtx->eState ){
3022 fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart);
3023 }
3024 if( i<nChunk ){
3025 int iCol;
3026 iStart = i;
3027 i++;
3028 if( i>=nChunk ){
3029 pCtx->eState = 2;
3030 }else{
3031 fts5FastGetVarint32(pChunk, i, iCol);
3032 pCtx->eState = fts5IndexColsetTest(pCtx->pColset, iCol);
3033 if( pCtx->eState ){
3034 fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart);
3035 iStart = i;
3036 }
3037 }
3038 }
3039 }while( i<nChunk );
3040 }
3041 }
3042
3043 static void fts5ChunkIterate(
3044 Fts5Index *p, /* Index object */
3045 Fts5SegIter *pSeg, /* Poslist of this iterator */
3046 void *pCtx, /* Context pointer for xChunk callback */
3047 void (*xChunk)(Fts5Index*, void*, const u8*, int)
3048 ){
3049 int nRem = pSeg->nPos; /* Number of bytes still to come */
3050 Fts5Data *pData = 0;
3051 u8 *pChunk = &pSeg->pLeaf->p[pSeg->iLeafOffset];
3052 int nChunk = MIN(nRem, pSeg->pLeaf->szLeaf - pSeg->iLeafOffset);
3053 int pgno = pSeg->iLeafPgno;
3054 int pgnoSave = 0;
3055
3056 /* This function does notmwork with detail=none databases. */
3057 assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE );
3058
3059 if( (pSeg->flags & FTS5_SEGITER_REVERSE)==0 ){
3060 pgnoSave = pgno+1;
3061 }
3062
3063 while( 1 ){
3064 xChunk(p, pCtx, pChunk, nChunk);
3065 nRem -= nChunk;
3066 fts5DataRelease(pData);
3067 if( nRem<=0 ){
3068 break;
3069 }else{
3070 pgno++;
3071 pData = fts5LeafRead(p, FTS5_SEGMENT_ROWID(pSeg->pSeg->iSegid, pgno));
3072 if( pData==0 ) break;
3073 pChunk = &pData->p[4];
3074 nChunk = MIN(nRem, pData->szLeaf - 4);
3075 if( pgno==pgnoSave ){
3076 assert( pSeg->pNextLeaf==0 );
3077 pSeg->pNextLeaf = pData;
3078 pData = 0;
3079 }
3080 }
3081 }
3082 }
3083
3084 /*
3085 ** Iterator pIter currently points to a valid entry (not EOF). This
3086 ** function appends the position list data for the current entry to
3087 ** buffer pBuf. It does not make a copy of the position-list size
3088 ** field.
3089 */
3090 static void fts5SegiterPoslist(
3091 Fts5Index *p,
3092 Fts5SegIter *pSeg,
3093 Fts5Colset *pColset,
3094 Fts5Buffer *pBuf
3095 ){
3096 if( 0==fts5BufferGrow(&p->rc, pBuf, pSeg->nPos) ){
3097 if( pColset==0 ){
3098 fts5ChunkIterate(p, pSeg, (void*)pBuf, fts5PoslistCallback);
3099 }else{
3100 if( p->pConfig->eDetail==FTS5_DETAIL_FULL ){
3101 PoslistCallbackCtx sCtx;
3102 sCtx.pBuf = pBuf;
3103 sCtx.pColset = pColset;
3104 sCtx.eState = fts5IndexColsetTest(pColset, 0);
3105 assert( sCtx.eState==0 || sCtx.eState==1 );
3106 fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistFilterCallback);
3107 }else{
3108 PoslistOffsetsCtx sCtx;
3109 memset(&sCtx, 0, sizeof(sCtx));
3110 sCtx.pBuf = pBuf;
3111 sCtx.pColset = pColset;
3112 fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistOffsetsCallback);
3113 }
3114 }
3115 }
3116 }
3117
3118 /*
3119 ** IN/OUT parameter (*pa) points to a position list n bytes in size. If
3120 ** the position list contains entries for column iCol, then (*pa) is set
3121 ** to point to the sub-position-list for that column and the number of
3122 ** bytes in it returned. Or, if the argument position list does not
3123 ** contain any entries for column iCol, return 0.
3124 */
3125 static int fts5IndexExtractCol(
3126 const u8 **pa, /* IN/OUT: Pointer to poslist */
3127 int n, /* IN: Size of poslist in bytes */
3128 int iCol /* Column to extract from poslist */
3129 ){
3130 int iCurrent = 0; /* Anything before the first 0x01 is col 0 */
3131 const u8 *p = *pa;
3132 const u8 *pEnd = &p[n]; /* One byte past end of position list */
3133
3134 while( iCol>iCurrent ){
3135 /* Advance pointer p until it points to pEnd or an 0x01 byte that is
3136 ** not part of a varint. Note that it is not possible for a negative
3137 ** or extremely large varint to occur within an uncorrupted position
3138 ** list. So the last byte of each varint may be assumed to have a clear
3139 ** 0x80 bit. */
3140 while( *p!=0x01 ){
3141 while( *p++ & 0x80 );
3142 if( p>=pEnd ) return 0;
3143 }
3144 *pa = p++;
3145 iCurrent = *p++;
3146 if( iCurrent & 0x80 ){
3147 p--;
3148 p += fts5GetVarint32(p, iCurrent);
3149 }
3150 }
3151 if( iCol!=iCurrent ) return 0;
3152
3153 /* Advance pointer p until it points to pEnd or an 0x01 byte that is
3154 ** not part of a varint */
3155 while( p<pEnd && *p!=0x01 ){
3156 while( *p++ & 0x80 );
3157 }
3158
3159 return p - (*pa);
3160 }
3161
3162 static int fts5IndexExtractColset (
3163 Fts5Colset *pColset, /* Colset to filter on */
3164 const u8 *pPos, int nPos, /* Position list */
3165 Fts5Buffer *pBuf /* Output buffer */
3166 ){
3167 int rc = SQLITE_OK;
3168 int i;
3169
3170 fts5BufferZero(pBuf);
3171 for(i=0; i<pColset->nCol; i++){
3172 const u8 *pSub = pPos;
3173 int nSub = fts5IndexExtractCol(&pSub, nPos, pColset->aiCol[i]);
3174 if( nSub ){
3175 fts5BufferAppendBlob(&rc, pBuf, nSub, pSub);
3176 }
3177 }
3178 return rc;
3179 }
3180
3181 /*
3182 ** xSetOutputs callback used by detail=none tables.
3183 */
3184 static void fts5IterSetOutputs_None(Fts5Iter *pIter, Fts5SegIter *pSeg){
3185 assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_NONE );
3186 pIter->base.iRowid = pSeg->iRowid;
3187 pIter->base.nData = pSeg->nPos;
3188 }
3189
3190 /*
3191 ** xSetOutputs callback used by detail=full and detail=col tables when no
3192 ** column filters are specified.
3193 */
3194 static void fts5IterSetOutputs_Nocolset(Fts5Iter *pIter, Fts5SegIter *pSeg){
3195 pIter->base.iRowid = pSeg->iRowid;
3196 pIter->base.nData = pSeg->nPos;
3197
3198 assert( pIter->pIndex->pConfig->eDetail!=FTS5_DETAIL_NONE );
3199 assert( pIter->pColset==0 );
3200
3201 if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){
3202 /* All data is stored on the current page. Populate the output
3203 ** variables to point into the body of the page object. */
3204 pIter->base.pData = &pSeg->pLeaf->p[pSeg->iLeafOffset];
3205 }else{
3206 /* The data is distributed over two or more pages. Copy it into the
3207 ** Fts5Iter.poslist buffer and then set the output pointer to point
3208 ** to this buffer. */
3209 fts5BufferZero(&pIter->poslist);
3210 fts5SegiterPoslist(pIter->pIndex, pSeg, 0, &pIter->poslist);
3211 pIter->base.pData = pIter->poslist.p;
3212 }
3213 }
3214
3215 /*
3216 ** xSetOutputs callback used when the Fts5Colset object has nCol==0 (match
3217 ** against no columns at all).
3218 */
3219 static void fts5IterSetOutputs_ZeroColset(Fts5Iter *pIter, Fts5SegIter *pSeg){
3220 UNUSED_PARAM(pSeg);
3221 pIter->base.nData = 0;
3222 }
3223
3224 /*
3225 ** xSetOutputs callback used by detail=col when there is a column filter
3226 ** and there are 100 or more columns. Also called as a fallback from
3227 ** fts5IterSetOutputs_Col100 if the column-list spans more than one page.
3228 */
3229 static void fts5IterSetOutputs_Col(Fts5Iter *pIter, Fts5SegIter *pSeg){
3230 fts5BufferZero(&pIter->poslist);
3231 fts5SegiterPoslist(pIter->pIndex, pSeg, pIter->pColset, &pIter->poslist);
3232 pIter->base.iRowid = pSeg->iRowid;
3233 pIter->base.pData = pIter->poslist.p;
3234 pIter->base.nData = pIter->poslist.n;
3235 }
3236
3237 /*
3238 ** xSetOutputs callback used when:
3239 **
3240 ** * detail=col,
3241 ** * there is a column filter, and
3242 ** * the table contains 100 or fewer columns.
3243 **
3244 ** The last point is to ensure all column numbers are stored as
3245 ** single-byte varints.
3246 */
3247 static void fts5IterSetOutputs_Col100(Fts5Iter *pIter, Fts5SegIter *pSeg){
3248
3249 assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_COLUMNS );
3250 assert( pIter->pColset );
3251
3252 if( pSeg->iLeafOffset+pSeg->nPos>pSeg->pLeaf->szLeaf ){
3253 fts5IterSetOutputs_Col(pIter, pSeg);
3254 }else{
3255 u8 *a = (u8*)&pSeg->pLeaf->p[pSeg->iLeafOffset];
3256 u8 *pEnd = (u8*)&a[pSeg->nPos];
3257 int iPrev = 0;
3258 int *aiCol = pIter->pColset->aiCol;
3259 int *aiColEnd = &aiCol[pIter->pColset->nCol];
3260
3261 u8 *aOut = pIter->poslist.p;
3262 int iPrevOut = 0;
3263
3264 pIter->base.iRowid = pSeg->iRowid;
3265
3266 while( a<pEnd ){
3267 iPrev += (int)a++[0] - 2;
3268 while( *aiCol<iPrev ){
3269 aiCol++;
3270 if( aiCol==aiColEnd ) goto setoutputs_col_out;
3271 }
3272 if( *aiCol==iPrev ){
3273 *aOut++ = (u8)((iPrev - iPrevOut) + 2);
3274 iPrevOut = iPrev;
3275 }
3276 }
3277
3278 setoutputs_col_out:
3279 pIter->base.pData = pIter->poslist.p;
3280 pIter->base.nData = aOut - pIter->poslist.p;
3281 }
3282 }
3283
3284 /*
3285 ** xSetOutputs callback used by detail=full when there is a column filter.
3286 */
3287 static void fts5IterSetOutputs_Full(Fts5Iter *pIter, Fts5SegIter *pSeg){
3288 Fts5Colset *pColset = pIter->pColset;
3289 pIter->base.iRowid = pSeg->iRowid;
3290
3291 assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_FULL );
3292 assert( pColset );
3293
3294 if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){
3295 /* All data is stored on the current page. Populate the output
3296 ** variables to point into the body of the page object. */
3297 const u8 *a = &pSeg->pLeaf->p[pSeg->iLeafOffset];
3298 if( pColset->nCol==1 ){
3299 pIter->base.nData = fts5IndexExtractCol(&a, pSeg->nPos,pColset->aiCol[0]);
3300 pIter->base.pData = a;
3301 }else{
3302 fts5BufferZero(&pIter->poslist);
3303 fts5IndexExtractColset(pColset, a, pSeg->nPos, &pIter->poslist);
3304 pIter->base.pData = pIter->poslist.p;
3305 pIter->base.nData = pIter->poslist.n;
3306 }
3307 }else{
3308 /* The data is distributed over two or more pages. Copy it into the
3309 ** Fts5Iter.poslist buffer and then set the output pointer to point
3310 ** to this buffer. */
3311 fts5BufferZero(&pIter->poslist);
3312 fts5SegiterPoslist(pIter->pIndex, pSeg, pColset, &pIter->poslist);
3313 pIter->base.pData = pIter->poslist.p;
3314 pIter->base.nData = pIter->poslist.n;
3315 }
3316 }
3317
3318 static void fts5IterSetOutputCb(int *pRc, Fts5Iter *pIter){
3319 if( *pRc==SQLITE_OK ){
3320 Fts5Config *pConfig = pIter->pIndex->pConfig;
3321 if( pConfig->eDetail==FTS5_DETAIL_NONE ){
3322 pIter->xSetOutputs = fts5IterSetOutputs_None;
3323 }
3324
3325 else if( pIter->pColset==0 ){
3326 pIter->xSetOutputs = fts5IterSetOutputs_Nocolset;
3327 }
3328
3329 else if( pIter->pColset->nCol==0 ){
3330 pIter->xSetOutputs = fts5IterSetOutputs_ZeroColset;
3331 }
3332
3333 else if( pConfig->eDetail==FTS5_DETAIL_FULL ){
3334 pIter->xSetOutputs = fts5IterSetOutputs_Full;
3335 }
3336
3337 else{
3338 assert( pConfig->eDetail==FTS5_DETAIL_COLUMNS );
3339 if( pConfig->nCol<=100 ){
3340 pIter->xSetOutputs = fts5IterSetOutputs_Col100;
3341 sqlite3Fts5BufferSize(pRc, &pIter->poslist, pConfig->nCol);
3342 }else{
3343 pIter->xSetOutputs = fts5IterSetOutputs_Col;
3344 }
3345 }
3346 }
3347 }
3348
3349
3350 /*
3351 ** Allocate a new Fts5Iter object.
2673 ** 3352 **
2674 ** The new object will be used to iterate through data in structure pStruct. 3353 ** The new object will be used to iterate through data in structure pStruct.
2675 ** If iLevel is -ve, then all data in all segments is merged. Or, if iLevel 3354 ** If iLevel is -ve, then all data in all segments is merged. Or, if iLevel
2676 ** is zero or greater, data from the first nSegment segments on level iLevel 3355 ** is zero or greater, data from the first nSegment segments on level iLevel
2677 ** is merged. 3356 ** is merged.
2678 ** 3357 **
2679 ** The iterator initially points to the first term/rowid entry in the 3358 ** The iterator initially points to the first term/rowid entry in the
2680 ** iterated data. 3359 ** iterated data.
2681 */ 3360 */
2682 static void fts5MultiIterNew( 3361 static void fts5MultiIterNew(
2683 Fts5Index *p, /* FTS5 backend to iterate within */ 3362 Fts5Index *p, /* FTS5 backend to iterate within */
2684 Fts5Structure *pStruct, /* Structure of specific index */ 3363 Fts5Structure *pStruct, /* Structure of specific index */
2685 int bSkipEmpty, /* True to ignore delete-keys */
2686 int flags, /* FTS5INDEX_QUERY_XXX flags */ 3364 int flags, /* FTS5INDEX_QUERY_XXX flags */
3365 Fts5Colset *pColset, /* Colset to filter on (or NULL) */
2687 const u8 *pTerm, int nTerm, /* Term to seek to (or NULL/0) */ 3366 const u8 *pTerm, int nTerm, /* Term to seek to (or NULL/0) */
2688 int iLevel, /* Level to iterate (-1 for all) */ 3367 int iLevel, /* Level to iterate (-1 for all) */
2689 int nSegment, /* Number of segments to merge (iLevel>=0) */ 3368 int nSegment, /* Number of segments to merge (iLevel>=0) */
2690 Fts5IndexIter **ppOut /* New object */ 3369 Fts5Iter **ppOut /* New object */
2691 ){ 3370 ){
2692 int nSeg = 0; /* Number of segment-iters in use */ 3371 int nSeg = 0; /* Number of segment-iters in use */
2693 int iIter = 0; /* */ 3372 int iIter = 0; /* */
2694 int iSeg; /* Used to iterate through segments */ 3373 int iSeg; /* Used to iterate through segments */
2695 Fts5Buffer buf = {0,0,0}; /* Buffer used by fts5SegIterSeekInit() */
2696 Fts5StructureLevel *pLvl; 3374 Fts5StructureLevel *pLvl;
2697 Fts5IndexIter *pNew; 3375 Fts5Iter *pNew;
2698 3376
2699 assert( (pTerm==0 && nTerm==0) || iLevel<0 ); 3377 assert( (pTerm==0 && nTerm==0) || iLevel<0 );
2700 3378
2701 /* Allocate space for the new multi-seg-iterator. */ 3379 /* Allocate space for the new multi-seg-iterator. */
2702 if( p->rc==SQLITE_OK ){ 3380 if( p->rc==SQLITE_OK ){
2703 if( iLevel<0 ){ 3381 if( iLevel<0 ){
2704 assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) ); 3382 assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) );
2705 nSeg = pStruct->nSegment; 3383 nSeg = pStruct->nSegment;
2706 nSeg += (p->pHash ? 1 : 0); 3384 nSeg += (p->pHash ? 1 : 0);
2707 }else{ 3385 }else{
2708 nSeg = MIN(pStruct->aLevel[iLevel].nSeg, nSegment); 3386 nSeg = MIN(pStruct->aLevel[iLevel].nSeg, nSegment);
2709 } 3387 }
2710 } 3388 }
2711 *ppOut = pNew = fts5MultiIterAlloc(p, nSeg); 3389 *ppOut = pNew = fts5MultiIterAlloc(p, nSeg);
2712 if( pNew==0 ) return; 3390 if( pNew==0 ) return;
2713 pNew->bRev = (0!=(flags & FTS5INDEX_QUERY_DESC)); 3391 pNew->bRev = (0!=(flags & FTS5INDEX_QUERY_DESC));
2714 pNew->bSkipEmpty = (u8)bSkipEmpty; 3392 pNew->bSkipEmpty = (0!=(flags & FTS5INDEX_QUERY_SKIPEMPTY));
2715 pNew->pStruct = pStruct; 3393 pNew->pStruct = pStruct;
3394 pNew->pColset = pColset;
2716 fts5StructureRef(pStruct); 3395 fts5StructureRef(pStruct);
3396 if( (flags & FTS5INDEX_QUERY_NOOUTPUT)==0 ){
3397 fts5IterSetOutputCb(&p->rc, pNew);
3398 }
2717 3399
2718 /* Initialize each of the component segment iterators. */ 3400 /* Initialize each of the component segment iterators. */
2719 if( iLevel<0 ){ 3401 if( p->rc==SQLITE_OK ){
2720 Fts5StructureLevel *pEnd = &pStruct->aLevel[pStruct->nLevel]; 3402 if( iLevel<0 ){
2721 if( p->pHash ){ 3403 Fts5StructureLevel *pEnd = &pStruct->aLevel[pStruct->nLevel];
2722 /* Add a segment iterator for the current contents of the hash table. */ 3404 if( p->pHash ){
2723 Fts5SegIter *pIter = &pNew->aSeg[iIter++]; 3405 /* Add a segment iterator for the current contents of the hash table. */
2724 fts5SegIterHashInit(p, pTerm, nTerm, flags, pIter);
2725 }
2726 for(pLvl=&pStruct->aLevel[0]; pLvl<pEnd; pLvl++){
2727 for(iSeg=pLvl->nSeg-1; iSeg>=0; iSeg--){
2728 Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
2729 Fts5SegIter *pIter = &pNew->aSeg[iIter++]; 3406 Fts5SegIter *pIter = &pNew->aSeg[iIter++];
2730 if( pTerm==0 ){ 3407 fts5SegIterHashInit(p, pTerm, nTerm, flags, pIter);
2731 fts5SegIterInit(p, pSeg, pIter); 3408 }
2732 }else{ 3409 for(pLvl=&pStruct->aLevel[0]; pLvl<pEnd; pLvl++){
2733 fts5SegIterSeekInit(p, &buf, pTerm, nTerm, flags, pSeg, pIter); 3410 for(iSeg=pLvl->nSeg-1; iSeg>=0; iSeg--){
3411 Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
3412 Fts5SegIter *pIter = &pNew->aSeg[iIter++];
3413 if( pTerm==0 ){
3414 fts5SegIterInit(p, pSeg, pIter);
3415 }else{
3416 fts5SegIterSeekInit(p, pTerm, nTerm, flags, pSeg, pIter);
3417 }
2734 } 3418 }
2735 } 3419 }
3420 }else{
3421 pLvl = &pStruct->aLevel[iLevel];
3422 for(iSeg=nSeg-1; iSeg>=0; iSeg--){
3423 fts5SegIterInit(p, &pLvl->aSeg[iSeg], &pNew->aSeg[iIter++]);
3424 }
2736 } 3425 }
2737 }else{ 3426 assert( iIter==nSeg );
2738 pLvl = &pStruct->aLevel[iLevel];
2739 for(iSeg=nSeg-1; iSeg>=0; iSeg--){
2740 fts5SegIterInit(p, &pLvl->aSeg[iSeg], &pNew->aSeg[iIter++]);
2741 }
2742 } 3427 }
2743 assert( iIter==nSeg );
2744 3428
2745 /* If the above was successful, each component iterators now points 3429 /* If the above was successful, each component iterators now points
2746 ** to the first entry in its segment. In this case initialize the 3430 ** to the first entry in its segment. In this case initialize the
2747 ** aFirst[] array. Or, if an error has occurred, free the iterator 3431 ** aFirst[] array. Or, if an error has occurred, free the iterator
2748 ** object and set the output variable to NULL. */ 3432 ** object and set the output variable to NULL. */
2749 if( p->rc==SQLITE_OK ){ 3433 if( p->rc==SQLITE_OK ){
2750 for(iIter=pNew->nSeg-1; iIter>0; iIter--){ 3434 for(iIter=pNew->nSeg-1; iIter>0; iIter--){
2751 int iEq; 3435 int iEq;
2752 if( (iEq = fts5MultiIterDoCompare(pNew, iIter)) ){ 3436 if( (iEq = fts5MultiIterDoCompare(pNew, iIter)) ){
2753 fts5SegIterNext(p, &pNew->aSeg[iEq], 0); 3437 Fts5SegIter *pSeg = &pNew->aSeg[iEq];
3438 if( p->rc==SQLITE_OK ) pSeg->xNext(p, pSeg, 0);
2754 fts5MultiIterAdvanced(p, pNew, iEq, iIter); 3439 fts5MultiIterAdvanced(p, pNew, iEq, iIter);
2755 } 3440 }
2756 } 3441 }
2757 fts5MultiIterSetEof(pNew); 3442 fts5MultiIterSetEof(pNew);
2758 fts5AssertMultiIterSetup(p, pNew); 3443 fts5AssertMultiIterSetup(p, pNew);
2759 3444
2760 if( pNew->bSkipEmpty && fts5MultiIterIsEmpty(p, pNew) ){ 3445 if( pNew->bSkipEmpty && fts5MultiIterIsEmpty(p, pNew) ){
2761 fts5MultiIterNext(p, pNew, 0, 0); 3446 fts5MultiIterNext(p, pNew, 0, 0);
3447 }else if( pNew->base.bEof==0 ){
3448 Fts5SegIter *pSeg = &pNew->aSeg[pNew->aFirst[1].iFirst];
3449 pNew->xSetOutputs(pNew, pSeg);
2762 } 3450 }
3451
2763 }else{ 3452 }else{
2764 fts5MultiIterFree(p, pNew); 3453 fts5MultiIterFree(pNew);
2765 *ppOut = 0; 3454 *ppOut = 0;
2766 } 3455 }
2767 fts5BufferFree(&buf);
2768 } 3456 }
2769 3457
2770 /* 3458 /*
2771 ** Create an Fts5IndexIter that iterates through the doclist provided 3459 ** Create an Fts5Iter that iterates through the doclist provided
2772 ** as the second argument. 3460 ** as the second argument.
2773 */ 3461 */
2774 static void fts5MultiIterNew2( 3462 static void fts5MultiIterNew2(
2775 Fts5Index *p, /* FTS5 backend to iterate within */ 3463 Fts5Index *p, /* FTS5 backend to iterate within */
2776 Fts5Data *pData, /* Doclist to iterate through */ 3464 Fts5Data *pData, /* Doclist to iterate through */
2777 int bDesc, /* True for descending rowid order */ 3465 int bDesc, /* True for descending rowid order */
2778 Fts5IndexIter **ppOut /* New object */ 3466 Fts5Iter **ppOut /* New object */
2779 ){ 3467 ){
2780 Fts5IndexIter *pNew; 3468 Fts5Iter *pNew;
2781 pNew = fts5MultiIterAlloc(p, 2); 3469 pNew = fts5MultiIterAlloc(p, 2);
2782 if( pNew ){ 3470 if( pNew ){
2783 Fts5SegIter *pIter = &pNew->aSeg[1]; 3471 Fts5SegIter *pIter = &pNew->aSeg[1];
2784 3472
2785 pNew->bFiltered = 1;
2786 pIter->flags = FTS5_SEGITER_ONETERM; 3473 pIter->flags = FTS5_SEGITER_ONETERM;
2787 if( pData->szLeaf>0 ){ 3474 if( pData->szLeaf>0 ){
2788 pIter->pLeaf = pData; 3475 pIter->pLeaf = pData;
2789 pIter->iLeafOffset = fts5GetVarint(pData->p, (u64*)&pIter->iRowid); 3476 pIter->iLeafOffset = fts5GetVarint(pData->p, (u64*)&pIter->iRowid);
2790 pIter->iEndofDoclist = pData->nn; 3477 pIter->iEndofDoclist = pData->nn;
2791 pNew->aFirst[1].iFirst = 1; 3478 pNew->aFirst[1].iFirst = 1;
2792 if( bDesc ){ 3479 if( bDesc ){
2793 pNew->bRev = 1; 3480 pNew->bRev = 1;
2794 pIter->flags |= FTS5_SEGITER_REVERSE; 3481 pIter->flags |= FTS5_SEGITER_REVERSE;
2795 fts5SegIterReverseInitPage(p, pIter); 3482 fts5SegIterReverseInitPage(p, pIter);
2796 }else{ 3483 }else{
2797 fts5SegIterLoadNPos(p, pIter); 3484 fts5SegIterLoadNPos(p, pIter);
2798 } 3485 }
2799 pData = 0; 3486 pData = 0;
2800 }else{ 3487 }else{
2801 pNew->bEof = 1; 3488 pNew->base.bEof = 1;
2802 } 3489 }
3490 fts5SegIterSetNext(p, pIter);
2803 3491
2804 *ppOut = pNew; 3492 *ppOut = pNew;
2805 } 3493 }
2806 3494
2807 fts5DataRelease(pData); 3495 fts5DataRelease(pData);
2808 } 3496 }
2809 3497
2810 /* 3498 /*
2811 ** Return true if the iterator is at EOF or if an error has occurred. 3499 ** Return true if the iterator is at EOF or if an error has occurred.
2812 ** False otherwise. 3500 ** False otherwise.
2813 */ 3501 */
2814 static int fts5MultiIterEof(Fts5Index *p, Fts5IndexIter *pIter){ 3502 static int fts5MultiIterEof(Fts5Index *p, Fts5Iter *pIter){
2815 assert( p->rc 3503 assert( p->rc
2816 || (pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf==0)==pIter->bEof 3504 || (pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf==0)==pIter->base.bEof
2817 ); 3505 );
2818 return (p->rc || pIter->bEof); 3506 return (p->rc || pIter->base.bEof);
2819 } 3507 }
2820 3508
2821 /* 3509 /*
2822 ** Return the rowid of the entry that the iterator currently points 3510 ** Return the rowid of the entry that the iterator currently points
2823 ** to. If the iterator points to EOF when this function is called the 3511 ** to. If the iterator points to EOF when this function is called the
2824 ** results are undefined. 3512 ** results are undefined.
2825 */ 3513 */
2826 static i64 fts5MultiIterRowid(Fts5IndexIter *pIter){ 3514 static i64 fts5MultiIterRowid(Fts5Iter *pIter){
2827 assert( pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf ); 3515 assert( pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf );
2828 return pIter->aSeg[ pIter->aFirst[1].iFirst ].iRowid; 3516 return pIter->aSeg[ pIter->aFirst[1].iFirst ].iRowid;
2829 } 3517 }
2830 3518
2831 /* 3519 /*
2832 ** Move the iterator to the next entry at or following iMatch. 3520 ** Move the iterator to the next entry at or following iMatch.
2833 */ 3521 */
2834 static void fts5MultiIterNextFrom( 3522 static void fts5MultiIterNextFrom(
2835 Fts5Index *p, 3523 Fts5Index *p,
2836 Fts5IndexIter *pIter, 3524 Fts5Iter *pIter,
2837 i64 iMatch 3525 i64 iMatch
2838 ){ 3526 ){
2839 while( 1 ){ 3527 while( 1 ){
2840 i64 iRowid; 3528 i64 iRowid;
2841 fts5MultiIterNext(p, pIter, 1, iMatch); 3529 fts5MultiIterNext(p, pIter, 1, iMatch);
2842 if( fts5MultiIterEof(p, pIter) ) break; 3530 if( fts5MultiIterEof(p, pIter) ) break;
2843 iRowid = fts5MultiIterRowid(pIter); 3531 iRowid = fts5MultiIterRowid(pIter);
2844 if( pIter->bRev==0 && iRowid>=iMatch ) break; 3532 if( pIter->bRev==0 && iRowid>=iMatch ) break;
2845 if( pIter->bRev!=0 && iRowid<=iMatch ) break; 3533 if( pIter->bRev!=0 && iRowid<=iMatch ) break;
2846 } 3534 }
2847 } 3535 }
2848 3536
2849 /* 3537 /*
2850 ** Return a pointer to a buffer containing the term associated with the 3538 ** Return a pointer to a buffer containing the term associated with the
2851 ** entry that the iterator currently points to. 3539 ** entry that the iterator currently points to.
2852 */ 3540 */
2853 static const u8 *fts5MultiIterTerm(Fts5IndexIter *pIter, int *pn){ 3541 static const u8 *fts5MultiIterTerm(Fts5Iter *pIter, int *pn){
2854 Fts5SegIter *p = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; 3542 Fts5SegIter *p = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
2855 *pn = p->term.n; 3543 *pn = p->term.n;
2856 return p->term.p; 3544 return p->term.p;
2857 } 3545 }
2858 3546
2859 static void fts5ChunkIterate(
2860 Fts5Index *p, /* Index object */
2861 Fts5SegIter *pSeg, /* Poslist of this iterator */
2862 void *pCtx, /* Context pointer for xChunk callback */
2863 void (*xChunk)(Fts5Index*, void*, const u8*, int)
2864 ){
2865 int nRem = pSeg->nPos; /* Number of bytes still to come */
2866 Fts5Data *pData = 0;
2867 u8 *pChunk = &pSeg->pLeaf->p[pSeg->iLeafOffset];
2868 int nChunk = MIN(nRem, pSeg->pLeaf->szLeaf - pSeg->iLeafOffset);
2869 int pgno = pSeg->iLeafPgno;
2870 int pgnoSave = 0;
2871
2872 if( (pSeg->flags & FTS5_SEGITER_REVERSE)==0 ){
2873 pgnoSave = pgno+1;
2874 }
2875
2876 while( 1 ){
2877 xChunk(p, pCtx, pChunk, nChunk);
2878 nRem -= nChunk;
2879 fts5DataRelease(pData);
2880 if( nRem<=0 ){
2881 break;
2882 }else{
2883 pgno++;
2884 pData = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->pSeg->iSegid, pgno));
2885 if( pData==0 ) break;
2886 pChunk = &pData->p[4];
2887 nChunk = MIN(nRem, pData->szLeaf - 4);
2888 if( pgno==pgnoSave ){
2889 assert( pSeg->pNextLeaf==0 );
2890 pSeg->pNextLeaf = pData;
2891 pData = 0;
2892 }
2893 }
2894 }
2895 }
2896
2897
2898
2899 /* 3547 /*
2900 ** Allocate a new segment-id for the structure pStruct. The new segment 3548 ** Allocate a new segment-id for the structure pStruct. The new segment
2901 ** id must be between 1 and 65335 inclusive, and must not be used by 3549 ** id must be between 1 and 65335 inclusive, and must not be used by
2902 ** any currently existing segment. If a free segment id cannot be found, 3550 ** any currently existing segment. If a free segment id cannot be found,
2903 ** SQLITE_FULL is returned. 3551 ** SQLITE_FULL is returned.
2904 ** 3552 **
2905 ** If an error has already occurred, this function is a no-op. 0 is 3553 ** If an error has already occurred, this function is a no-op. 0 is
2906 ** returned in this case. 3554 ** returned in this case.
2907 */ 3555 */
2908 static int fts5AllocateSegid(Fts5Index *p, Fts5Structure *pStruct){ 3556 static int fts5AllocateSegid(Fts5Index *p, Fts5Structure *pStruct){
2909 int iSegid = 0; 3557 int iSegid = 0;
2910 3558
2911 if( p->rc==SQLITE_OK ){ 3559 if( p->rc==SQLITE_OK ){
2912 if( pStruct->nSegment>=FTS5_MAX_SEGMENT ){ 3560 if( pStruct->nSegment>=FTS5_MAX_SEGMENT ){
2913 p->rc = SQLITE_FULL; 3561 p->rc = SQLITE_FULL;
2914 }else{ 3562 }else{
2915 while( iSegid==0 ){ 3563 /* FTS5_MAX_SEGMENT is currently defined as 2000. So the following
2916 int iLvl, iSeg; 3564 ** array is 63 elements, or 252 bytes, in size. */
2917 sqlite3_randomness(sizeof(u32), (void*)&iSegid); 3565 u32 aUsed[(FTS5_MAX_SEGMENT+31) / 32];
2918 iSegid = iSegid & ((1 << FTS5_DATA_ID_B)-1); 3566 int iLvl, iSeg;
2919 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ 3567 int i;
2920 for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){ 3568 u32 mask;
2921 if( iSegid==pStruct->aLevel[iLvl].aSeg[iSeg].iSegid ){ 3569 memset(aUsed, 0, sizeof(aUsed));
2922 iSegid = 0; 3570 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
2923 } 3571 for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
3572 int iId = pStruct->aLevel[iLvl].aSeg[iSeg].iSegid;
3573 if( iId<=FTS5_MAX_SEGMENT ){
3574 aUsed[(iId-1) / 32] |= 1 << ((iId-1) % 32);
2924 } 3575 }
2925 } 3576 }
2926 } 3577 }
3578
3579 for(i=0; aUsed[i]==0xFFFFFFFF; i++);
3580 mask = aUsed[i];
3581 for(iSegid=0; mask & (1 << iSegid); iSegid++);
3582 iSegid += 1 + i*32;
3583
3584 #ifdef SQLITE_DEBUG
3585 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
3586 for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
3587 assert( iSegid!=pStruct->aLevel[iLvl].aSeg[iSeg].iSegid );
3588 }
3589 }
3590 assert( iSegid>0 && iSegid<=FTS5_MAX_SEGMENT );
3591
3592 {
3593 sqlite3_stmt *pIdxSelect = fts5IdxSelectStmt(p);
3594 if( p->rc==SQLITE_OK ){
3595 u8 aBlob[2] = {0xff, 0xff};
3596 sqlite3_bind_int(pIdxSelect, 1, iSegid);
3597 sqlite3_bind_blob(pIdxSelect, 2, aBlob, 2, SQLITE_STATIC);
3598 assert( sqlite3_step(pIdxSelect)!=SQLITE_ROW );
3599 p->rc = sqlite3_reset(pIdxSelect);
3600 }
3601 }
3602 #endif
2927 } 3603 }
2928 } 3604 }
2929 3605
2930 return iSegid; 3606 return iSegid;
2931 } 3607 }
2932 3608
2933 /* 3609 /*
2934 ** Discard all data currently cached in the hash-tables. 3610 ** Discard all data currently cached in the hash-tables.
2935 */ 3611 */
2936 static void fts5IndexDiscardData(Fts5Index *p){ 3612 static void fts5IndexDiscardData(Fts5Index *p){
2937 assert( p->pHash || p->nPendingData==0 ); 3613 assert( p->pHash || p->nPendingData==0 );
2938 if( p->pHash ){ 3614 if( p->pHash ){
2939 sqlite3Fts5HashClear(p->pHash); 3615 sqlite3Fts5HashClear(p->pHash);
2940 p->nPendingData = 0; 3616 p->nPendingData = 0;
2941 } 3617 }
2942 } 3618 }
2943 3619
2944 /* 3620 /*
2945 ** Return the size of the prefix, in bytes, that buffer (nNew/pNew) shares 3621 ** Return the size of the prefix, in bytes, that buffer
2946 ** with buffer (nOld/pOld). 3622 ** (pNew/<length-unknown>) shares with buffer (pOld/nOld).
3623 **
3624 ** Buffer (pNew/<length-unknown>) is guaranteed to be greater
3625 ** than buffer (pOld/nOld).
2947 */ 3626 */
2948 static int fts5PrefixCompress( 3627 static int fts5PrefixCompress(int nOld, const u8 *pOld, const u8 *pNew){
2949 int nOld, const u8 *pOld,
2950 int nNew, const u8 *pNew
2951 ){
2952 int i; 3628 int i;
2953 assert( fts5BlobCompare(pOld, nOld, pNew, nNew)<0 );
2954 for(i=0; i<nOld; i++){ 3629 for(i=0; i<nOld; i++){
2955 if( pOld[i]!=pNew[i] ) break; 3630 if( pOld[i]!=pNew[i] ) break;
2956 } 3631 }
2957 return i; 3632 return i;
2958 } 3633 }
2959 3634
2960 static void fts5WriteDlidxClear( 3635 static void fts5WriteDlidxClear(
2961 Fts5Index *p, 3636 Fts5Index *p,
2962 Fts5SegWriter *pWriter, 3637 Fts5SegWriter *pWriter,
2963 int bFlush /* If true, write dlidx to disk */ 3638 int bFlush /* If true, write dlidx to disk */
(...skipping 199 matching lines...) Expand 10 before | Expand all | Expand 10 after
3163 pDlidx->bPrevValid = 1; 3838 pDlidx->bPrevValid = 1;
3164 pDlidx->iPrev = iRowid; 3839 pDlidx->iPrev = iRowid;
3165 } 3840 }
3166 } 3841 }
3167 3842
3168 static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){ 3843 static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){
3169 static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 }; 3844 static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 };
3170 Fts5PageWriter *pPage = &pWriter->writer; 3845 Fts5PageWriter *pPage = &pWriter->writer;
3171 i64 iRowid; 3846 i64 iRowid;
3172 3847
3848 static int nCall = 0;
3849 nCall++;
3850
3173 assert( (pPage->pgidx.n==0)==(pWriter->bFirstTermInPage) ); 3851 assert( (pPage->pgidx.n==0)==(pWriter->bFirstTermInPage) );
3174 3852
3175 /* Set the szLeaf header field. */ 3853 /* Set the szLeaf header field. */
3176 assert( 0==fts5GetU16(&pPage->buf.p[2]) ); 3854 assert( 0==fts5GetU16(&pPage->buf.p[2]) );
3177 fts5PutU16(&pPage->buf.p[2], (u16)pPage->buf.n); 3855 fts5PutU16(&pPage->buf.p[2], (u16)pPage->buf.n);
3178 3856
3179 if( pWriter->bFirstTermInPage ){ 3857 if( pWriter->bFirstTermInPage ){
3180 /* No term was written to this page. */ 3858 /* No term was written to this page. */
3181 assert( pPage->pgidx.n==0 ); 3859 assert( pPage->pgidx.n==0 );
3182 fts5WriteBtreeNoTerm(p, pWriter); 3860 fts5WriteBtreeNoTerm(p, pWriter);
(...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after
3253 ** byte longer than the longest prefix (pTerm/nTerm) shares with the 3931 ** byte longer than the longest prefix (pTerm/nTerm) shares with the
3254 ** previous term. 3932 ** previous term.
3255 ** 3933 **
3256 ** Usually, the previous term is available in pPage->term. The exception 3934 ** Usually, the previous term is available in pPage->term. The exception
3257 ** is if this is the first term written in an incremental-merge step. 3935 ** is if this is the first term written in an incremental-merge step.
3258 ** In this case the previous term is not available, so just write a 3936 ** In this case the previous term is not available, so just write a
3259 ** copy of (pTerm/nTerm) into the parent node. This is slightly 3937 ** copy of (pTerm/nTerm) into the parent node. This is slightly
3260 ** inefficient, but still correct. */ 3938 ** inefficient, but still correct. */
3261 int n = nTerm; 3939 int n = nTerm;
3262 if( pPage->term.n ){ 3940 if( pPage->term.n ){
3263 n = 1 + fts5PrefixCompress(pPage->term.n, pPage->term.p, nTerm, pTerm); 3941 n = 1 + fts5PrefixCompress(pPage->term.n, pPage->term.p, pTerm);
3264 } 3942 }
3265 fts5WriteBtreeTerm(p, pWriter, n, pTerm); 3943 fts5WriteBtreeTerm(p, pWriter, n, pTerm);
3266 pPage = &pWriter->writer; 3944 pPage = &pWriter->writer;
3267 } 3945 }
3268 }else{ 3946 }else{
3269 nPrefix = fts5PrefixCompress(pPage->term.n, pPage->term.p, nTerm, pTerm); 3947 nPrefix = fts5PrefixCompress(pPage->term.n, pPage->term.p, pTerm);
3270 fts5BufferAppendVarint(&p->rc, &pPage->buf, nPrefix); 3948 fts5BufferAppendVarint(&p->rc, &pPage->buf, nPrefix);
3271 } 3949 }
3272 3950
3273 /* Append the number of bytes of new data, then the term data itself 3951 /* Append the number of bytes of new data, then the term data itself
3274 ** to the page. */ 3952 ** to the page. */
3275 fts5BufferAppendVarint(&p->rc, &pPage->buf, nTerm - nPrefix); 3953 fts5BufferAppendVarint(&p->rc, &pPage->buf, nTerm - nPrefix);
3276 fts5BufferAppendBlob(&p->rc, &pPage->buf, nTerm - nPrefix, &pTerm[nPrefix]); 3954 fts5BufferAppendBlob(&p->rc, &pPage->buf, nTerm - nPrefix, &pTerm[nPrefix]);
3277 3955
3278 /* Update the Fts5PageWriter.term field. */ 3956 /* Update the Fts5PageWriter.term field. */
3279 fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm); 3957 fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm);
3280 pWriter->bFirstTermInPage = 0; 3958 pWriter->bFirstTermInPage = 0;
3281 3959
3282 pWriter->bFirstRowidInPage = 0; 3960 pWriter->bFirstRowidInPage = 0;
3283 pWriter->bFirstRowidInDoclist = 1; 3961 pWriter->bFirstRowidInDoclist = 1;
3284 3962
3285 assert( p->rc || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n==0) ); 3963 assert( p->rc || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n==0) );
3286 pWriter->aDlidx[0].pgno = pPage->pgno; 3964 pWriter->aDlidx[0].pgno = pPage->pgno;
3287 } 3965 }
3288 3966
3289 /* 3967 /*
3290 ** Append a rowid and position-list size field to the writers output. 3968 ** Append a rowid and position-list size field to the writers output.
3291 */ 3969 */
3292 static void fts5WriteAppendRowid( 3970 static void fts5WriteAppendRowid(
3293 Fts5Index *p, 3971 Fts5Index *p,
3294 Fts5SegWriter *pWriter, 3972 Fts5SegWriter *pWriter,
3295 i64 iRowid, 3973 i64 iRowid
3296 int nPos
3297 ){ 3974 ){
3298 if( p->rc==SQLITE_OK ){ 3975 if( p->rc==SQLITE_OK ){
3299 Fts5PageWriter *pPage = &pWriter->writer; 3976 Fts5PageWriter *pPage = &pWriter->writer;
3300 3977
3301 if( (pPage->buf.n + pPage->pgidx.n)>=p->pConfig->pgsz ){ 3978 if( (pPage->buf.n + pPage->pgidx.n)>=p->pConfig->pgsz ){
3302 fts5WriteFlushLeaf(p, pWriter); 3979 fts5WriteFlushLeaf(p, pWriter);
3303 } 3980 }
3304 3981
3305 /* If this is to be the first rowid written to the page, set the 3982 /* If this is to be the first rowid written to the page, set the
3306 ** rowid-pointer in the page-header. Also append a value to the dlidx 3983 ** rowid-pointer in the page-header. Also append a value to the dlidx
3307 ** buffer, in case a doclist-index is required. */ 3984 ** buffer, in case a doclist-index is required. */
3308 if( pWriter->bFirstRowidInPage ){ 3985 if( pWriter->bFirstRowidInPage ){
3309 fts5PutU16(pPage->buf.p, (u16)pPage->buf.n); 3986 fts5PutU16(pPage->buf.p, (u16)pPage->buf.n);
3310 fts5WriteDlidxAppend(p, pWriter, iRowid); 3987 fts5WriteDlidxAppend(p, pWriter, iRowid);
3311 } 3988 }
3312 3989
3313 /* Write the rowid. */ 3990 /* Write the rowid. */
3314 if( pWriter->bFirstRowidInDoclist || pWriter->bFirstRowidInPage ){ 3991 if( pWriter->bFirstRowidInDoclist || pWriter->bFirstRowidInPage ){
3315 fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid); 3992 fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid);
3316 }else{ 3993 }else{
3317 assert( p->rc || iRowid>pWriter->iPrevRowid ); 3994 assert( p->rc || iRowid>pWriter->iPrevRowid );
3318 fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid - pWriter->iPrevRowid); 3995 fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid - pWriter->iPrevRowid);
3319 } 3996 }
3320 pWriter->iPrevRowid = iRowid; 3997 pWriter->iPrevRowid = iRowid;
3321 pWriter->bFirstRowidInDoclist = 0; 3998 pWriter->bFirstRowidInDoclist = 0;
3322 pWriter->bFirstRowidInPage = 0; 3999 pWriter->bFirstRowidInPage = 0;
3323
3324 fts5BufferAppendVarint(&p->rc, &pPage->buf, nPos);
3325 } 4000 }
3326 } 4001 }
3327 4002
3328 static void fts5WriteAppendPoslistData( 4003 static void fts5WriteAppendPoslistData(
3329 Fts5Index *p, 4004 Fts5Index *p,
3330 Fts5SegWriter *pWriter, 4005 Fts5SegWriter *pWriter,
3331 const u8 *aData, 4006 const u8 *aData,
3332 int nData 4007 int nData
3333 ){ 4008 ){
3334 Fts5PageWriter *pPage = &pWriter->writer; 4009 Fts5PageWriter *pPage = &pWriter->writer;
(...skipping 30 matching lines...) Expand all
3365 int *pnLeaf /* OUT: Number of leaf pages in b-tree */ 4040 int *pnLeaf /* OUT: Number of leaf pages in b-tree */
3366 ){ 4041 ){
3367 int i; 4042 int i;
3368 Fts5PageWriter *pLeaf = &pWriter->writer; 4043 Fts5PageWriter *pLeaf = &pWriter->writer;
3369 if( p->rc==SQLITE_OK ){ 4044 if( p->rc==SQLITE_OK ){
3370 assert( pLeaf->pgno>=1 ); 4045 assert( pLeaf->pgno>=1 );
3371 if( pLeaf->buf.n>4 ){ 4046 if( pLeaf->buf.n>4 ){
3372 fts5WriteFlushLeaf(p, pWriter); 4047 fts5WriteFlushLeaf(p, pWriter);
3373 } 4048 }
3374 *pnLeaf = pLeaf->pgno-1; 4049 *pnLeaf = pLeaf->pgno-1;
3375 fts5WriteFlushBtree(p, pWriter); 4050 if( pLeaf->pgno>1 ){
4051 fts5WriteFlushBtree(p, pWriter);
4052 }
3376 } 4053 }
3377 fts5BufferFree(&pLeaf->term); 4054 fts5BufferFree(&pLeaf->term);
3378 fts5BufferFree(&pLeaf->buf); 4055 fts5BufferFree(&pLeaf->buf);
3379 fts5BufferFree(&pLeaf->pgidx); 4056 fts5BufferFree(&pLeaf->pgidx);
3380 fts5BufferFree(&pWriter->btterm); 4057 fts5BufferFree(&pWriter->btterm);
3381 4058
3382 for(i=0; i<pWriter->nDlidx; i++){ 4059 for(i=0; i<pWriter->nDlidx; i++){
3383 sqlite3Fts5BufferFree(&pWriter->aDlidx[i].buf); 4060 sqlite3Fts5BufferFree(&pWriter->aDlidx[i].buf);
3384 } 4061 }
3385 sqlite3_free(pWriter->aDlidx); 4062 sqlite3_free(pWriter->aDlidx);
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after
3425 ** inserted into %_idx by the current writer. */ 4102 ** inserted into %_idx by the current writer. */
3426 sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid); 4103 sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid);
3427 } 4104 }
3428 } 4105 }
3429 4106
3430 /* 4107 /*
3431 ** Iterator pIter was used to iterate through the input segments of on an 4108 ** Iterator pIter was used to iterate through the input segments of on an
3432 ** incremental merge operation. This function is called if the incremental 4109 ** incremental merge operation. This function is called if the incremental
3433 ** merge step has finished but the input has not been completely exhausted. 4110 ** merge step has finished but the input has not been completely exhausted.
3434 */ 4111 */
3435 static void fts5TrimSegments(Fts5Index *p, Fts5IndexIter *pIter){ 4112 static void fts5TrimSegments(Fts5Index *p, Fts5Iter *pIter){
3436 int i; 4113 int i;
3437 Fts5Buffer buf; 4114 Fts5Buffer buf;
3438 memset(&buf, 0, sizeof(Fts5Buffer)); 4115 memset(&buf, 0, sizeof(Fts5Buffer));
3439 for(i=0; i<pIter->nSeg; i++){ 4116 for(i=0; i<pIter->nSeg; i++){
3440 Fts5SegIter *pSeg = &pIter->aSeg[i]; 4117 Fts5SegIter *pSeg = &pIter->aSeg[i];
3441 if( pSeg->pSeg==0 ){ 4118 if( pSeg->pSeg==0 ){
3442 /* no-op */ 4119 /* no-op */
3443 }else if( pSeg->pLeaf==0 ){ 4120 }else if( pSeg->pLeaf==0 ){
3444 /* All keys from this input segment have been transfered to the output. 4121 /* All keys from this input segment have been transfered to the output.
3445 ** Set both the first and last page-numbers to 0 to indicate that the 4122 ** Set both the first and last page-numbers to 0 to indicate that the
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after
3503 */ 4180 */
3504 static void fts5IndexMergeLevel( 4181 static void fts5IndexMergeLevel(
3505 Fts5Index *p, /* FTS5 backend object */ 4182 Fts5Index *p, /* FTS5 backend object */
3506 Fts5Structure **ppStruct, /* IN/OUT: Stucture of index */ 4183 Fts5Structure **ppStruct, /* IN/OUT: Stucture of index */
3507 int iLvl, /* Level to read input from */ 4184 int iLvl, /* Level to read input from */
3508 int *pnRem /* Write up to this many output leaves */ 4185 int *pnRem /* Write up to this many output leaves */
3509 ){ 4186 ){
3510 Fts5Structure *pStruct = *ppStruct; 4187 Fts5Structure *pStruct = *ppStruct;
3511 Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; 4188 Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
3512 Fts5StructureLevel *pLvlOut; 4189 Fts5StructureLevel *pLvlOut;
3513 Fts5IndexIter *pIter = 0; /* Iterator to read input data */ 4190 Fts5Iter *pIter = 0; /* Iterator to read input data */
3514 int nRem = pnRem ? *pnRem : 0; /* Output leaf pages left to write */ 4191 int nRem = pnRem ? *pnRem : 0; /* Output leaf pages left to write */
3515 int nInput; /* Number of input segments */ 4192 int nInput; /* Number of input segments */
3516 Fts5SegWriter writer; /* Writer object */ 4193 Fts5SegWriter writer; /* Writer object */
3517 Fts5StructureSegment *pSeg; /* Output segment */ 4194 Fts5StructureSegment *pSeg; /* Output segment */
3518 Fts5Buffer term; 4195 Fts5Buffer term;
3519 int bOldest; /* True if the output segment is the oldest */ 4196 int bOldest; /* True if the output segment is the oldest */
4197 int eDetail = p->pConfig->eDetail;
4198 const int flags = FTS5INDEX_QUERY_NOOUTPUT;
3520 4199
3521 assert( iLvl<pStruct->nLevel ); 4200 assert( iLvl<pStruct->nLevel );
3522 assert( pLvl->nMerge<=pLvl->nSeg ); 4201 assert( pLvl->nMerge<=pLvl->nSeg );
3523 4202
3524 memset(&writer, 0, sizeof(Fts5SegWriter)); 4203 memset(&writer, 0, sizeof(Fts5SegWriter));
3525 memset(&term, 0, sizeof(Fts5Buffer)); 4204 memset(&term, 0, sizeof(Fts5Buffer));
3526 if( pLvl->nMerge ){ 4205 if( pLvl->nMerge ){
3527 pLvlOut = &pStruct->aLevel[iLvl+1]; 4206 pLvlOut = &pStruct->aLevel[iLvl+1];
3528 assert( pLvlOut->nSeg>0 ); 4207 assert( pLvlOut->nSeg>0 );
3529 nInput = pLvl->nMerge; 4208 nInput = pLvl->nMerge;
(...skipping 24 matching lines...) Expand all
3554 pSeg->pgnoFirst = 1; 4233 pSeg->pgnoFirst = 1;
3555 pSeg->iSegid = iSegid; 4234 pSeg->iSegid = iSegid;
3556 pStruct->nSegment++; 4235 pStruct->nSegment++;
3557 4236
3558 /* Read input from all segments in the input level */ 4237 /* Read input from all segments in the input level */
3559 nInput = pLvl->nSeg; 4238 nInput = pLvl->nSeg;
3560 } 4239 }
3561 bOldest = (pLvlOut->nSeg==1 && pStruct->nLevel==iLvl+2); 4240 bOldest = (pLvlOut->nSeg==1 && pStruct->nLevel==iLvl+2);
3562 4241
3563 assert( iLvl>=0 ); 4242 assert( iLvl>=0 );
3564 for(fts5MultiIterNew(p, pStruct, 0, 0, 0, 0, iLvl, nInput, &pIter); 4243 for(fts5MultiIterNew(p, pStruct, flags, 0, 0, 0, iLvl, nInput, &pIter);
3565 fts5MultiIterEof(p, pIter)==0; 4244 fts5MultiIterEof(p, pIter)==0;
3566 fts5MultiIterNext(p, pIter, 0, 0) 4245 fts5MultiIterNext(p, pIter, 0, 0)
3567 ){ 4246 ){
3568 Fts5SegIter *pSegIter = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; 4247 Fts5SegIter *pSegIter = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
3569 int nPos; /* position-list size field value */ 4248 int nPos; /* position-list size field value */
3570 int nTerm; 4249 int nTerm;
3571 const u8 *pTerm; 4250 const u8 *pTerm;
3572 4251
3573 /* Check for key annihilation. */ 4252 /* Check for key annihilation. */
3574 if( pSegIter->nPos==0 && (bOldest || pSegIter->bDel==0) ) continue; 4253 if( pSegIter->nPos==0 && (bOldest || pSegIter->bDel==0) ) continue;
3575 4254
3576 pTerm = fts5MultiIterTerm(pIter, &nTerm); 4255 pTerm = fts5MultiIterTerm(pIter, &nTerm);
3577 if( nTerm!=term.n || memcmp(pTerm, term.p, nTerm) ){ 4256 if( nTerm!=term.n || memcmp(pTerm, term.p, nTerm) ){
3578 if( pnRem && writer.nLeafWritten>nRem ){ 4257 if( pnRem && writer.nLeafWritten>nRem ){
3579 break; 4258 break;
3580 } 4259 }
3581 4260
3582 /* This is a new term. Append a term to the output segment. */ 4261 /* This is a new term. Append a term to the output segment. */
3583 fts5WriteAppendTerm(p, &writer, nTerm, pTerm); 4262 fts5WriteAppendTerm(p, &writer, nTerm, pTerm);
3584 fts5BufferSet(&p->rc, &term, nTerm, pTerm); 4263 fts5BufferSet(&p->rc, &term, nTerm, pTerm);
3585 } 4264 }
3586 4265
3587 /* Append the rowid to the output */ 4266 /* Append the rowid to the output */
3588 /* WRITEPOSLISTSIZE */ 4267 /* WRITEPOSLISTSIZE */
3589 nPos = pSegIter->nPos*2 + pSegIter->bDel; 4268 fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter));
3590 fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter), nPos);
3591 4269
3592 /* Append the position-list data to the output */ 4270 if( eDetail==FTS5_DETAIL_NONE ){
3593 fts5ChunkIterate(p, pSegIter, (void*)&writer, fts5MergeChunkCallback); 4271 if( pSegIter->bDel ){
4272 fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0);
4273 if( pSegIter->nPos>0 ){
4274 fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0);
4275 }
4276 }
4277 }else{
4278 /* Append the position-list data to the output */
4279 nPos = pSegIter->nPos*2 + pSegIter->bDel;
4280 fts5BufferAppendVarint(&p->rc, &writer.writer.buf, nPos);
4281 fts5ChunkIterate(p, pSegIter, (void*)&writer, fts5MergeChunkCallback);
4282 }
3594 } 4283 }
3595 4284
3596 /* Flush the last leaf page to disk. Set the output segment b-tree height 4285 /* Flush the last leaf page to disk. Set the output segment b-tree height
3597 ** and last leaf page number at the same time. */ 4286 ** and last leaf page number at the same time. */
3598 fts5WriteFinish(p, &writer, &pSeg->pgnoLast); 4287 fts5WriteFinish(p, &writer, &pSeg->pgnoLast);
3599 4288
3600 if( fts5MultiIterEof(p, pIter) ){ 4289 if( fts5MultiIterEof(p, pIter) ){
3601 int i; 4290 int i;
3602 4291
3603 /* Remove the redundant segments from the %_data table */ 4292 /* Remove the redundant segments from the %_data table */
(...skipping 12 matching lines...) Expand all
3616 if( pSeg->pgnoLast==0 ){ 4305 if( pSeg->pgnoLast==0 ){
3617 pLvlOut->nSeg--; 4306 pLvlOut->nSeg--;
3618 pStruct->nSegment--; 4307 pStruct->nSegment--;
3619 } 4308 }
3620 }else{ 4309 }else{
3621 assert( pSeg->pgnoLast>0 ); 4310 assert( pSeg->pgnoLast>0 );
3622 fts5TrimSegments(p, pIter); 4311 fts5TrimSegments(p, pIter);
3623 pLvl->nMerge = nInput; 4312 pLvl->nMerge = nInput;
3624 } 4313 }
3625 4314
3626 fts5MultiIterFree(p, pIter); 4315 fts5MultiIterFree(pIter);
3627 fts5BufferFree(&term); 4316 fts5BufferFree(&term);
3628 if( pnRem ) *pnRem -= writer.nLeafWritten; 4317 if( pnRem ) *pnRem -= writer.nLeafWritten;
3629 } 4318 }
3630 4319
3631 /* 4320 /*
3632 ** Do up to nPg pages of automerge work on the index. 4321 ** Do up to nPg pages of automerge work on the index.
4322 **
4323 ** Return true if any changes were actually made, or false otherwise.
3633 */ 4324 */
3634 static void fts5IndexMerge( 4325 static int fts5IndexMerge(
3635 Fts5Index *p, /* FTS5 backend object */ 4326 Fts5Index *p, /* FTS5 backend object */
3636 Fts5Structure **ppStruct, /* IN/OUT: Current structure of index */ 4327 Fts5Structure **ppStruct, /* IN/OUT: Current structure of index */
3637 int nPg /* Pages of work to do */ 4328 int nPg, /* Pages of work to do */
4329 int nMin /* Minimum number of segments to merge */
3638 ){ 4330 ){
3639 int nRem = nPg; 4331 int nRem = nPg;
4332 int bRet = 0;
3640 Fts5Structure *pStruct = *ppStruct; 4333 Fts5Structure *pStruct = *ppStruct;
3641 while( nRem>0 && p->rc==SQLITE_OK ){ 4334 while( nRem>0 && p->rc==SQLITE_OK ){
3642 int iLvl; /* To iterate through levels */ 4335 int iLvl; /* To iterate through levels */
3643 int iBestLvl = 0; /* Level offering the most input segments */ 4336 int iBestLvl = 0; /* Level offering the most input segments */
3644 int nBest = 0; /* Number of input segments on best level */ 4337 int nBest = 0; /* Number of input segments on best level */
3645 4338
3646 /* Set iBestLvl to the level to read input segments from. */ 4339 /* Set iBestLvl to the level to read input segments from. */
3647 assert( pStruct->nLevel>0 ); 4340 assert( pStruct->nLevel>0 );
3648 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ 4341 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
3649 Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; 4342 Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
(...skipping 10 matching lines...) Expand all
3660 } 4353 }
3661 } 4354 }
3662 4355
3663 /* If nBest is still 0, then the index must be empty. */ 4356 /* If nBest is still 0, then the index must be empty. */
3664 #ifdef SQLITE_DEBUG 4357 #ifdef SQLITE_DEBUG
3665 for(iLvl=0; nBest==0 && iLvl<pStruct->nLevel; iLvl++){ 4358 for(iLvl=0; nBest==0 && iLvl<pStruct->nLevel; iLvl++){
3666 assert( pStruct->aLevel[iLvl].nSeg==0 ); 4359 assert( pStruct->aLevel[iLvl].nSeg==0 );
3667 } 4360 }
3668 #endif 4361 #endif
3669 4362
3670 if( nBest<p->pConfig->nAutomerge 4363 if( nBest<nMin && pStruct->aLevel[iBestLvl].nMerge==0 ){
3671 && pStruct->aLevel[iBestLvl].nMerge==0
3672 ){
3673 break; 4364 break;
3674 } 4365 }
4366 bRet = 1;
3675 fts5IndexMergeLevel(p, &pStruct, iBestLvl, &nRem); 4367 fts5IndexMergeLevel(p, &pStruct, iBestLvl, &nRem);
3676 if( p->rc==SQLITE_OK && pStruct->aLevel[iBestLvl].nMerge==0 ){ 4368 if( p->rc==SQLITE_OK && pStruct->aLevel[iBestLvl].nMerge==0 ){
3677 fts5StructurePromote(p, iBestLvl+1, pStruct); 4369 fts5StructurePromote(p, iBestLvl+1, pStruct);
3678 } 4370 }
3679 } 4371 }
3680 *ppStruct = pStruct; 4372 *ppStruct = pStruct;
4373 return bRet;
3681 } 4374 }
3682 4375
3683 /* 4376 /*
3684 ** A total of nLeaf leaf pages of data has just been flushed to a level-0 4377 ** A total of nLeaf leaf pages of data has just been flushed to a level-0
3685 ** segment. This function updates the write-counter accordingly and, if 4378 ** segment. This function updates the write-counter accordingly and, if
3686 ** necessary, performs incremental merge work. 4379 ** necessary, performs incremental merge work.
3687 ** 4380 **
3688 ** If an error occurs, set the Fts5Index.rc error code. If an error has 4381 ** If an error occurs, set the Fts5Index.rc error code. If an error has
3689 ** already occurred, this function is a no-op. 4382 ** already occurred, this function is a no-op.
3690 */ 4383 */
3691 static void fts5IndexAutomerge( 4384 static void fts5IndexAutomerge(
3692 Fts5Index *p, /* FTS5 backend object */ 4385 Fts5Index *p, /* FTS5 backend object */
3693 Fts5Structure **ppStruct, /* IN/OUT: Current structure of index */ 4386 Fts5Structure **ppStruct, /* IN/OUT: Current structure of index */
3694 int nLeaf /* Number of output leaves just written */ 4387 int nLeaf /* Number of output leaves just written */
3695 ){ 4388 ){
3696 if( p->rc==SQLITE_OK && p->pConfig->nAutomerge>0 ){ 4389 if( p->rc==SQLITE_OK && p->pConfig->nAutomerge>0 ){
3697 Fts5Structure *pStruct = *ppStruct; 4390 Fts5Structure *pStruct = *ppStruct;
3698 u64 nWrite; /* Initial value of write-counter */ 4391 u64 nWrite; /* Initial value of write-counter */
3699 int nWork; /* Number of work-quanta to perform */ 4392 int nWork; /* Number of work-quanta to perform */
3700 int nRem; /* Number of leaf pages left to write */ 4393 int nRem; /* Number of leaf pages left to write */
3701 4394
3702 /* Update the write-counter. While doing so, set nWork. */ 4395 /* Update the write-counter. While doing so, set nWork. */
3703 nWrite = pStruct->nWriteCounter; 4396 nWrite = pStruct->nWriteCounter;
3704 nWork = (int)(((nWrite + nLeaf) / p->nWorkUnit) - (nWrite / p->nWorkUnit)); 4397 nWork = (int)(((nWrite + nLeaf) / p->nWorkUnit) - (nWrite / p->nWorkUnit));
3705 pStruct->nWriteCounter += nLeaf; 4398 pStruct->nWriteCounter += nLeaf;
3706 nRem = (int)(p->nWorkUnit * nWork * pStruct->nLevel); 4399 nRem = (int)(p->nWorkUnit * nWork * pStruct->nLevel);
3707 4400
3708 fts5IndexMerge(p, ppStruct, nRem); 4401 fts5IndexMerge(p, ppStruct, nRem, p->pConfig->nAutomerge);
3709 } 4402 }
3710 } 4403 }
3711 4404
3712 static void fts5IndexCrisismerge( 4405 static void fts5IndexCrisismerge(
3713 Fts5Index *p, /* FTS5 backend object */ 4406 Fts5Index *p, /* FTS5 backend object */
3714 Fts5Structure **ppStruct /* IN/OUT: Current structure of index */ 4407 Fts5Structure **ppStruct /* IN/OUT: Current structure of index */
3715 ){ 4408 ){
3716 const int nCrisis = p->pConfig->nCrisisMerge; 4409 const int nCrisis = p->pConfig->nCrisisMerge;
3717 Fts5Structure *pStruct = *ppStruct; 4410 Fts5Structure *pStruct = *ppStruct;
3718 int iLvl = 0; 4411 int iLvl = 0;
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after
3768 static void fts5FlushOneHash(Fts5Index *p){ 4461 static void fts5FlushOneHash(Fts5Index *p){
3769 Fts5Hash *pHash = p->pHash; 4462 Fts5Hash *pHash = p->pHash;
3770 Fts5Structure *pStruct; 4463 Fts5Structure *pStruct;
3771 int iSegid; 4464 int iSegid;
3772 int pgnoLast = 0; /* Last leaf page number in segment */ 4465 int pgnoLast = 0; /* Last leaf page number in segment */
3773 4466
3774 /* Obtain a reference to the index structure and allocate a new segment-id 4467 /* Obtain a reference to the index structure and allocate a new segment-id
3775 ** for the new level-0 segment. */ 4468 ** for the new level-0 segment. */
3776 pStruct = fts5StructureRead(p); 4469 pStruct = fts5StructureRead(p);
3777 iSegid = fts5AllocateSegid(p, pStruct); 4470 iSegid = fts5AllocateSegid(p, pStruct);
4471 fts5StructureInvalidate(p);
3778 4472
3779 if( iSegid ){ 4473 if( iSegid ){
3780 const int pgsz = p->pConfig->pgsz; 4474 const int pgsz = p->pConfig->pgsz;
3781 4475 int eDetail = p->pConfig->eDetail;
3782 Fts5StructureSegment *pSeg; /* New segment within pStruct */ 4476 Fts5StructureSegment *pSeg; /* New segment within pStruct */
3783 Fts5Buffer *pBuf; /* Buffer in which to assemble leaf page */ 4477 Fts5Buffer *pBuf; /* Buffer in which to assemble leaf page */
3784 Fts5Buffer *pPgidx; /* Buffer in which to assemble pgidx */ 4478 Fts5Buffer *pPgidx; /* Buffer in which to assemble pgidx */
3785 4479
3786 Fts5SegWriter writer; 4480 Fts5SegWriter writer;
3787 fts5WriteInit(p, &writer, iSegid); 4481 fts5WriteInit(p, &writer, iSegid);
3788 4482
3789 pBuf = &writer.writer.buf; 4483 pBuf = &writer.writer.buf;
3790 pPgidx = &writer.writer.pgidx; 4484 pPgidx = &writer.writer.pgidx;
3791 4485
(...skipping 22 matching lines...) Expand all
3814 fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist); 4508 fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist);
3815 }else{ 4509 }else{
3816 i64 iRowid = 0; 4510 i64 iRowid = 0;
3817 i64 iDelta = 0; 4511 i64 iDelta = 0;
3818 int iOff = 0; 4512 int iOff = 0;
3819 4513
3820 /* The entire doclist will not fit on this leaf. The following 4514 /* The entire doclist will not fit on this leaf. The following
3821 ** loop iterates through the poslists that make up the current 4515 ** loop iterates through the poslists that make up the current
3822 ** doclist. */ 4516 ** doclist. */
3823 while( p->rc==SQLITE_OK && iOff<nDoclist ){ 4517 while( p->rc==SQLITE_OK && iOff<nDoclist ){
3824 int nPos;
3825 int nCopy;
3826 int bDummy;
3827 iOff += fts5GetVarint(&pDoclist[iOff], (u64*)&iDelta); 4518 iOff += fts5GetVarint(&pDoclist[iOff], (u64*)&iDelta);
3828 nCopy = fts5GetPoslistSize(&pDoclist[iOff], &nPos, &bDummy);
3829 nCopy += nPos;
3830 iRowid += iDelta; 4519 iRowid += iDelta;
3831 4520
3832 if( writer.bFirstRowidInPage ){ 4521 if( writer.bFirstRowidInPage ){
3833 fts5PutU16(&pBuf->p[0], (u16)pBuf->n); /* first rowid on page */ 4522 fts5PutU16(&pBuf->p[0], (u16)pBuf->n); /* first rowid on page */
3834 pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iRowid); 4523 pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iRowid);
3835 writer.bFirstRowidInPage = 0; 4524 writer.bFirstRowidInPage = 0;
3836 fts5WriteDlidxAppend(p, &writer, iRowid); 4525 fts5WriteDlidxAppend(p, &writer, iRowid);
3837 }else{ 4526 }else{
3838 pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iDelta); 4527 pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iDelta);
3839 } 4528 }
3840 assert( pBuf->n<=pBuf->nSpace ); 4529 assert( pBuf->n<=pBuf->nSpace );
3841 4530
3842 if( (pBuf->n + pPgidx->n + nCopy) <= pgsz ){ 4531 if( eDetail==FTS5_DETAIL_NONE ){
3843 /* The entire poslist will fit on the current leaf. So copy 4532 if( iOff<nDoclist && pDoclist[iOff]==0 ){
3844 ** it in one go. */ 4533 pBuf->p[pBuf->n++] = 0;
3845 fts5BufferSafeAppendBlob(pBuf, &pDoclist[iOff], nCopy); 4534 iOff++;
4535 if( iOff<nDoclist && pDoclist[iOff]==0 ){
4536 pBuf->p[pBuf->n++] = 0;
4537 iOff++;
4538 }
4539 }
4540 if( (pBuf->n + pPgidx->n)>=pgsz ){
4541 fts5WriteFlushLeaf(p, &writer);
4542 }
3846 }else{ 4543 }else{
3847 /* The entire poslist will not fit on this leaf. So it needs 4544 int bDummy;
3848 ** to be broken into sections. The only qualification being 4545 int nPos;
3849 ** that each varint must be stored contiguously. */ 4546 int nCopy = fts5GetPoslistSize(&pDoclist[iOff], &nPos, &bDummy);
3850 const u8 *pPoslist = &pDoclist[iOff]; 4547 nCopy += nPos;
3851 int iPos = 0; 4548 if( (pBuf->n + pPgidx->n + nCopy) <= pgsz ){
3852 while( p->rc==SQLITE_OK ){ 4549 /* The entire poslist will fit on the current leaf. So copy
3853 int nSpace = pgsz - pBuf->n - pPgidx->n; 4550 ** it in one go. */
3854 int n = 0; 4551 fts5BufferSafeAppendBlob(pBuf, &pDoclist[iOff], nCopy);
3855 if( (nCopy - iPos)<=nSpace ){ 4552 }else{
3856 n = nCopy - iPos; 4553 /* The entire poslist will not fit on this leaf. So it needs
3857 }else{ 4554 ** to be broken into sections. The only qualification being
3858 n = fts5PoslistPrefix(&pPoslist[iPos], nSpace); 4555 ** that each varint must be stored contiguously. */
4556 const u8 *pPoslist = &pDoclist[iOff];
4557 int iPos = 0;
4558 while( p->rc==SQLITE_OK ){
4559 int nSpace = pgsz - pBuf->n - pPgidx->n;
4560 int n = 0;
4561 if( (nCopy - iPos)<=nSpace ){
4562 n = nCopy - iPos;
4563 }else{
4564 n = fts5PoslistPrefix(&pPoslist[iPos], nSpace);
4565 }
4566 assert( n>0 );
4567 fts5BufferSafeAppendBlob(pBuf, &pPoslist[iPos], n);
4568 iPos += n;
4569 if( (pBuf->n + pPgidx->n)>=pgsz ){
4570 fts5WriteFlushLeaf(p, &writer);
4571 }
4572 if( iPos>=nCopy ) break;
3859 } 4573 }
3860 assert( n>0 );
3861 fts5BufferSafeAppendBlob(pBuf, &pPoslist[iPos], n);
3862 iPos += n;
3863 if( (pBuf->n + pPgidx->n)>=pgsz ){
3864 fts5WriteFlushLeaf(p, &writer);
3865 }
3866 if( iPos>=nCopy ) break;
3867 } 4574 }
4575 iOff += nCopy;
3868 } 4576 }
3869 iOff += nCopy;
3870 } 4577 }
3871 } 4578 }
3872 4579
3873 /* TODO2: Doclist terminator written here. */ 4580 /* TODO2: Doclist terminator written here. */
3874 /* pBuf->p[pBuf->n++] = '\0'; */ 4581 /* pBuf->p[pBuf->n++] = '\0'; */
3875 assert( pBuf->n<=pBuf->nSpace ); 4582 assert( pBuf->n<=pBuf->nSpace );
3876 sqlite3Fts5HashScanNext(pHash); 4583 sqlite3Fts5HashScanNext(pHash);
3877 } 4584 }
3878 sqlite3Fts5HashClear(pHash); 4585 sqlite3Fts5HashClear(pHash);
3879 fts5WriteFinish(p, &writer, &pgnoLast); 4586 fts5WriteFinish(p, &writer, &pgnoLast);
(...skipping 25 matching lines...) Expand all
3905 */ 4612 */
3906 static void fts5IndexFlush(Fts5Index *p){ 4613 static void fts5IndexFlush(Fts5Index *p){
3907 /* Unless it is empty, flush the hash table to disk */ 4614 /* Unless it is empty, flush the hash table to disk */
3908 if( p->nPendingData ){ 4615 if( p->nPendingData ){
3909 assert( p->pHash ); 4616 assert( p->pHash );
3910 p->nPendingData = 0; 4617 p->nPendingData = 0;
3911 fts5FlushOneHash(p); 4618 fts5FlushOneHash(p);
3912 } 4619 }
3913 } 4620 }
3914 4621
4622 static Fts5Structure *fts5IndexOptimizeStruct(
4623 Fts5Index *p,
4624 Fts5Structure *pStruct
4625 ){
4626 Fts5Structure *pNew = 0;
4627 int nByte = sizeof(Fts5Structure);
4628 int nSeg = pStruct->nSegment;
4629 int i;
3915 4630
3916 int sqlite3Fts5IndexOptimize(Fts5Index *p){ 4631 /* Figure out if this structure requires optimization. A structure does
3917 Fts5Structure *pStruct; 4632 ** not require optimization if either:
3918 Fts5Structure *pNew = 0; 4633 **
3919 int nSeg = 0; 4634 ** + it consists of fewer than two segments, or
4635 ** + all segments are on the same level, or
4636 ** + all segments except one are currently inputs to a merge operation.
4637 **
4638 ** In the first case, return NULL. In the second, increment the ref-count
4639 ** on *pStruct and return a copy of the pointer to it.
4640 */
4641 if( nSeg<2 ) return 0;
4642 for(i=0; i<pStruct->nLevel; i++){
4643 int nThis = pStruct->aLevel[i].nSeg;
4644 if( nThis==nSeg || (nThis==nSeg-1 && pStruct->aLevel[i].nMerge==nThis) ){
4645 fts5StructureRef(pStruct);
4646 return pStruct;
4647 }
4648 assert( pStruct->aLevel[i].nMerge<=nThis );
4649 }
3920 4650
3921 assert( p->rc==SQLITE_OK ); 4651 nByte += (pStruct->nLevel+1) * sizeof(Fts5StructureLevel);
3922 fts5IndexFlush(p); 4652 pNew = (Fts5Structure*)sqlite3Fts5MallocZero(&p->rc, nByte);
3923 pStruct = fts5StructureRead(p);
3924 4653
3925 if( pStruct ){
3926 assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) );
3927 nSeg = pStruct->nSegment;
3928 if( nSeg>1 ){
3929 int nByte = sizeof(Fts5Structure);
3930 nByte += (pStruct->nLevel+1) * sizeof(Fts5StructureLevel);
3931 pNew = (Fts5Structure*)sqlite3Fts5MallocZero(&p->rc, nByte);
3932 }
3933 }
3934 if( pNew ){ 4654 if( pNew ){
3935 Fts5StructureLevel *pLvl; 4655 Fts5StructureLevel *pLvl;
3936 int nByte = nSeg * sizeof(Fts5StructureSegment); 4656 nByte = nSeg * sizeof(Fts5StructureSegment);
3937 pNew->nLevel = pStruct->nLevel+1; 4657 pNew->nLevel = pStruct->nLevel+1;
3938 pNew->nRef = 1; 4658 pNew->nRef = 1;
3939 pNew->nWriteCounter = pStruct->nWriteCounter; 4659 pNew->nWriteCounter = pStruct->nWriteCounter;
3940 pLvl = &pNew->aLevel[pStruct->nLevel]; 4660 pLvl = &pNew->aLevel[pStruct->nLevel];
3941 pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&p->rc, nByte); 4661 pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&p->rc, nByte);
3942 if( pLvl->aSeg ){ 4662 if( pLvl->aSeg ){
3943 int iLvl, iSeg; 4663 int iLvl, iSeg;
3944 int iSegOut = 0; 4664 int iSegOut = 0;
3945 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ 4665 /* Iterate through all segments, from oldest to newest. Add them to
4666 ** the new Fts5Level object so that pLvl->aSeg[0] is the oldest
4667 ** segment in the data structure. */
4668 for(iLvl=pStruct->nLevel-1; iLvl>=0; iLvl--){
3946 for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){ 4669 for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
3947 pLvl->aSeg[iSegOut] = pStruct->aLevel[iLvl].aSeg[iSeg]; 4670 pLvl->aSeg[iSegOut] = pStruct->aLevel[iLvl].aSeg[iSeg];
3948 iSegOut++; 4671 iSegOut++;
3949 } 4672 }
3950 } 4673 }
3951 pNew->nSegment = pLvl->nSeg = nSeg; 4674 pNew->nSegment = pLvl->nSeg = nSeg;
3952 }else{ 4675 }else{
3953 sqlite3_free(pNew); 4676 sqlite3_free(pNew);
3954 pNew = 0; 4677 pNew = 0;
3955 } 4678 }
3956 } 4679 }
3957 4680
4681 return pNew;
4682 }
4683
4684 int sqlite3Fts5IndexOptimize(Fts5Index *p){
4685 Fts5Structure *pStruct;
4686 Fts5Structure *pNew = 0;
4687
4688 assert( p->rc==SQLITE_OK );
4689 fts5IndexFlush(p);
4690 pStruct = fts5StructureRead(p);
4691 fts5StructureInvalidate(p);
4692
4693 if( pStruct ){
4694 pNew = fts5IndexOptimizeStruct(p, pStruct);
4695 }
4696 fts5StructureRelease(pStruct);
4697
4698 assert( pNew==0 || pNew->nSegment>0 );
3958 if( pNew ){ 4699 if( pNew ){
3959 int iLvl = pNew->nLevel-1; 4700 int iLvl;
4701 for(iLvl=0; pNew->aLevel[iLvl].nSeg==0; iLvl++){}
3960 while( p->rc==SQLITE_OK && pNew->aLevel[iLvl].nSeg>0 ){ 4702 while( p->rc==SQLITE_OK && pNew->aLevel[iLvl].nSeg>0 ){
3961 int nRem = FTS5_OPT_WORK_UNIT; 4703 int nRem = FTS5_OPT_WORK_UNIT;
3962 fts5IndexMergeLevel(p, &pNew, iLvl, &nRem); 4704 fts5IndexMergeLevel(p, &pNew, iLvl, &nRem);
3963 } 4705 }
3964 4706
3965 fts5StructureWrite(p, pNew); 4707 fts5StructureWrite(p, pNew);
3966 fts5StructureRelease(pNew); 4708 fts5StructureRelease(pNew);
3967 } 4709 }
3968 4710
3969 fts5StructureRelease(pStruct);
3970 return fts5IndexReturn(p); 4711 return fts5IndexReturn(p);
3971 } 4712 }
3972 4713
4714 /*
4715 ** This is called to implement the special "VALUES('merge', $nMerge)"
4716 ** INSERT command.
4717 */
3973 int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge){ 4718 int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge){
3974 Fts5Structure *pStruct; 4719 Fts5Structure *pStruct = fts5StructureRead(p);
3975 4720 if( pStruct ){
3976 pStruct = fts5StructureRead(p); 4721 int nMin = p->pConfig->nUsermerge;
3977 if( pStruct && pStruct->nLevel ){ 4722 fts5StructureInvalidate(p);
3978 fts5IndexMerge(p, &pStruct, nMerge); 4723 if( nMerge<0 ){
3979 fts5StructureWrite(p, pStruct); 4724 Fts5Structure *pNew = fts5IndexOptimizeStruct(p, pStruct);
4725 fts5StructureRelease(pStruct);
4726 pStruct = pNew;
4727 nMin = 2;
4728 nMerge = nMerge*-1;
4729 }
4730 if( pStruct && pStruct->nLevel ){
4731 if( fts5IndexMerge(p, &pStruct, nMerge, nMin) ){
4732 fts5StructureWrite(p, pStruct);
4733 }
4734 }
4735 fts5StructureRelease(pStruct);
3980 } 4736 }
3981 fts5StructureRelease(pStruct);
3982
3983 return fts5IndexReturn(p); 4737 return fts5IndexReturn(p);
3984 } 4738 }
3985 4739
3986 static void fts5PoslistCallback( 4740 static void fts5AppendRowid(
3987 Fts5Index *p, 4741 Fts5Index *p,
3988 void *pContext, 4742 i64 iDelta,
3989 const u8 *pChunk, int nChunk 4743 Fts5Iter *pUnused,
4744 Fts5Buffer *pBuf
3990 ){ 4745 ){
3991 assert_nc( nChunk>=0 ); 4746 UNUSED_PARAM(pUnused);
3992 if( nChunk>0 ){ 4747 fts5BufferAppendVarint(&p->rc, pBuf, iDelta);
3993 fts5BufferSafeAppendBlob((Fts5Buffer*)pContext, pChunk, nChunk); 4748 }
4749
4750 static void fts5AppendPoslist(
4751 Fts5Index *p,
4752 i64 iDelta,
4753 Fts5Iter *pMulti,
4754 Fts5Buffer *pBuf
4755 ){
4756 int nData = pMulti->base.nData;
4757 assert( nData>0 );
4758 if( p->rc==SQLITE_OK && 0==fts5BufferGrow(&p->rc, pBuf, nData+9+9) ){
4759 fts5BufferSafeAppendVarint(pBuf, iDelta);
4760 fts5BufferSafeAppendVarint(pBuf, nData*2);
4761 fts5BufferSafeAppendBlob(pBuf, pMulti->base.pData, nData);
3994 } 4762 }
3995 } 4763 }
3996 4764
3997 typedef struct PoslistCallbackCtx PoslistCallbackCtx;
3998 struct PoslistCallbackCtx {
3999 Fts5Buffer *pBuf; /* Append to this buffer */
4000 Fts5Colset *pColset; /* Restrict matches to this column */
4001 int eState; /* See above */
4002 };
4003
4004 /*
4005 ** TODO: Make this more efficient!
4006 */
4007 static int fts5IndexColsetTest(Fts5Colset *pColset, int iCol){
4008 int i;
4009 for(i=0; i<pColset->nCol; i++){
4010 if( pColset->aiCol[i]==iCol ) return 1;
4011 }
4012 return 0;
4013 }
4014
4015 static void fts5PoslistFilterCallback(
4016 Fts5Index *p,
4017 void *pContext,
4018 const u8 *pChunk, int nChunk
4019 ){
4020 PoslistCallbackCtx *pCtx = (PoslistCallbackCtx*)pContext;
4021 assert_nc( nChunk>=0 );
4022 if( nChunk>0 ){
4023 /* Search through to find the first varint with value 1. This is the
4024 ** start of the next columns hits. */
4025 int i = 0;
4026 int iStart = 0;
4027
4028 if( pCtx->eState==2 ){
4029 int iCol;
4030 fts5FastGetVarint32(pChunk, i, iCol);
4031 if( fts5IndexColsetTest(pCtx->pColset, iCol) ){
4032 pCtx->eState = 1;
4033 fts5BufferSafeAppendVarint(pCtx->pBuf, 1);
4034 }else{
4035 pCtx->eState = 0;
4036 }
4037 }
4038
4039 do {
4040 while( i<nChunk && pChunk[i]!=0x01 ){
4041 while( pChunk[i] & 0x80 ) i++;
4042 i++;
4043 }
4044 if( pCtx->eState ){
4045 fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart);
4046 }
4047 if( i<nChunk ){
4048 int iCol;
4049 iStart = i;
4050 i++;
4051 if( i>=nChunk ){
4052 pCtx->eState = 2;
4053 }else{
4054 fts5FastGetVarint32(pChunk, i, iCol);
4055 pCtx->eState = fts5IndexColsetTest(pCtx->pColset, iCol);
4056 if( pCtx->eState ){
4057 fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart);
4058 iStart = i;
4059 }
4060 }
4061 }
4062 }while( i<nChunk );
4063 }
4064 }
4065
4066 /*
4067 ** Iterator pIter currently points to a valid entry (not EOF). This
4068 ** function appends the position list data for the current entry to
4069 ** buffer pBuf. It does not make a copy of the position-list size
4070 ** field.
4071 */
4072 static void fts5SegiterPoslist(
4073 Fts5Index *p,
4074 Fts5SegIter *pSeg,
4075 Fts5Colset *pColset,
4076 Fts5Buffer *pBuf
4077 ){
4078 if( 0==fts5BufferGrow(&p->rc, pBuf, pSeg->nPos) ){
4079 if( pColset==0 ){
4080 fts5ChunkIterate(p, pSeg, (void*)pBuf, fts5PoslistCallback);
4081 }else{
4082 PoslistCallbackCtx sCtx;
4083 sCtx.pBuf = pBuf;
4084 sCtx.pColset = pColset;
4085 sCtx.eState = fts5IndexColsetTest(pColset, 0);
4086 assert( sCtx.eState==0 || sCtx.eState==1 );
4087 fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistFilterCallback);
4088 }
4089 }
4090 }
4091
4092 /*
4093 ** IN/OUT parameter (*pa) points to a position list n bytes in size. If
4094 ** the position list contains entries for column iCol, then (*pa) is set
4095 ** to point to the sub-position-list for that column and the number of
4096 ** bytes in it returned. Or, if the argument position list does not
4097 ** contain any entries for column iCol, return 0.
4098 */
4099 static int fts5IndexExtractCol(
4100 const u8 **pa, /* IN/OUT: Pointer to poslist */
4101 int n, /* IN: Size of poslist in bytes */
4102 int iCol /* Column to extract from poslist */
4103 ){
4104 int iCurrent = 0; /* Anything before the first 0x01 is col 0 */
4105 const u8 *p = *pa;
4106 const u8 *pEnd = &p[n]; /* One byte past end of position list */
4107 u8 prev = 0;
4108
4109 while( iCol>iCurrent ){
4110 /* Advance pointer p until it points to pEnd or an 0x01 byte that is
4111 ** not part of a varint */
4112 while( (prev & 0x80) || *p!=0x01 ){
4113 prev = *p++;
4114 if( p==pEnd ) return 0;
4115 }
4116 *pa = p++;
4117 p += fts5GetVarint32(p, iCurrent);
4118 }
4119 if( iCol!=iCurrent ) return 0;
4120
4121 /* Advance pointer p until it points to pEnd or an 0x01 byte that is
4122 ** not part of a varint */
4123 assert( (prev & 0x80)==0 );
4124 while( p<pEnd && ((prev & 0x80) || *p!=0x01) ){
4125 prev = *p++;
4126 }
4127 return p - (*pa);
4128 }
4129
4130
4131 /*
4132 ** Iterator pMulti currently points to a valid entry (not EOF). This
4133 ** function appends the following to buffer pBuf:
4134 **
4135 ** * The varint iDelta, and
4136 ** * the position list that currently points to, including the size field.
4137 **
4138 ** If argument pColset is NULL, then the position list is filtered according
4139 ** to pColset before being appended to the buffer. If this means there are
4140 ** no entries in the position list, nothing is appended to the buffer (not
4141 ** even iDelta).
4142 **
4143 ** If an error occurs, an error code is left in p->rc.
4144 */
4145 static int fts5AppendPoslist(
4146 Fts5Index *p,
4147 i64 iDelta,
4148 Fts5IndexIter *pMulti,
4149 Fts5Colset *pColset,
4150 Fts5Buffer *pBuf
4151 ){
4152 if( p->rc==SQLITE_OK ){
4153 Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ];
4154 assert( fts5MultiIterEof(p, pMulti)==0 );
4155 assert( pSeg->nPos>0 );
4156 if( 0==fts5BufferGrow(&p->rc, pBuf, pSeg->nPos+9+9) ){
4157
4158 if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf
4159 && (pColset==0 || pColset->nCol==1)
4160 ){
4161 const u8 *pPos = &pSeg->pLeaf->p[pSeg->iLeafOffset];
4162 int nPos;
4163 if( pColset ){
4164 nPos = fts5IndexExtractCol(&pPos, pSeg->nPos, pColset->aiCol[0]);
4165 if( nPos==0 ) return 1;
4166 }else{
4167 nPos = pSeg->nPos;
4168 }
4169 assert( nPos>0 );
4170 fts5BufferSafeAppendVarint(pBuf, iDelta);
4171 fts5BufferSafeAppendVarint(pBuf, nPos*2);
4172 fts5BufferSafeAppendBlob(pBuf, pPos, nPos);
4173 }else{
4174 int iSv1;
4175 int iSv2;
4176 int iData;
4177
4178 /* Append iDelta */
4179 iSv1 = pBuf->n;
4180 fts5BufferSafeAppendVarint(pBuf, iDelta);
4181
4182 /* WRITEPOSLISTSIZE */
4183 iSv2 = pBuf->n;
4184 fts5BufferSafeAppendVarint(pBuf, pSeg->nPos*2);
4185 iData = pBuf->n;
4186
4187 fts5SegiterPoslist(p, pSeg, pColset, pBuf);
4188
4189 if( pColset ){
4190 int nActual = pBuf->n - iData;
4191 if( nActual!=pSeg->nPos ){
4192 if( nActual==0 ){
4193 pBuf->n = iSv1;
4194 return 1;
4195 }else{
4196 int nReq = sqlite3Fts5GetVarintLen((u32)(nActual*2));
4197 while( iSv2<(iData-nReq) ){ pBuf->p[iSv2++] = 0x80; }
4198 sqlite3Fts5PutVarint(&pBuf->p[iSv2], nActual*2);
4199 }
4200 }
4201 }
4202 }
4203
4204 }
4205 }
4206
4207 return 0;
4208 }
4209 4765
4210 static void fts5DoclistIterNext(Fts5DoclistIter *pIter){ 4766 static void fts5DoclistIterNext(Fts5DoclistIter *pIter){
4211 u8 *p = pIter->aPoslist + pIter->nSize + pIter->nPoslist; 4767 u8 *p = pIter->aPoslist + pIter->nSize + pIter->nPoslist;
4212 4768
4213 assert( pIter->aPoslist ); 4769 assert( pIter->aPoslist );
4214 if( p>=pIter->aEof ){ 4770 if( p>=pIter->aEof ){
4215 pIter->aPoslist = 0; 4771 pIter->aPoslist = 0;
4216 }else{ 4772 }else{
4217 i64 iDelta; 4773 i64 iDelta;
4218 4774
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after
4261 } 4817 }
4262 #endif 4818 #endif
4263 4819
4264 #define fts5MergeAppendDocid(pBuf, iLastRowid, iRowid) { \ 4820 #define fts5MergeAppendDocid(pBuf, iLastRowid, iRowid) { \
4265 assert( (pBuf)->n!=0 || (iLastRowid)==0 ); \ 4821 assert( (pBuf)->n!=0 || (iLastRowid)==0 ); \
4266 fts5BufferSafeAppendVarint((pBuf), (iRowid) - (iLastRowid)); \ 4822 fts5BufferSafeAppendVarint((pBuf), (iRowid) - (iLastRowid)); \
4267 (iLastRowid) = (iRowid); \ 4823 (iLastRowid) = (iRowid); \
4268 } 4824 }
4269 4825
4270 /* 4826 /*
4827 ** Swap the contents of buffer *p1 with that of *p2.
4828 */
4829 static void fts5BufferSwap(Fts5Buffer *p1, Fts5Buffer *p2){
4830 Fts5Buffer tmp = *p1;
4831 *p1 = *p2;
4832 *p2 = tmp;
4833 }
4834
4835 static void fts5NextRowid(Fts5Buffer *pBuf, int *piOff, i64 *piRowid){
4836 int i = *piOff;
4837 if( i>=pBuf->n ){
4838 *piOff = -1;
4839 }else{
4840 u64 iVal;
4841 *piOff = i + sqlite3Fts5GetVarint(&pBuf->p[i], &iVal);
4842 *piRowid += iVal;
4843 }
4844 }
4845
4846 /*
4847 ** This is the equivalent of fts5MergePrefixLists() for detail=none mode.
4848 ** In this case the buffers consist of a delta-encoded list of rowids only.
4849 */
4850 static void fts5MergeRowidLists(
4851 Fts5Index *p, /* FTS5 backend object */
4852 Fts5Buffer *p1, /* First list to merge */
4853 Fts5Buffer *p2 /* Second list to merge */
4854 ){
4855 int i1 = 0;
4856 int i2 = 0;
4857 i64 iRowid1 = 0;
4858 i64 iRowid2 = 0;
4859 i64 iOut = 0;
4860
4861 Fts5Buffer out;
4862 memset(&out, 0, sizeof(out));
4863 sqlite3Fts5BufferSize(&p->rc, &out, p1->n + p2->n);
4864 if( p->rc ) return;
4865
4866 fts5NextRowid(p1, &i1, &iRowid1);
4867 fts5NextRowid(p2, &i2, &iRowid2);
4868 while( i1>=0 || i2>=0 ){
4869 if( i1>=0 && (i2<0 || iRowid1<iRowid2) ){
4870 assert( iOut==0 || iRowid1>iOut );
4871 fts5BufferSafeAppendVarint(&out, iRowid1 - iOut);
4872 iOut = iRowid1;
4873 fts5NextRowid(p1, &i1, &iRowid1);
4874 }else{
4875 assert( iOut==0 || iRowid2>iOut );
4876 fts5BufferSafeAppendVarint(&out, iRowid2 - iOut);
4877 iOut = iRowid2;
4878 if( i1>=0 && iRowid1==iRowid2 ){
4879 fts5NextRowid(p1, &i1, &iRowid1);
4880 }
4881 fts5NextRowid(p2, &i2, &iRowid2);
4882 }
4883 }
4884
4885 fts5BufferSwap(&out, p1);
4886 fts5BufferFree(&out);
4887 }
4888
4889 /*
4271 ** Buffers p1 and p2 contain doclists. This function merges the content 4890 ** Buffers p1 and p2 contain doclists. This function merges the content
4272 ** of the two doclists together and sets buffer p1 to the result before 4891 ** of the two doclists together and sets buffer p1 to the result before
4273 ** returning. 4892 ** returning.
4274 ** 4893 **
4275 ** If an error occurs, an error code is left in p->rc. If an error has 4894 ** If an error occurs, an error code is left in p->rc. If an error has
4276 ** already occurred, this function is a no-op. 4895 ** already occurred, this function is a no-op.
4277 */ 4896 */
4278 static void fts5MergePrefixLists( 4897 static void fts5MergePrefixLists(
4279 Fts5Index *p, /* FTS5 backend object */ 4898 Fts5Index *p, /* FTS5 backend object */
4280 Fts5Buffer *p1, /* First list to merge */ 4899 Fts5Buffer *p1, /* First list to merge */
4281 Fts5Buffer *p2 /* Second list to merge */ 4900 Fts5Buffer *p2 /* Second list to merge */
4282 ){ 4901 ){
4283 if( p2->n ){ 4902 if( p2->n ){
4284 i64 iLastRowid = 0; 4903 i64 iLastRowid = 0;
4285 Fts5DoclistIter i1; 4904 Fts5DoclistIter i1;
4286 Fts5DoclistIter i2; 4905 Fts5DoclistIter i2;
4287 Fts5Buffer out; 4906 Fts5Buffer out = {0, 0, 0};
4288 Fts5Buffer tmp; 4907 Fts5Buffer tmp = {0, 0, 0};
4289 memset(&out, 0, sizeof(out));
4290 memset(&tmp, 0, sizeof(tmp));
4291 4908
4292 sqlite3Fts5BufferSize(&p->rc, &out, p1->n + p2->n); 4909 if( sqlite3Fts5BufferSize(&p->rc, &out, p1->n + p2->n) ) return;
4293 fts5DoclistIterInit(p1, &i1); 4910 fts5DoclistIterInit(p1, &i1);
4294 fts5DoclistIterInit(p2, &i2); 4911 fts5DoclistIterInit(p2, &i2);
4295 while( p->rc==SQLITE_OK && (i1.aPoslist!=0 || i2.aPoslist!=0) ){ 4912
4296 if( i2.aPoslist==0 || (i1.aPoslist && i1.iRowid<i2.iRowid) ){ 4913 while( 1 ){
4914 if( i1.iRowid<i2.iRowid ){
4297 /* Copy entry from i1 */ 4915 /* Copy entry from i1 */
4298 fts5MergeAppendDocid(&out, iLastRowid, i1.iRowid); 4916 fts5MergeAppendDocid(&out, iLastRowid, i1.iRowid);
4299 fts5BufferSafeAppendBlob(&out, i1.aPoslist, i1.nPoslist+i1.nSize); 4917 fts5BufferSafeAppendBlob(&out, i1.aPoslist, i1.nPoslist+i1.nSize);
4300 fts5DoclistIterNext(&i1); 4918 fts5DoclistIterNext(&i1);
4919 if( i1.aPoslist==0 ) break;
4301 } 4920 }
4302 else if( i1.aPoslist==0 || i2.iRowid!=i1.iRowid ){ 4921 else if( i2.iRowid!=i1.iRowid ){
4303 /* Copy entry from i2 */ 4922 /* Copy entry from i2 */
4304 fts5MergeAppendDocid(&out, iLastRowid, i2.iRowid); 4923 fts5MergeAppendDocid(&out, iLastRowid, i2.iRowid);
4305 fts5BufferSafeAppendBlob(&out, i2.aPoslist, i2.nPoslist+i2.nSize); 4924 fts5BufferSafeAppendBlob(&out, i2.aPoslist, i2.nPoslist+i2.nSize);
4306 fts5DoclistIterNext(&i2); 4925 fts5DoclistIterNext(&i2);
4926 if( i2.aPoslist==0 ) break;
4307 } 4927 }
4308 else{ 4928 else{
4929 /* Merge the two position lists. */
4309 i64 iPos1 = 0; 4930 i64 iPos1 = 0;
4310 i64 iPos2 = 0; 4931 i64 iPos2 = 0;
4311 int iOff1 = 0; 4932 int iOff1 = 0;
4312 int iOff2 = 0; 4933 int iOff2 = 0;
4313 u8 *a1 = &i1.aPoslist[i1.nSize]; 4934 u8 *a1 = &i1.aPoslist[i1.nSize];
4314 u8 *a2 = &i2.aPoslist[i2.nSize]; 4935 u8 *a2 = &i2.aPoslist[i2.nSize];
4315 4936
4937 i64 iPrev = 0;
4316 Fts5PoslistWriter writer; 4938 Fts5PoslistWriter writer;
4317 memset(&writer, 0, sizeof(writer)); 4939 memset(&writer, 0, sizeof(writer));
4318 4940
4319 /* Merge the two position lists. */
4320 fts5MergeAppendDocid(&out, iLastRowid, i2.iRowid); 4941 fts5MergeAppendDocid(&out, iLastRowid, i2.iRowid);
4321 fts5BufferZero(&tmp); 4942 fts5BufferZero(&tmp);
4943 sqlite3Fts5BufferSize(&p->rc, &tmp, i1.nPoslist + i2.nPoslist);
4944 if( p->rc ) break;
4322 4945
4323 sqlite3Fts5PoslistNext64(a1, i1.nPoslist, &iOff1, &iPos1); 4946 sqlite3Fts5PoslistNext64(a1, i1.nPoslist, &iOff1, &iPos1);
4324 sqlite3Fts5PoslistNext64(a2, i2.nPoslist, &iOff2, &iPos2); 4947 sqlite3Fts5PoslistNext64(a2, i2.nPoslist, &iOff2, &iPos2);
4948 assert( iPos1>=0 && iPos2>=0 );
4325 4949
4326 while( p->rc==SQLITE_OK && (iPos1>=0 || iPos2>=0) ){ 4950 if( iPos1<iPos2 ){
4327 i64 iNew; 4951 sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos1);
4328 if( iPos2<0 || (iPos1>=0 && iPos1<iPos2) ){ 4952 sqlite3Fts5PoslistNext64(a1, i1.nPoslist, &iOff1, &iPos1);
4329 iNew = iPos1; 4953 }else{
4330 sqlite3Fts5PoslistNext64(a1, i1.nPoslist, &iOff1, &iPos1); 4954 sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos2);
4331 }else{ 4955 sqlite3Fts5PoslistNext64(a2, i2.nPoslist, &iOff2, &iPos2);
4332 iNew = iPos2; 4956 }
4333 sqlite3Fts5PoslistNext64(a2, i2.nPoslist, &iOff2, &iPos2); 4957
4334 if( iPos1==iPos2 ){ 4958 if( iPos1>=0 && iPos2>=0 ){
4335 sqlite3Fts5PoslistNext64(a1, i1.nPoslist, &iOff1,&iPos1); 4959 while( 1 ){
4960 if( iPos1<iPos2 ){
4961 if( iPos1!=iPrev ){
4962 sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos1);
4963 }
4964 sqlite3Fts5PoslistNext64(a1, i1.nPoslist, &iOff1, &iPos1);
4965 if( iPos1<0 ) break;
4966 }else{
4967 assert( iPos2!=iPrev );
4968 sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos2);
4969 sqlite3Fts5PoslistNext64(a2, i2.nPoslist, &iOff2, &iPos2);
4970 if( iPos2<0 ) break;
4336 } 4971 }
4337 } 4972 }
4338 p->rc = sqlite3Fts5PoslistWriterAppend(&tmp, &writer, iNew); 4973 }
4974
4975 if( iPos1>=0 ){
4976 if( iPos1!=iPrev ){
4977 sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos1);
4978 }
4979 fts5BufferSafeAppendBlob(&tmp, &a1[iOff1], i1.nPoslist-iOff1);
4980 }else{
4981 assert( iPos2>=0 && iPos2!=iPrev );
4982 sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos2);
4983 fts5BufferSafeAppendBlob(&tmp, &a2[iOff2], i2.nPoslist-iOff2);
4339 } 4984 }
4340 4985
4341 /* WRITEPOSLISTSIZE */ 4986 /* WRITEPOSLISTSIZE */
4342 fts5BufferSafeAppendVarint(&out, tmp.n * 2); 4987 fts5BufferSafeAppendVarint(&out, tmp.n * 2);
4343 fts5BufferSafeAppendBlob(&out, tmp.p, tmp.n); 4988 fts5BufferSafeAppendBlob(&out, tmp.p, tmp.n);
4344 fts5DoclistIterNext(&i1); 4989 fts5DoclistIterNext(&i1);
4345 fts5DoclistIterNext(&i2); 4990 fts5DoclistIterNext(&i2);
4991 if( i1.aPoslist==0 || i2.aPoslist==0 ) break;
4346 } 4992 }
4347 } 4993 }
4348 4994
4995 if( i1.aPoslist ){
4996 fts5MergeAppendDocid(&out, iLastRowid, i1.iRowid);
4997 fts5BufferSafeAppendBlob(&out, i1.aPoslist, i1.aEof - i1.aPoslist);
4998 }
4999 else if( i2.aPoslist ){
5000 fts5MergeAppendDocid(&out, iLastRowid, i2.iRowid);
5001 fts5BufferSafeAppendBlob(&out, i2.aPoslist, i2.aEof - i2.aPoslist);
5002 }
5003
4349 fts5BufferSet(&p->rc, p1, out.n, out.p); 5004 fts5BufferSet(&p->rc, p1, out.n, out.p);
4350 fts5BufferFree(&tmp); 5005 fts5BufferFree(&tmp);
4351 fts5BufferFree(&out); 5006 fts5BufferFree(&out);
4352 } 5007 }
4353 } 5008 }
4354 5009
4355 static void fts5BufferSwap(Fts5Buffer *p1, Fts5Buffer *p2){
4356 Fts5Buffer tmp = *p1;
4357 *p1 = *p2;
4358 *p2 = tmp;
4359 }
4360
4361 static void fts5SetupPrefixIter( 5010 static void fts5SetupPrefixIter(
4362 Fts5Index *p, /* Index to read from */ 5011 Fts5Index *p, /* Index to read from */
4363 int bDesc, /* True for "ORDER BY rowid DESC" */ 5012 int bDesc, /* True for "ORDER BY rowid DESC" */
4364 const u8 *pToken, /* Buffer containing prefix to match */ 5013 const u8 *pToken, /* Buffer containing prefix to match */
4365 int nToken, /* Size of buffer pToken in bytes */ 5014 int nToken, /* Size of buffer pToken in bytes */
4366 Fts5Colset *pColset, /* Restrict matches to these columns */ 5015 Fts5Colset *pColset, /* Restrict matches to these columns */
4367 Fts5IndexIter **ppIter /* OUT: New iterator */ 5016 Fts5Iter **ppIter /* OUT: New iterator */
4368 ){ 5017 ){
4369 Fts5Structure *pStruct; 5018 Fts5Structure *pStruct;
4370 Fts5Buffer *aBuf; 5019 Fts5Buffer *aBuf;
4371 const int nBuf = 32; 5020 const int nBuf = 32;
4372 5021
5022 void (*xMerge)(Fts5Index*, Fts5Buffer*, Fts5Buffer*);
5023 void (*xAppend)(Fts5Index*, i64, Fts5Iter*, Fts5Buffer*);
5024 if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){
5025 xMerge = fts5MergeRowidLists;
5026 xAppend = fts5AppendRowid;
5027 }else{
5028 xMerge = fts5MergePrefixLists;
5029 xAppend = fts5AppendPoslist;
5030 }
5031
4373 aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*nBuf); 5032 aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*nBuf);
4374 pStruct = fts5StructureRead(p); 5033 pStruct = fts5StructureRead(p);
4375 5034
4376 if( aBuf && pStruct ){ 5035 if( aBuf && pStruct ){
4377 const int flags = FTS5INDEX_QUERY_SCAN; 5036 const int flags = FTS5INDEX_QUERY_SCAN
5037 | FTS5INDEX_QUERY_SKIPEMPTY
5038 | FTS5INDEX_QUERY_NOOUTPUT;
4378 int i; 5039 int i;
4379 i64 iLastRowid = 0; 5040 i64 iLastRowid = 0;
4380 Fts5IndexIter *p1 = 0; /* Iterator used to gather data from index */ 5041 Fts5Iter *p1 = 0; /* Iterator used to gather data from index */
4381 Fts5Data *pData; 5042 Fts5Data *pData;
4382 Fts5Buffer doclist; 5043 Fts5Buffer doclist;
4383 int bNewTerm = 1; 5044 int bNewTerm = 1;
4384 5045
4385 memset(&doclist, 0, sizeof(doclist)); 5046 memset(&doclist, 0, sizeof(doclist));
4386 for(fts5MultiIterNew(p, pStruct, 1, flags, pToken, nToken, -1, 0, &p1); 5047 fts5MultiIterNew(p, pStruct, flags, pColset, pToken, nToken, -1, 0, &p1);
5048 fts5IterSetOutputCb(&p->rc, p1);
5049 for( /* no-op */ ;
4387 fts5MultiIterEof(p, p1)==0; 5050 fts5MultiIterEof(p, p1)==0;
4388 fts5MultiIterNext2(p, p1, &bNewTerm) 5051 fts5MultiIterNext2(p, p1, &bNewTerm)
4389 ){ 5052 ){
4390 i64 iRowid = fts5MultiIterRowid(p1); 5053 Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ];
4391 int nTerm; 5054 int nTerm = pSeg->term.n;
4392 const u8 *pTerm = fts5MultiIterTerm(p1, &nTerm); 5055 const u8 *pTerm = pSeg->term.p;
5056 p1->xSetOutputs(p1, pSeg);
5057
4393 assert_nc( memcmp(pToken, pTerm, MIN(nToken, nTerm))<=0 ); 5058 assert_nc( memcmp(pToken, pTerm, MIN(nToken, nTerm))<=0 );
4394 if( bNewTerm ){ 5059 if( bNewTerm ){
4395 if( nTerm<nToken || memcmp(pToken, pTerm, nToken) ) break; 5060 if( nTerm<nToken || memcmp(pToken, pTerm, nToken) ) break;
4396 } 5061 }
4397 5062
4398 if( doclist.n>0 && iRowid<=iLastRowid ){ 5063 if( p1->base.nData==0 ) continue;
5064
5065 if( p1->base.iRowid<=iLastRowid && doclist.n>0 ){
4399 for(i=0; p->rc==SQLITE_OK && doclist.n; i++){ 5066 for(i=0; p->rc==SQLITE_OK && doclist.n; i++){
4400 assert( i<nBuf ); 5067 assert( i<nBuf );
4401 if( aBuf[i].n==0 ){ 5068 if( aBuf[i].n==0 ){
4402 fts5BufferSwap(&doclist, &aBuf[i]); 5069 fts5BufferSwap(&doclist, &aBuf[i]);
4403 fts5BufferZero(&doclist); 5070 fts5BufferZero(&doclist);
4404 }else{ 5071 }else{
4405 fts5MergePrefixLists(p, &doclist, &aBuf[i]); 5072 xMerge(p, &doclist, &aBuf[i]);
4406 fts5BufferZero(&aBuf[i]); 5073 fts5BufferZero(&aBuf[i]);
4407 } 5074 }
4408 } 5075 }
4409 iLastRowid = 0; 5076 iLastRowid = 0;
4410 } 5077 }
4411 5078
4412 if( !fts5AppendPoslist(p, iRowid-iLastRowid, p1, pColset, &doclist) ){ 5079 xAppend(p, p1->base.iRowid-iLastRowid, p1, &doclist);
4413 iLastRowid = iRowid; 5080 iLastRowid = p1->base.iRowid;
4414 }
4415 } 5081 }
4416 5082
4417 for(i=0; i<nBuf; i++){ 5083 for(i=0; i<nBuf; i++){
4418 if( p->rc==SQLITE_OK ){ 5084 if( p->rc==SQLITE_OK ){
4419 fts5MergePrefixLists(p, &doclist, &aBuf[i]); 5085 xMerge(p, &doclist, &aBuf[i]);
4420 } 5086 }
4421 fts5BufferFree(&aBuf[i]); 5087 fts5BufferFree(&aBuf[i]);
4422 } 5088 }
4423 fts5MultiIterFree(p, p1); 5089 fts5MultiIterFree(p1);
4424 5090
4425 pData = fts5IdxMalloc(p, sizeof(Fts5Data) + doclist.n); 5091 pData = fts5IdxMalloc(p, sizeof(Fts5Data) + doclist.n);
4426 if( pData ){ 5092 if( pData ){
4427 pData->p = (u8*)&pData[1]; 5093 pData->p = (u8*)&pData[1];
4428 pData->nn = pData->szLeaf = doclist.n; 5094 pData->nn = pData->szLeaf = doclist.n;
4429 memcpy(pData->p, doclist.p, doclist.n); 5095 memcpy(pData->p, doclist.p, doclist.n);
4430 fts5MultiIterNew2(p, pData, bDesc, ppIter); 5096 fts5MultiIterNew2(p, pData, bDesc, ppIter);
4431 } 5097 }
4432 fts5BufferFree(&doclist); 5098 fts5BufferFree(&doclist);
4433 } 5099 }
4434 5100
4435 fts5StructureRelease(pStruct); 5101 fts5StructureRelease(pStruct);
4436 sqlite3_free(aBuf); 5102 sqlite3_free(aBuf);
4437 } 5103 }
4438 5104
4439 5105
4440 /* 5106 /*
4441 ** Indicate that all subsequent calls to sqlite3Fts5IndexWrite() pertain 5107 ** Indicate that all subsequent calls to sqlite3Fts5IndexWrite() pertain
4442 ** to the document with rowid iRowid. 5108 ** to the document with rowid iRowid.
4443 */ 5109 */
4444 int sqlite3Fts5IndexBeginWrite(Fts5Index *p, int bDelete, i64 iRowid){ 5110 int sqlite3Fts5IndexBeginWrite(Fts5Index *p, int bDelete, i64 iRowid){
4445 assert( p->rc==SQLITE_OK ); 5111 assert( p->rc==SQLITE_OK );
4446 5112
4447 /* Allocate the hash table if it has not already been allocated */ 5113 /* Allocate the hash table if it has not already been allocated */
4448 if( p->pHash==0 ){ 5114 if( p->pHash==0 ){
4449 p->rc = sqlite3Fts5HashNew(&p->pHash, &p->nPendingData); 5115 p->rc = sqlite3Fts5HashNew(p->pConfig, &p->pHash, &p->nPendingData);
4450 } 5116 }
4451 5117
4452 /* Flush the hash table to disk if required */ 5118 /* Flush the hash table to disk if required */
4453 if( iRowid<p->iWriteRowid 5119 if( iRowid<p->iWriteRowid
4454 || (iRowid==p->iWriteRowid && p->bDelete==0) 5120 || (iRowid==p->iWriteRowid && p->bDelete==0)
4455 || (p->nPendingData > p->pConfig->nHashSize) 5121 || (p->nPendingData > p->pConfig->nHashSize)
4456 ){ 5122 ){
4457 fts5IndexFlush(p); 5123 fts5IndexFlush(p);
4458 } 5124 }
4459 5125
(...skipping 14 matching lines...) Expand all
4474 5140
4475 /* 5141 /*
4476 ** Discard any data stored in the in-memory hash tables. Do not write it 5142 ** Discard any data stored in the in-memory hash tables. Do not write it
4477 ** to the database. Additionally, assume that the contents of the %_data 5143 ** to the database. Additionally, assume that the contents of the %_data
4478 ** table may have changed on disk. So any in-memory caches of %_data 5144 ** table may have changed on disk. So any in-memory caches of %_data
4479 ** records must be invalidated. 5145 ** records must be invalidated.
4480 */ 5146 */
4481 int sqlite3Fts5IndexRollback(Fts5Index *p){ 5147 int sqlite3Fts5IndexRollback(Fts5Index *p){
4482 fts5CloseReader(p); 5148 fts5CloseReader(p);
4483 fts5IndexDiscardData(p); 5149 fts5IndexDiscardData(p);
4484 assert( p->rc==SQLITE_OK ); 5150 fts5StructureInvalidate(p);
5151 /* assert( p->rc==SQLITE_OK ); */
4485 return SQLITE_OK; 5152 return SQLITE_OK;
4486 } 5153 }
4487 5154
4488 /* 5155 /*
4489 ** The %_data table is completely empty when this function is called. This 5156 ** The %_data table is completely empty when this function is called. This
4490 ** function populates it with the initial structure objects for each index, 5157 ** function populates it with the initial structure objects for each index,
4491 ** and the initial version of the "averages" record (a zero-byte blob). 5158 ** and the initial version of the "averages" record (a zero-byte blob).
4492 */ 5159 */
4493 int sqlite3Fts5IndexReinit(Fts5Index *p){ 5160 int sqlite3Fts5IndexReinit(Fts5Index *p){
4494 Fts5Structure s; 5161 Fts5Structure s;
5162 fts5StructureInvalidate(p);
4495 memset(&s, 0, sizeof(Fts5Structure)); 5163 memset(&s, 0, sizeof(Fts5Structure));
4496 fts5DataWrite(p, FTS5_AVERAGES_ROWID, (const u8*)"", 0); 5164 fts5DataWrite(p, FTS5_AVERAGES_ROWID, (const u8*)"", 0);
4497 fts5StructureWrite(p, &s); 5165 fts5StructureWrite(p, &s);
4498 return fts5IndexReturn(p); 5166 return fts5IndexReturn(p);
4499 } 5167 }
4500 5168
4501 /* 5169 /*
4502 ** Open a new Fts5Index handle. If the bCreate argument is true, create 5170 ** Open a new Fts5Index handle. If the bCreate argument is true, create
4503 ** and initialize the underlying %_data table. 5171 ** and initialize the underlying %_data table.
4504 ** 5172 **
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
4543 return rc; 5211 return rc;
4544 } 5212 }
4545 5213
4546 /* 5214 /*
4547 ** Close a handle opened by an earlier call to sqlite3Fts5IndexOpen(). 5215 ** Close a handle opened by an earlier call to sqlite3Fts5IndexOpen().
4548 */ 5216 */
4549 int sqlite3Fts5IndexClose(Fts5Index *p){ 5217 int sqlite3Fts5IndexClose(Fts5Index *p){
4550 int rc = SQLITE_OK; 5218 int rc = SQLITE_OK;
4551 if( p ){ 5219 if( p ){
4552 assert( p->pReader==0 ); 5220 assert( p->pReader==0 );
5221 fts5StructureInvalidate(p);
4553 sqlite3_finalize(p->pWriter); 5222 sqlite3_finalize(p->pWriter);
4554 sqlite3_finalize(p->pDeleter); 5223 sqlite3_finalize(p->pDeleter);
4555 sqlite3_finalize(p->pIdxWriter); 5224 sqlite3_finalize(p->pIdxWriter);
4556 sqlite3_finalize(p->pIdxDeleter); 5225 sqlite3_finalize(p->pIdxDeleter);
4557 sqlite3_finalize(p->pIdxSelect); 5226 sqlite3_finalize(p->pIdxSelect);
5227 sqlite3_finalize(p->pDataVersion);
4558 sqlite3Fts5HashFree(p->pHash); 5228 sqlite3Fts5HashFree(p->pHash);
4559 sqlite3_free(p->zDataTbl); 5229 sqlite3_free(p->zDataTbl);
4560 sqlite3_free(p); 5230 sqlite3_free(p);
4561 } 5231 }
4562 return rc; 5232 return rc;
4563 } 5233 }
4564 5234
4565 /* 5235 /*
4566 ** Argument p points to a buffer containing utf-8 text that is n bytes in 5236 ** Argument p points to a buffer containing utf-8 text that is n bytes in
4567 ** size. Return the number of bytes in the nChar character prefix of the 5237 ** size. Return the number of bytes in the nChar character prefix of the
4568 ** buffer, or 0 if there are less than nChar characters in total. 5238 ** buffer, or 0 if there are less than nChar characters in total.
4569 */ 5239 */
4570 static int fts5IndexCharlenToBytelen(const char *p, int nByte, int nChar){ 5240 int sqlite3Fts5IndexCharlenToBytelen(
5241 const char *p,
5242 int nByte,
5243 int nChar
5244 ){
4571 int n = 0; 5245 int n = 0;
4572 int i; 5246 int i;
4573 for(i=0; i<nChar; i++){ 5247 for(i=0; i<nChar; i++){
4574 if( n>=nByte ) return 0; /* Input contains fewer than nChar chars */ 5248 if( n>=nByte ) return 0; /* Input contains fewer than nChar chars */
4575 if( (unsigned char)p[n++]>=0xc0 ){ 5249 if( (unsigned char)p[n++]>=0xc0 ){
4576 while( (p[n] & 0xc0)==0x80 ) n++; 5250 while( (p[n] & 0xc0)==0x80 ) n++;
4577 } 5251 }
4578 } 5252 }
4579 return n; 5253 return n;
4580 } 5254 }
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
4617 5291
4618 assert( p->rc==SQLITE_OK ); 5292 assert( p->rc==SQLITE_OK );
4619 assert( (iCol<0)==p->bDelete ); 5293 assert( (iCol<0)==p->bDelete );
4620 5294
4621 /* Add the entry to the main terms index. */ 5295 /* Add the entry to the main terms index. */
4622 rc = sqlite3Fts5HashWrite( 5296 rc = sqlite3Fts5HashWrite(
4623 p->pHash, p->iWriteRowid, iCol, iPos, FTS5_MAIN_PREFIX, pToken, nToken 5297 p->pHash, p->iWriteRowid, iCol, iPos, FTS5_MAIN_PREFIX, pToken, nToken
4624 ); 5298 );
4625 5299
4626 for(i=0; i<pConfig->nPrefix && rc==SQLITE_OK; i++){ 5300 for(i=0; i<pConfig->nPrefix && rc==SQLITE_OK; i++){
4627 int nByte = fts5IndexCharlenToBytelen(pToken, nToken, pConfig->aPrefix[i]); 5301 const int nChar = pConfig->aPrefix[i];
5302 int nByte = sqlite3Fts5IndexCharlenToBytelen(pToken, nToken, nChar);
4628 if( nByte ){ 5303 if( nByte ){
4629 rc = sqlite3Fts5HashWrite(p->pHash, 5304 rc = sqlite3Fts5HashWrite(p->pHash,
4630 p->iWriteRowid, iCol, iPos, (char)(FTS5_MAIN_PREFIX+i+1), pToken, 5305 p->iWriteRowid, iCol, iPos, (char)(FTS5_MAIN_PREFIX+i+1), pToken,
4631 nByte 5306 nByte
4632 ); 5307 );
4633 } 5308 }
4634 } 5309 }
4635 5310
4636 return rc; 5311 return rc;
4637 } 5312 }
4638 5313
4639 /* 5314 /*
4640 ** Open a new iterator to iterate though all rowid that match the 5315 ** Open a new iterator to iterate though all rowid that match the
4641 ** specified token or token prefix. 5316 ** specified token or token prefix.
4642 */ 5317 */
4643 int sqlite3Fts5IndexQuery( 5318 int sqlite3Fts5IndexQuery(
4644 Fts5Index *p, /* FTS index to query */ 5319 Fts5Index *p, /* FTS index to query */
4645 const char *pToken, int nToken, /* Token (or prefix) to query for */ 5320 const char *pToken, int nToken, /* Token (or prefix) to query for */
4646 int flags, /* Mask of FTS5INDEX_QUERY_X flags */ 5321 int flags, /* Mask of FTS5INDEX_QUERY_X flags */
4647 Fts5Colset *pColset, /* Match these columns only */ 5322 Fts5Colset *pColset, /* Match these columns only */
4648 Fts5IndexIter **ppIter /* OUT: New iterator object */ 5323 Fts5IndexIter **ppIter /* OUT: New iterator object */
4649 ){ 5324 ){
4650 Fts5Config *pConfig = p->pConfig; 5325 Fts5Config *pConfig = p->pConfig;
4651 Fts5IndexIter *pRet = 0; 5326 Fts5Iter *pRet = 0;
4652 int iIdx = 0;
4653 Fts5Buffer buf = {0, 0, 0}; 5327 Fts5Buffer buf = {0, 0, 0};
4654 5328
4655 /* If the QUERY_SCAN flag is set, all other flags must be clear. */ 5329 /* If the QUERY_SCAN flag is set, all other flags must be clear. */
4656 assert( (flags & FTS5INDEX_QUERY_SCAN)==0 || flags==FTS5INDEX_QUERY_SCAN ); 5330 assert( (flags & FTS5INDEX_QUERY_SCAN)==0 || flags==FTS5INDEX_QUERY_SCAN );
4657 5331
4658 if( sqlite3Fts5BufferSize(&p->rc, &buf, nToken+1)==0 ){ 5332 if( sqlite3Fts5BufferSize(&p->rc, &buf, nToken+1)==0 ){
5333 int iIdx = 0; /* Index to search */
4659 memcpy(&buf.p[1], pToken, nToken); 5334 memcpy(&buf.p[1], pToken, nToken);
4660 5335
4661 #ifdef SQLITE_DEBUG 5336 /* Figure out which index to search and set iIdx accordingly. If this
4662 /* If the QUERY_TEST_NOIDX flag was specified, then this must be a 5337 ** is a prefix query for which there is no prefix index, set iIdx to
5338 ** greater than pConfig->nPrefix to indicate that the query will be
5339 ** satisfied by scanning multiple terms in the main index.
5340 **
5341 ** If the QUERY_TEST_NOIDX flag was specified, then this must be a
4663 ** prefix-query. Instead of using a prefix-index (if one exists), 5342 ** prefix-query. Instead of using a prefix-index (if one exists),
4664 ** evaluate the prefix query using the main FTS index. This is used 5343 ** evaluate the prefix query using the main FTS index. This is used
4665 ** for internal sanity checking by the integrity-check in debug 5344 ** for internal sanity checking by the integrity-check in debug
4666 ** mode only. */ 5345 ** mode only. */
5346 #ifdef SQLITE_DEBUG
4667 if( pConfig->bPrefixIndex==0 || (flags & FTS5INDEX_QUERY_TEST_NOIDX) ){ 5347 if( pConfig->bPrefixIndex==0 || (flags & FTS5INDEX_QUERY_TEST_NOIDX) ){
4668 assert( flags & FTS5INDEX_QUERY_PREFIX ); 5348 assert( flags & FTS5INDEX_QUERY_PREFIX );
4669 iIdx = 1+pConfig->nPrefix; 5349 iIdx = 1+pConfig->nPrefix;
4670 }else 5350 }else
4671 #endif 5351 #endif
4672 if( flags & FTS5INDEX_QUERY_PREFIX ){ 5352 if( flags & FTS5INDEX_QUERY_PREFIX ){
4673 int nChar = fts5IndexCharlen(pToken, nToken); 5353 int nChar = fts5IndexCharlen(pToken, nToken);
4674 for(iIdx=1; iIdx<=pConfig->nPrefix; iIdx++){ 5354 for(iIdx=1; iIdx<=pConfig->nPrefix; iIdx++){
4675 if( pConfig->aPrefix[iIdx-1]==nChar ) break; 5355 if( pConfig->aPrefix[iIdx-1]==nChar ) break;
4676 } 5356 }
4677 } 5357 }
4678 5358
4679 if( iIdx<=pConfig->nPrefix ){ 5359 if( iIdx<=pConfig->nPrefix ){
5360 /* Straight index lookup */
4680 Fts5Structure *pStruct = fts5StructureRead(p); 5361 Fts5Structure *pStruct = fts5StructureRead(p);
4681 buf.p[0] = (u8)(FTS5_MAIN_PREFIX + iIdx); 5362 buf.p[0] = (u8)(FTS5_MAIN_PREFIX + iIdx);
4682 if( pStruct ){ 5363 if( pStruct ){
4683 fts5MultiIterNew(p, pStruct, 1, flags, buf.p, nToken+1, -1, 0, &pRet); 5364 fts5MultiIterNew(p, pStruct, flags | FTS5INDEX_QUERY_SKIPEMPTY,
5365 pColset, buf.p, nToken+1, -1, 0, &pRet
5366 );
4684 fts5StructureRelease(pStruct); 5367 fts5StructureRelease(pStruct);
4685 } 5368 }
4686 }else{ 5369 }else{
5370 /* Scan multiple terms in the main index */
4687 int bDesc = (flags & FTS5INDEX_QUERY_DESC)!=0; 5371 int bDesc = (flags & FTS5INDEX_QUERY_DESC)!=0;
4688 buf.p[0] = FTS5_MAIN_PREFIX; 5372 buf.p[0] = FTS5_MAIN_PREFIX;
4689 fts5SetupPrefixIter(p, bDesc, buf.p, nToken+1, pColset, &pRet); 5373 fts5SetupPrefixIter(p, bDesc, buf.p, nToken+1, pColset, &pRet);
5374 assert( p->rc!=SQLITE_OK || pRet->pColset==0 );
5375 fts5IterSetOutputCb(&p->rc, pRet);
5376 if( p->rc==SQLITE_OK ){
5377 Fts5SegIter *pSeg = &pRet->aSeg[pRet->aFirst[1].iFirst];
5378 if( pSeg->pLeaf ) pRet->xSetOutputs(pRet, pSeg);
5379 }
4690 } 5380 }
4691 5381
4692 if( p->rc ){ 5382 if( p->rc ){
4693 sqlite3Fts5IterClose(pRet); 5383 sqlite3Fts5IterClose(&pRet->base);
4694 pRet = 0; 5384 pRet = 0;
4695 fts5CloseReader(p); 5385 fts5CloseReader(p);
4696 } 5386 }
4697 *ppIter = pRet; 5387
5388 *ppIter = &pRet->base;
4698 sqlite3Fts5BufferFree(&buf); 5389 sqlite3Fts5BufferFree(&buf);
4699 } 5390 }
4700 return fts5IndexReturn(p); 5391 return fts5IndexReturn(p);
4701 } 5392 }
4702 5393
4703 /* 5394 /*
4704 ** Return true if the iterator passed as the only argument is at EOF. 5395 ** Return true if the iterator passed as the only argument is at EOF.
4705 */ 5396 */
4706 int sqlite3Fts5IterEof(Fts5IndexIter *pIter){
4707 assert( pIter->pIndex->rc==SQLITE_OK );
4708 return pIter->bEof;
4709 }
4710
4711 /* 5397 /*
4712 ** Move to the next matching rowid. 5398 ** Move to the next matching rowid.
4713 */ 5399 */
4714 int sqlite3Fts5IterNext(Fts5IndexIter *pIter){ 5400 int sqlite3Fts5IterNext(Fts5IndexIter *pIndexIter){
5401 Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
4715 assert( pIter->pIndex->rc==SQLITE_OK ); 5402 assert( pIter->pIndex->rc==SQLITE_OK );
4716 fts5MultiIterNext(pIter->pIndex, pIter, 0, 0); 5403 fts5MultiIterNext(pIter->pIndex, pIter, 0, 0);
4717 return fts5IndexReturn(pIter->pIndex); 5404 return fts5IndexReturn(pIter->pIndex);
4718 } 5405 }
4719 5406
4720 /* 5407 /*
4721 ** Move to the next matching term/rowid. Used by the fts5vocab module. 5408 ** Move to the next matching term/rowid. Used by the fts5vocab module.
4722 */ 5409 */
4723 int sqlite3Fts5IterNextScan(Fts5IndexIter *pIter){ 5410 int sqlite3Fts5IterNextScan(Fts5IndexIter *pIndexIter){
5411 Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
4724 Fts5Index *p = pIter->pIndex; 5412 Fts5Index *p = pIter->pIndex;
4725 5413
4726 assert( pIter->pIndex->rc==SQLITE_OK ); 5414 assert( pIter->pIndex->rc==SQLITE_OK );
4727 5415
4728 fts5MultiIterNext(p, pIter, 0, 0); 5416 fts5MultiIterNext(p, pIter, 0, 0);
4729 if( p->rc==SQLITE_OK ){ 5417 if( p->rc==SQLITE_OK ){
4730 Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; 5418 Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
4731 if( pSeg->pLeaf && pSeg->term.p[0]!=FTS5_MAIN_PREFIX ){ 5419 if( pSeg->pLeaf && pSeg->term.p[0]!=FTS5_MAIN_PREFIX ){
4732 fts5DataRelease(pSeg->pLeaf); 5420 fts5DataRelease(pSeg->pLeaf);
4733 pSeg->pLeaf = 0; 5421 pSeg->pLeaf = 0;
4734 pIter->bEof = 1; 5422 pIter->base.bEof = 1;
4735 } 5423 }
4736 } 5424 }
4737 5425
4738 return fts5IndexReturn(pIter->pIndex); 5426 return fts5IndexReturn(pIter->pIndex);
4739 } 5427 }
4740 5428
4741 /* 5429 /*
4742 ** Move to the next matching rowid that occurs at or after iMatch. The 5430 ** Move to the next matching rowid that occurs at or after iMatch. The
4743 ** definition of "at or after" depends on whether this iterator iterates 5431 ** definition of "at or after" depends on whether this iterator iterates
4744 ** in ascending or descending rowid order. 5432 ** in ascending or descending rowid order.
4745 */ 5433 */
4746 int sqlite3Fts5IterNextFrom(Fts5IndexIter *pIter, i64 iMatch){ 5434 int sqlite3Fts5IterNextFrom(Fts5IndexIter *pIndexIter, i64 iMatch){
5435 Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
4747 fts5MultiIterNextFrom(pIter->pIndex, pIter, iMatch); 5436 fts5MultiIterNextFrom(pIter->pIndex, pIter, iMatch);
4748 return fts5IndexReturn(pIter->pIndex); 5437 return fts5IndexReturn(pIter->pIndex);
4749 } 5438 }
4750 5439
4751 /* 5440 /*
4752 ** Return the current rowid.
4753 */
4754 i64 sqlite3Fts5IterRowid(Fts5IndexIter *pIter){
4755 return fts5MultiIterRowid(pIter);
4756 }
4757
4758 /*
4759 ** Return the current term. 5441 ** Return the current term.
4760 */ 5442 */
4761 const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIter, int *pn){ 5443 const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIndexIter, int *pn){
4762 int n; 5444 int n;
4763 const char *z = (const char*)fts5MultiIterTerm(pIter, &n); 5445 const char *z = (const char*)fts5MultiIterTerm((Fts5Iter*)pIndexIter, &n);
4764 *pn = n-1; 5446 *pn = n-1;
4765 return &z[1]; 5447 return &z[1];
4766 } 5448 }
4767 5449
4768
4769 static int fts5IndexExtractColset (
4770 Fts5Colset *pColset, /* Colset to filter on */
4771 const u8 *pPos, int nPos, /* Position list */
4772 Fts5Buffer *pBuf /* Output buffer */
4773 ){
4774 int rc = SQLITE_OK;
4775 int i;
4776
4777 fts5BufferZero(pBuf);
4778 for(i=0; i<pColset->nCol; i++){
4779 const u8 *pSub = pPos;
4780 int nSub = fts5IndexExtractCol(&pSub, nPos, pColset->aiCol[i]);
4781 if( nSub ){
4782 fts5BufferAppendBlob(&rc, pBuf, nSub, pSub);
4783 }
4784 }
4785 return rc;
4786 }
4787
4788
4789 /*
4790 ** Return a pointer to a buffer containing a copy of the position list for
4791 ** the current entry. Output variable *pn is set to the size of the buffer
4792 ** in bytes before returning.
4793 **
4794 ** The returned position list does not include the "number of bytes" varint
4795 ** field that starts the position list on disk.
4796 */
4797 int sqlite3Fts5IterPoslist(
4798 Fts5IndexIter *pIter,
4799 Fts5Colset *pColset, /* Column filter (or NULL) */
4800 const u8 **pp, /* OUT: Pointer to position-list data */
4801 int *pn, /* OUT: Size of position-list in bytes */
4802 i64 *piRowid /* OUT: Current rowid */
4803 ){
4804 Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
4805 assert( pIter->pIndex->rc==SQLITE_OK );
4806 *piRowid = pSeg->iRowid;
4807 if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){
4808 u8 *pPos = &pSeg->pLeaf->p[pSeg->iLeafOffset];
4809 if( pColset==0 || pIter->bFiltered ){
4810 *pn = pSeg->nPos;
4811 *pp = pPos;
4812 }else if( pColset->nCol==1 ){
4813 *pp = pPos;
4814 *pn = fts5IndexExtractCol(pp, pSeg->nPos, pColset->aiCol[0]);
4815 }else{
4816 fts5BufferZero(&pIter->poslist);
4817 fts5IndexExtractColset(pColset, pPos, pSeg->nPos, &pIter->poslist);
4818 *pp = pIter->poslist.p;
4819 *pn = pIter->poslist.n;
4820 }
4821 }else{
4822 fts5BufferZero(&pIter->poslist);
4823 fts5SegiterPoslist(pIter->pIndex, pSeg, pColset, &pIter->poslist);
4824 *pp = pIter->poslist.p;
4825 *pn = pIter->poslist.n;
4826 }
4827 return fts5IndexReturn(pIter->pIndex);
4828 }
4829
4830 /*
4831 ** This function is similar to sqlite3Fts5IterPoslist(), except that it
4832 ** copies the position list into the buffer supplied as the second
4833 ** argument.
4834 */
4835 int sqlite3Fts5IterPoslistBuffer(Fts5IndexIter *pIter, Fts5Buffer *pBuf){
4836 Fts5Index *p = pIter->pIndex;
4837 Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
4838 assert( p->rc==SQLITE_OK );
4839 fts5BufferZero(pBuf);
4840 fts5SegiterPoslist(p, pSeg, 0, pBuf);
4841 return fts5IndexReturn(p);
4842 }
4843
4844 /* 5450 /*
4845 ** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery(). 5451 ** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery().
4846 */ 5452 */
4847 void sqlite3Fts5IterClose(Fts5IndexIter *pIter){ 5453 void sqlite3Fts5IterClose(Fts5IndexIter *pIndexIter){
4848 if( pIter ){ 5454 if( pIndexIter ){
5455 Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
4849 Fts5Index *pIndex = pIter->pIndex; 5456 Fts5Index *pIndex = pIter->pIndex;
4850 fts5MultiIterFree(pIter->pIndex, pIter); 5457 fts5MultiIterFree(pIter);
4851 fts5CloseReader(pIndex); 5458 fts5CloseReader(pIndex);
4852 } 5459 }
4853 } 5460 }
4854 5461
4855 /* 5462 /*
4856 ** Read and decode the "averages" record from the database. 5463 ** Read and decode the "averages" record from the database.
4857 ** 5464 **
4858 ** Parameter anSize must point to an array of size nCol, where nCol is 5465 ** Parameter anSize must point to an array of size nCol, where nCol is
4859 ** the number of user defined columns in the FTS table. 5466 ** the number of user defined columns in the FTS table.
4860 */ 5467 */
(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after
4933 5540
4934 /************************************************************************* 5541 /*************************************************************************
4935 ************************************************************************** 5542 **************************************************************************
4936 ** Below this point is the implementation of the integrity-check 5543 ** Below this point is the implementation of the integrity-check
4937 ** functionality. 5544 ** functionality.
4938 */ 5545 */
4939 5546
4940 /* 5547 /*
4941 ** Return a simple checksum value based on the arguments. 5548 ** Return a simple checksum value based on the arguments.
4942 */ 5549 */
4943 static u64 fts5IndexEntryCksum( 5550 u64 sqlite3Fts5IndexEntryCksum(
4944 i64 iRowid, 5551 i64 iRowid,
4945 int iCol, 5552 int iCol,
4946 int iPos, 5553 int iPos,
4947 int iIdx, 5554 int iIdx,
4948 const char *pTerm, 5555 const char *pTerm,
4949 int nTerm 5556 int nTerm
4950 ){ 5557 ){
4951 int i; 5558 int i;
4952 u64 ret = iRowid; 5559 u64 ret = iRowid;
4953 ret += (ret<<3) + iCol; 5560 ret += (ret<<3) + iCol;
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after
5003 } 5610 }
5004 5611
5005 static int fts5QueryCksum( 5612 static int fts5QueryCksum(
5006 Fts5Index *p, /* Fts5 index object */ 5613 Fts5Index *p, /* Fts5 index object */
5007 int iIdx, 5614 int iIdx,
5008 const char *z, /* Index key to query for */ 5615 const char *z, /* Index key to query for */
5009 int n, /* Size of index key in bytes */ 5616 int n, /* Size of index key in bytes */
5010 int flags, /* Flags for Fts5IndexQuery */ 5617 int flags, /* Flags for Fts5IndexQuery */
5011 u64 *pCksum /* IN/OUT: Checksum value */ 5618 u64 *pCksum /* IN/OUT: Checksum value */
5012 ){ 5619 ){
5620 int eDetail = p->pConfig->eDetail;
5013 u64 cksum = *pCksum; 5621 u64 cksum = *pCksum;
5014 Fts5IndexIter *pIdxIter = 0; 5622 Fts5IndexIter *pIter = 0;
5015 int rc = sqlite3Fts5IndexQuery(p, z, n, flags, 0, &pIdxIter); 5623 int rc = sqlite3Fts5IndexQuery(p, z, n, flags, 0, &pIter);
5016 5624
5017 while( rc==SQLITE_OK && 0==sqlite3Fts5IterEof(pIdxIter) ){ 5625 while( rc==SQLITE_OK && 0==sqlite3Fts5IterEof(pIter) ){
5018 i64 dummy; 5626 i64 rowid = pIter->iRowid;
5019 const u8 *pPos; 5627
5020 int nPos; 5628 if( eDetail==FTS5_DETAIL_NONE ){
5021 i64 rowid = sqlite3Fts5IterRowid(pIdxIter); 5629 cksum ^= sqlite3Fts5IndexEntryCksum(rowid, 0, 0, iIdx, z, n);
5022 rc = sqlite3Fts5IterPoslist(pIdxIter, 0, &pPos, &nPos, &dummy); 5630 }else{
5023 if( rc==SQLITE_OK ){
5024 Fts5PoslistReader sReader; 5631 Fts5PoslistReader sReader;
5025 for(sqlite3Fts5PoslistReaderInit(pPos, nPos, &sReader); 5632 for(sqlite3Fts5PoslistReaderInit(pIter->pData, pIter->nData, &sReader);
5026 sReader.bEof==0; 5633 sReader.bEof==0;
5027 sqlite3Fts5PoslistReaderNext(&sReader) 5634 sqlite3Fts5PoslistReaderNext(&sReader)
5028 ){ 5635 ){
5029 int iCol = FTS5_POS2COLUMN(sReader.iPos); 5636 int iCol = FTS5_POS2COLUMN(sReader.iPos);
5030 int iOff = FTS5_POS2OFFSET(sReader.iPos); 5637 int iOff = FTS5_POS2OFFSET(sReader.iPos);
5031 cksum ^= fts5IndexEntryCksum(rowid, iCol, iOff, iIdx, z, n); 5638 cksum ^= sqlite3Fts5IndexEntryCksum(rowid, iCol, iOff, iIdx, z, n);
5032 } 5639 }
5033 rc = sqlite3Fts5IterNext(pIdxIter); 5640 }
5641 if( rc==SQLITE_OK ){
5642 rc = sqlite3Fts5IterNext(pIter);
5034 } 5643 }
5035 } 5644 }
5036 sqlite3Fts5IterClose(pIdxIter); 5645 sqlite3Fts5IterClose(pIter);
5037 5646
5038 *pCksum = cksum; 5647 *pCksum = cksum;
5039 return rc; 5648 return rc;
5040 } 5649 }
5041 5650
5042 5651
5043 /* 5652 /*
5044 ** This function is also purely an internal test. It does not contribute to 5653 ** This function is also purely an internal test. It does not contribute to
5045 ** FTS functionality, or even the integrity-check, in any way. 5654 ** FTS functionality, or even the integrity-check, in any way.
5046 */ 5655 */
(...skipping 167 matching lines...) Expand 10 before | Expand all | Expand 10 after
5214 5823
5215 int nIdxTerm = sqlite3_column_bytes(pStmt, 1); 5824 int nIdxTerm = sqlite3_column_bytes(pStmt, 1);
5216 const char *zIdxTerm = (const char*)sqlite3_column_text(pStmt, 1); 5825 const char *zIdxTerm = (const char*)sqlite3_column_text(pStmt, 1);
5217 int iIdxLeaf = sqlite3_column_int(pStmt, 2); 5826 int iIdxLeaf = sqlite3_column_int(pStmt, 2);
5218 int bIdxDlidx = sqlite3_column_int(pStmt, 3); 5827 int bIdxDlidx = sqlite3_column_int(pStmt, 3);
5219 5828
5220 /* If the leaf in question has already been trimmed from the segment, 5829 /* If the leaf in question has already been trimmed from the segment,
5221 ** ignore this b-tree entry. Otherwise, load it into memory. */ 5830 ** ignore this b-tree entry. Otherwise, load it into memory. */
5222 if( iIdxLeaf<pSeg->pgnoFirst ) continue; 5831 if( iIdxLeaf<pSeg->pgnoFirst ) continue;
5223 iRow = FTS5_SEGMENT_ROWID(pSeg->iSegid, iIdxLeaf); 5832 iRow = FTS5_SEGMENT_ROWID(pSeg->iSegid, iIdxLeaf);
5224 pLeaf = fts5DataRead(p, iRow); 5833 pLeaf = fts5LeafRead(p, iRow);
5225 if( pLeaf==0 ) break; 5834 if( pLeaf==0 ) break;
5226 5835
5227 /* Check that the leaf contains at least one term, and that it is equal 5836 /* Check that the leaf contains at least one term, and that it is equal
5228 ** to or larger than the split-key in zIdxTerm. Also check that if there 5837 ** to or larger than the split-key in zIdxTerm. Also check that if there
5229 ** is also a rowid pointer within the leaf page header, it points to a 5838 ** is also a rowid pointer within the leaf page header, it points to a
5230 ** location before the term. */ 5839 ** location before the term. */
5231 if( pLeaf->nn<=pLeaf->szLeaf ){ 5840 if( pLeaf->nn<=pLeaf->szLeaf ){
5232 p->rc = FTS5_CORRUPT; 5841 p->rc = FTS5_CORRUPT;
5233 }else{ 5842 }else{
5234 int iOff; /* Offset of first term on leaf */ 5843 int iOff; /* Offset of first term on leaf */
(...skipping 85 matching lines...) Expand 10 before | Expand all | Expand 10 after
5320 if( p->rc==SQLITE_OK && iter.iLeaf!=pSeg->pgnoLast ){ 5929 if( p->rc==SQLITE_OK && iter.iLeaf!=pSeg->pgnoLast ){
5321 p->rc = FTS5_CORRUPT; 5930 p->rc = FTS5_CORRUPT;
5322 } 5931 }
5323 #endif 5932 #endif
5324 } 5933 }
5325 5934
5326 5935
5327 /* 5936 /*
5328 ** Run internal checks to ensure that the FTS index (a) is internally 5937 ** Run internal checks to ensure that the FTS index (a) is internally
5329 ** consistent and (b) contains entries for which the XOR of the checksums 5938 ** consistent and (b) contains entries for which the XOR of the checksums
5330 ** as calculated by fts5IndexEntryCksum() is cksum. 5939 ** as calculated by sqlite3Fts5IndexEntryCksum() is cksum.
5331 ** 5940 **
5332 ** Return SQLITE_CORRUPT if any of the internal checks fail, or if the 5941 ** Return SQLITE_CORRUPT if any of the internal checks fail, or if the
5333 ** checksum does not match. Return SQLITE_OK if all checks pass without 5942 ** checksum does not match. Return SQLITE_OK if all checks pass without
5334 ** error, or some other SQLite error code if another error (e.g. OOM) 5943 ** error, or some other SQLite error code if another error (e.g. OOM)
5335 ** occurs. 5944 ** occurs.
5336 */ 5945 */
5337 int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){ 5946 int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){
5947 int eDetail = p->pConfig->eDetail;
5338 u64 cksum2 = 0; /* Checksum based on contents of indexes */ 5948 u64 cksum2 = 0; /* Checksum based on contents of indexes */
5339 Fts5Buffer poslist = {0,0,0}; /* Buffer used to hold a poslist */ 5949 Fts5Buffer poslist = {0,0,0}; /* Buffer used to hold a poslist */
5340 Fts5IndexIter *pIter; /* Used to iterate through entire index */ 5950 Fts5Iter *pIter; /* Used to iterate through entire index */
5341 Fts5Structure *pStruct; /* Index structure */ 5951 Fts5Structure *pStruct; /* Index structure */
5342 5952
5343 #ifdef SQLITE_DEBUG 5953 #ifdef SQLITE_DEBUG
5344 /* Used by extra internal tests only run if NDEBUG is not defined */ 5954 /* Used by extra internal tests only run if NDEBUG is not defined */
5345 u64 cksum3 = 0; /* Checksum based on contents of indexes */ 5955 u64 cksum3 = 0; /* Checksum based on contents of indexes */
5346 Fts5Buffer term = {0,0,0}; /* Buffer used to hold most recent term */ 5956 Fts5Buffer term = {0,0,0}; /* Buffer used to hold most recent term */
5347 #endif 5957 #endif
5958 const int flags = FTS5INDEX_QUERY_NOOUTPUT;
5348 5959
5349 /* Load the FTS index structure */ 5960 /* Load the FTS index structure */
5350 pStruct = fts5StructureRead(p); 5961 pStruct = fts5StructureRead(p);
5351 5962
5352 /* Check that the internal nodes of each segment match the leaves */ 5963 /* Check that the internal nodes of each segment match the leaves */
5353 if( pStruct ){ 5964 if( pStruct ){
5354 int iLvl, iSeg; 5965 int iLvl, iSeg;
5355 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ 5966 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
5356 for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){ 5967 for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
5357 Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg]; 5968 Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg];
5358 fts5IndexIntegrityCheckSegment(p, pSeg); 5969 fts5IndexIntegrityCheckSegment(p, pSeg);
5359 } 5970 }
5360 } 5971 }
5361 } 5972 }
5362 5973
5363 /* The cksum argument passed to this function is a checksum calculated 5974 /* The cksum argument passed to this function is a checksum calculated
5364 ** based on all expected entries in the FTS index (including prefix index 5975 ** based on all expected entries in the FTS index (including prefix index
5365 ** entries). This block checks that a checksum calculated based on the 5976 ** entries). This block checks that a checksum calculated based on the
5366 ** actual contents of FTS index is identical. 5977 ** actual contents of FTS index is identical.
5367 ** 5978 **
5368 ** Two versions of the same checksum are calculated. The first (stack 5979 ** Two versions of the same checksum are calculated. The first (stack
5369 ** variable cksum2) based on entries extracted from the full-text index 5980 ** variable cksum2) based on entries extracted from the full-text index
5370 ** while doing a linear scan of each individual index in turn. 5981 ** while doing a linear scan of each individual index in turn.
5371 ** 5982 **
5372 ** As each term visited by the linear scans, a separate query for the 5983 ** As each term visited by the linear scans, a separate query for the
5373 ** same term is performed. cksum3 is calculated based on the entries 5984 ** same term is performed. cksum3 is calculated based on the entries
5374 ** extracted by these queries. 5985 ** extracted by these queries.
5375 */ 5986 */
5376 for(fts5MultiIterNew(p, pStruct, 0, 0, 0, 0, -1, 0, &pIter); 5987 for(fts5MultiIterNew(p, pStruct, flags, 0, 0, 0, -1, 0, &pIter);
5377 fts5MultiIterEof(p, pIter)==0; 5988 fts5MultiIterEof(p, pIter)==0;
5378 fts5MultiIterNext(p, pIter, 0, 0) 5989 fts5MultiIterNext(p, pIter, 0, 0)
5379 ){ 5990 ){
5380 int n; /* Size of term in bytes */ 5991 int n; /* Size of term in bytes */
5381 i64 iPos = 0; /* Position read from poslist */ 5992 i64 iPos = 0; /* Position read from poslist */
5382 int iOff = 0; /* Offset within poslist */ 5993 int iOff = 0; /* Offset within poslist */
5383 i64 iRowid = fts5MultiIterRowid(pIter); 5994 i64 iRowid = fts5MultiIterRowid(pIter);
5384 char *z = (char*)fts5MultiIterTerm(pIter, &n); 5995 char *z = (char*)fts5MultiIterTerm(pIter, &n);
5385 5996
5386 /* If this is a new term, query for it. Update cksum3 with the results. */ 5997 /* If this is a new term, query for it. Update cksum3 with the results. */
5387 fts5TestTerm(p, &term, z, n, cksum2, &cksum3); 5998 fts5TestTerm(p, &term, z, n, cksum2, &cksum3);
5388 5999
5389 poslist.n = 0; 6000 if( eDetail==FTS5_DETAIL_NONE ){
5390 fts5SegiterPoslist(p, &pIter->aSeg[pIter->aFirst[1].iFirst] , 0, &poslist); 6001 if( 0==fts5MultiIterIsEmpty(p, pIter) ){
5391 while( 0==sqlite3Fts5PoslistNext64(poslist.p, poslist.n, &iOff, &iPos) ){ 6002 cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, 0, 0, -1, z, n);
5392 int iCol = FTS5_POS2COLUMN(iPos); 6003 }
5393 int iTokOff = FTS5_POS2OFFSET(iPos); 6004 }else{
5394 cksum2 ^= fts5IndexEntryCksum(iRowid, iCol, iTokOff, -1, z, n); 6005 poslist.n = 0;
6006 fts5SegiterPoslist(p, &pIter->aSeg[pIter->aFirst[1].iFirst], 0, &poslist);
6007 while( 0==sqlite3Fts5PoslistNext64(poslist.p, poslist.n, &iOff, &iPos) ){
6008 int iCol = FTS5_POS2COLUMN(iPos);
6009 int iTokOff = FTS5_POS2OFFSET(iPos);
6010 cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, iCol, iTokOff, -1, z, n);
6011 }
5395 } 6012 }
5396 } 6013 }
5397 fts5TestTerm(p, &term, 0, 0, cksum2, &cksum3); 6014 fts5TestTerm(p, &term, 0, 0, cksum2, &cksum3);
5398 6015
5399 fts5MultiIterFree(p, pIter); 6016 fts5MultiIterFree(pIter);
5400 if( p->rc==SQLITE_OK && cksum!=cksum2 ) p->rc = FTS5_CORRUPT; 6017 if( p->rc==SQLITE_OK && cksum!=cksum2 ) p->rc = FTS5_CORRUPT;
5401 6018
5402 fts5StructureRelease(pStruct); 6019 fts5StructureRelease(pStruct);
5403 #ifdef SQLITE_DEBUG 6020 #ifdef SQLITE_DEBUG
5404 fts5BufferFree(&term); 6021 fts5BufferFree(&term);
5405 #endif 6022 #endif
5406 fts5BufferFree(&poslist); 6023 fts5BufferFree(&poslist);
5407 return fts5IndexReturn(p); 6024 return fts5IndexReturn(p);
5408 } 6025 }
5409 6026
5410
5411 /*
5412 ** Calculate and return a checksum that is the XOR of the index entry
5413 ** checksum of all entries that would be generated by the token specified
5414 ** by the final 5 arguments.
5415 */
5416 u64 sqlite3Fts5IndexCksum(
5417 Fts5Config *pConfig, /* Configuration object */
5418 i64 iRowid, /* Document term appears in */
5419 int iCol, /* Column term appears in */
5420 int iPos, /* Position term appears in */
5421 const char *pTerm, int nTerm /* Term at iPos */
5422 ){
5423 u64 ret = 0; /* Return value */
5424 int iIdx; /* For iterating through indexes */
5425
5426 ret = fts5IndexEntryCksum(iRowid, iCol, iPos, 0, pTerm, nTerm);
5427
5428 for(iIdx=0; iIdx<pConfig->nPrefix; iIdx++){
5429 int nByte = fts5IndexCharlenToBytelen(pTerm, nTerm, pConfig->aPrefix[iIdx]);
5430 if( nByte ){
5431 ret ^= fts5IndexEntryCksum(iRowid, iCol, iPos, iIdx+1, pTerm, nByte);
5432 }
5433 }
5434
5435 return ret;
5436 }
5437
5438 /************************************************************************* 6027 /*************************************************************************
5439 ************************************************************************** 6028 **************************************************************************
5440 ** Below this point is the implementation of the fts5_decode() scalar 6029 ** Below this point is the implementation of the fts5_decode() scalar
5441 ** function only. 6030 ** function only.
5442 */ 6031 */
5443 6032
5444 /* 6033 /*
5445 ** Decode a segment-data rowid from the %_data table. This function is 6034 ** Decode a segment-data rowid from the %_data table. This function is
5446 ** the opposite of macro FTS5_SEGMENT_ROWID(). 6035 ** the opposite of macro FTS5_SEGMENT_ROWID().
5447 */ 6036 */
(...skipping 148 matching lines...) Expand 10 before | Expand all | Expand 10 after
5596 iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&iDelta); 6185 iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&iDelta);
5597 iDocid += iDelta; 6186 iDocid += iDelta;
5598 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid); 6187 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid);
5599 } 6188 }
5600 } 6189 }
5601 6190
5602 return iOff; 6191 return iOff;
5603 } 6192 }
5604 6193
5605 /* 6194 /*
6195 ** This function is part of the fts5_decode() debugging function. It is
6196 ** only ever used with detail=none tables.
6197 **
6198 ** Buffer (pData/nData) contains a doclist in the format used by detail=none
6199 ** tables. This function appends a human-readable version of that list to
6200 ** buffer pBuf.
6201 **
6202 ** If *pRc is other than SQLITE_OK when this function is called, it is a
6203 ** no-op. If an OOM or other error occurs within this function, *pRc is
6204 ** set to an SQLite error code before returning. The final state of buffer
6205 ** pBuf is undefined in this case.
6206 */
6207 static void fts5DecodeRowidList(
6208 int *pRc, /* IN/OUT: Error code */
6209 Fts5Buffer *pBuf, /* Buffer to append text to */
6210 const u8 *pData, int nData /* Data to decode list-of-rowids from */
6211 ){
6212 int i = 0;
6213 i64 iRowid = 0;
6214
6215 while( i<nData ){
6216 const char *zApp = "";
6217 u64 iVal;
6218 i += sqlite3Fts5GetVarint(&pData[i], &iVal);
6219 iRowid += iVal;
6220
6221 if( i<nData && pData[i]==0x00 ){
6222 i++;
6223 if( i<nData && pData[i]==0x00 ){
6224 i++;
6225 zApp = "+";
6226 }else{
6227 zApp = "*";
6228 }
6229 }
6230
6231 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " %lld%s", iRowid, zApp);
6232 }
6233 }
6234
6235 /*
5606 ** The implementation of user-defined scalar function fts5_decode(). 6236 ** The implementation of user-defined scalar function fts5_decode().
5607 */ 6237 */
5608 static void fts5DecodeFunction( 6238 static void fts5DecodeFunction(
5609 sqlite3_context *pCtx, /* Function call context */ 6239 sqlite3_context *pCtx, /* Function call context */
5610 int nArg, /* Number of args (always 2) */ 6240 int nArg, /* Number of args (always 2) */
5611 sqlite3_value **apVal /* Function arguments */ 6241 sqlite3_value **apVal /* Function arguments */
5612 ){ 6242 ){
5613 i64 iRowid; /* Rowid for record being decoded */ 6243 i64 iRowid; /* Rowid for record being decoded */
5614 int iSegid,iHeight,iPgno,bDlidx;/* Rowid components */ 6244 int iSegid,iHeight,iPgno,bDlidx;/* Rowid components */
5615 const u8 *aBlob; int n; /* Record to decode */ 6245 const u8 *aBlob; int n; /* Record to decode */
5616 u8 *a = 0; 6246 u8 *a = 0;
5617 Fts5Buffer s; /* Build up text to return here */ 6247 Fts5Buffer s; /* Build up text to return here */
5618 int rc = SQLITE_OK; /* Return code */ 6248 int rc = SQLITE_OK; /* Return code */
5619 int nSpace = 0; 6249 int nSpace = 0;
6250 int eDetailNone = (sqlite3_user_data(pCtx)!=0);
5620 6251
5621 assert( nArg==2 ); 6252 assert( nArg==2 );
6253 UNUSED_PARAM(nArg);
5622 memset(&s, 0, sizeof(Fts5Buffer)); 6254 memset(&s, 0, sizeof(Fts5Buffer));
5623 iRowid = sqlite3_value_int64(apVal[0]); 6255 iRowid = sqlite3_value_int64(apVal[0]);
5624 6256
5625 /* Make a copy of the second argument (a blob) in aBlob[]. The aBlob[] 6257 /* Make a copy of the second argument (a blob) in aBlob[]. The aBlob[]
5626 ** copy is followed by FTS5_DATA_ZERO_PADDING 0x00 bytes, which prevents 6258 ** copy is followed by FTS5_DATA_ZERO_PADDING 0x00 bytes, which prevents
5627 ** buffer overreads even if the record is corrupt. */ 6259 ** buffer overreads even if the record is corrupt. */
5628 n = sqlite3_value_bytes(apVal[1]); 6260 n = sqlite3_value_bytes(apVal[1]);
5629 aBlob = sqlite3_value_blob(apVal[1]); 6261 aBlob = sqlite3_value_blob(apVal[1]);
5630 nSpace = n + FTS5_DATA_ZERO_PADDING; 6262 nSpace = n + FTS5_DATA_ZERO_PADDING;
5631 a = (u8*)sqlite3Fts5MallocZero(&rc, nSpace); 6263 a = (u8*)sqlite3Fts5MallocZero(&rc, nSpace);
(...skipping 19 matching lines...) Expand all
5651 sqlite3Fts5BufferAppendPrintf(&rc, &s, 6283 sqlite3Fts5BufferAppendPrintf(&rc, &s,
5652 " %d(%lld)", lvl.iLeafPgno, lvl.iRowid 6284 " %d(%lld)", lvl.iLeafPgno, lvl.iRowid
5653 ); 6285 );
5654 } 6286 }
5655 }else if( iSegid==0 ){ 6287 }else if( iSegid==0 ){
5656 if( iRowid==FTS5_AVERAGES_ROWID ){ 6288 if( iRowid==FTS5_AVERAGES_ROWID ){
5657 fts5DecodeAverages(&rc, &s, a, n); 6289 fts5DecodeAverages(&rc, &s, a, n);
5658 }else{ 6290 }else{
5659 fts5DecodeStructure(&rc, &s, a, n); 6291 fts5DecodeStructure(&rc, &s, a, n);
5660 } 6292 }
6293 }else if( eDetailNone ){
6294 Fts5Buffer term; /* Current term read from page */
6295 int szLeaf;
6296 int iPgidxOff = szLeaf = fts5GetU16(&a[2]);
6297 int iTermOff;
6298 int nKeep = 0;
6299 int iOff;
6300
6301 memset(&term, 0, sizeof(Fts5Buffer));
6302
6303 /* Decode any entries that occur before the first term. */
6304 if( szLeaf<n ){
6305 iPgidxOff += fts5GetVarint32(&a[iPgidxOff], iTermOff);
6306 }else{
6307 iTermOff = szLeaf;
6308 }
6309 fts5DecodeRowidList(&rc, &s, &a[4], iTermOff-4);
6310
6311 iOff = iTermOff;
6312 while( iOff<szLeaf ){
6313 int nAppend;
6314
6315 /* Read the term data for the next term*/
6316 iOff += fts5GetVarint32(&a[iOff], nAppend);
6317 term.n = nKeep;
6318 fts5BufferAppendBlob(&rc, &term, nAppend, &a[iOff]);
6319 sqlite3Fts5BufferAppendPrintf(
6320 &rc, &s, " term=%.*s", term.n, (const char*)term.p
6321 );
6322 iOff += nAppend;
6323
6324 /* Figure out where the doclist for this term ends */
6325 if( iPgidxOff<n ){
6326 int nIncr;
6327 iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nIncr);
6328 iTermOff += nIncr;
6329 }else{
6330 iTermOff = szLeaf;
6331 }
6332
6333 fts5DecodeRowidList(&rc, &s, &a[iOff], iTermOff-iOff);
6334 iOff = iTermOff;
6335 if( iOff<szLeaf ){
6336 iOff += fts5GetVarint32(&a[iOff], nKeep);
6337 }
6338 }
6339
6340 fts5BufferFree(&term);
5661 }else{ 6341 }else{
5662 Fts5Buffer term; /* Current term read from page */ 6342 Fts5Buffer term; /* Current term read from page */
5663 int szLeaf; /* Offset of pgidx in a[] */ 6343 int szLeaf; /* Offset of pgidx in a[] */
5664 int iPgidxOff; 6344 int iPgidxOff;
5665 int iPgidxPrev = 0; /* Previous value read from pgidx */ 6345 int iPgidxPrev = 0; /* Previous value read from pgidx */
5666 int iTermOff = 0; 6346 int iTermOff = 0;
5667 int iRowidOff = 0; 6347 int iRowidOff = 0;
5668 int iOff; 6348 int iOff;
5669 int nDoclist; 6349 int nDoclist;
5670 6350
(...skipping 107 matching lines...) Expand 10 before | Expand all | Expand 10 after
5778 ** connection db. It registers several user-defined scalar functions useful 6458 ** connection db. It registers several user-defined scalar functions useful
5779 ** with FTS5. 6459 ** with FTS5.
5780 ** 6460 **
5781 ** If successful, SQLITE_OK is returned. If an error occurs, some other 6461 ** If successful, SQLITE_OK is returned. If an error occurs, some other
5782 ** SQLite error code is returned instead. 6462 ** SQLite error code is returned instead.
5783 */ 6463 */
5784 int sqlite3Fts5IndexInit(sqlite3 *db){ 6464 int sqlite3Fts5IndexInit(sqlite3 *db){
5785 int rc = sqlite3_create_function( 6465 int rc = sqlite3_create_function(
5786 db, "fts5_decode", 2, SQLITE_UTF8, 0, fts5DecodeFunction, 0, 0 6466 db, "fts5_decode", 2, SQLITE_UTF8, 0, fts5DecodeFunction, 0, 0
5787 ); 6467 );
6468
6469 if( rc==SQLITE_OK ){
6470 rc = sqlite3_create_function(
6471 db, "fts5_decode_none", 2,
6472 SQLITE_UTF8, (void*)db, fts5DecodeFunction, 0, 0
6473 );
6474 }
6475
5788 if( rc==SQLITE_OK ){ 6476 if( rc==SQLITE_OK ){
5789 rc = sqlite3_create_function( 6477 rc = sqlite3_create_function(
5790 db, "fts5_rowid", -1, SQLITE_UTF8, 0, fts5RowidFunction, 0, 0 6478 db, "fts5_rowid", -1, SQLITE_UTF8, 0, fts5RowidFunction, 0, 0
5791 ); 6479 );
5792 } 6480 }
5793 return rc; 6481 return rc;
5794 } 6482 }
5795 6483
6484
6485 int sqlite3Fts5IndexReset(Fts5Index *p){
6486 assert( p->pStruct==0 || p->iStructVersion!=0 );
6487 if( fts5IndexDataVersion(p)!=p->iStructVersion ){
6488 fts5StructureInvalidate(p);
6489 }
6490 return fts5IndexReturn(p);
6491 }
OLDNEW
« no previous file with comments | « third_party/sqlite/src/ext/fts5/fts5_hash.c ('k') | third_party/sqlite/src/ext/fts5/fts5_main.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698