Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(265)

Unified Diff: third_party/sqlite/src/ext/fts5/fts5_index.c

Issue 2751253002: [sql] Import SQLite 3.17.0. (Closed)
Patch Set: also clang on Linux i386 Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « third_party/sqlite/src/ext/fts5/fts5_hash.c ('k') | third_party/sqlite/src/ext/fts5/fts5_main.c » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: third_party/sqlite/src/ext/fts5/fts5_index.c
diff --git a/third_party/sqlite/src/ext/fts5/fts5_index.c b/third_party/sqlite/src/ext/fts5/fts5_index.c
index c11abda5ba41128d44defa9a57216ce7cbc9e314..750e0ca82d0ffabae7f0a13fc8a25fef6eb5ea7a 100644
--- a/third_party/sqlite/src/ext/fts5/fts5_index.c
+++ b/third_party/sqlite/src/ext/fts5/fts5_index.c
@@ -261,6 +261,7 @@ typedef struct Fts5Data Fts5Data;
typedef struct Fts5DlidxIter Fts5DlidxIter;
typedef struct Fts5DlidxLvl Fts5DlidxLvl;
typedef struct Fts5DlidxWriter Fts5DlidxWriter;
+typedef struct Fts5Iter Fts5Iter;
typedef struct Fts5PageWriter Fts5PageWriter;
typedef struct Fts5SegIter Fts5SegIter;
typedef struct Fts5DoclistIter Fts5DoclistIter;
@@ -303,6 +304,10 @@ struct Fts5Index {
sqlite3_stmt *pIdxDeleter; /* "DELETE FROM %_idx WHERE segid=? */
sqlite3_stmt *pIdxSelect;
int nRead; /* Total number of blocks read */
+
+ sqlite3_stmt *pDataVersion;
+ i64 iStructVersion; /* data_version when pStruct read */
+ Fts5Structure *pStruct; /* Current db structure (or NULL) */
};
struct Fts5DoclistIter {
@@ -433,6 +438,9 @@ struct Fts5SegIter {
Fts5Data *pNextLeaf; /* Leaf page (iLeafPgno+1) */
int iLeafOffset; /* Byte offset within current leaf */
+ /* Next method */
+ void (*xNext)(Fts5Index*, Fts5SegIter*, int*);
+
/* The page and offset from which the current term was read. The offset
** is the offset of the first rowid in the current doclist. */
int iTermLeafPgno;
@@ -452,7 +460,7 @@ struct Fts5SegIter {
Fts5Buffer term; /* Current term */
i64 iRowid; /* Current rowid */
int nPos; /* Number of bytes in current position list */
- int bDel; /* True if the delete flag is set */
+ u8 bDel; /* True if the delete flag is set */
};
/*
@@ -466,7 +474,6 @@ struct Fts5SegIter {
#define FTS5_SEGITER_ONETERM 0x01
#define FTS5_SEGITER_REVERSE 0x02
-
/*
** Argument is a pointer to an Fts5Data structure that contains a leaf
** page. This macro evaluates to true if the leaf contains no terms, or
@@ -501,16 +508,20 @@ struct Fts5SegIter {
** Used by sqlite3Fts5IterPoslist() when the poslist needs to be buffered.
** There is no way to tell if this is populated or not.
*/
-struct Fts5IndexIter {
+struct Fts5Iter {
+ Fts5IndexIter base; /* Base class containing output vars */
+
Fts5Index *pIndex; /* Index that owns this iterator */
Fts5Structure *pStruct; /* Database structure for this iterator */
Fts5Buffer poslist; /* Buffer containing current poslist */
+ Fts5Colset *pColset; /* Restrict matches to these columns */
+
+ /* Invoked to set output variables. */
+ void (*xSetOutputs)(Fts5Iter*, Fts5SegIter*);
int nSeg; /* Size of aSeg[] array */
int bRev; /* True to iterate in reverse order */
u8 bSkipEmpty; /* True to skip deleted entries */
- u8 bEof; /* True at EOF */
- u8 bFiltered; /* True if column-filter already applied */
i64 iSwitchRowid; /* Firstest rowid of other than aFirst[1] */
Fts5CResult *aFirst; /* Current merge state (see above) */
@@ -600,17 +611,6 @@ static int fts5BufferCompare(Fts5Buffer *pLeft, Fts5Buffer *pRight){
return (res==0 ? (pLeft->n - pRight->n) : res);
}
-#ifdef SQLITE_DEBUG
-static int fts5BlobCompare(
- const u8 *pLeft, int nLeft,
- const u8 *pRight, int nRight
-){
- int nCmp = MIN(nLeft, nRight);
- int res = memcmp(pLeft, pRight, nCmp);
- return (res==0 ? (nLeft - nRight) : res);
-}
-#endif
-
static int fts5LeafFirstTermOff(Fts5Data *pLeaf){
int ret;
fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf], ret);
@@ -710,6 +710,18 @@ static void fts5DataRelease(Fts5Data *pData){
sqlite3_free(pData);
}
+static Fts5Data *fts5LeafRead(Fts5Index *p, i64 iRowid){
+ Fts5Data *pRet = fts5DataRead(p, iRowid);
+ if( pRet ){
+ if( pRet->szLeaf>pRet->nn ){
+ p->rc = FTS5_CORRUPT;
+ fts5DataRelease(pRet);
+ pRet = 0;
+ }
+ }
+ return pRet;
+}
+
static int fts5IndexPrepareStmt(
Fts5Index *p,
sqlite3_stmt **ppStmt,
@@ -869,28 +881,37 @@ static int fts5StructureDecode(
for(iLvl=0; rc==SQLITE_OK && iLvl<nLevel; iLvl++){
Fts5StructureLevel *pLvl = &pRet->aLevel[iLvl];
- int nTotal;
+ int nTotal = 0;
int iSeg;
- i += fts5GetVarint32(&pData[i], pLvl->nMerge);
- i += fts5GetVarint32(&pData[i], nTotal);
- assert( nTotal>=pLvl->nMerge );
- pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&rc,
- nTotal * sizeof(Fts5StructureSegment)
- );
+ if( i>=nData ){
+ rc = FTS5_CORRUPT;
+ }else{
+ i += fts5GetVarint32(&pData[i], pLvl->nMerge);
+ i += fts5GetVarint32(&pData[i], nTotal);
+ assert( nTotal>=pLvl->nMerge );
+ pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&rc,
+ nTotal * sizeof(Fts5StructureSegment)
+ );
+ }
if( rc==SQLITE_OK ){
pLvl->nSeg = nTotal;
for(iSeg=0; iSeg<nTotal; iSeg++){
+ if( i>=nData ){
+ rc = FTS5_CORRUPT;
+ break;
+ }
i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].iSegid);
i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoFirst);
i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoLast);
}
- }else{
- fts5StructureRelease(pRet);
- pRet = 0;
}
}
+ if( rc!=SQLITE_OK ){
+ fts5StructureRelease(pRet);
+ pRet = 0;
+ }
}
*ppOut = pRet;
@@ -953,6 +974,50 @@ static void fts5StructureExtendLevel(
}
}
+static Fts5Structure *fts5StructureReadUncached(Fts5Index *p){
+ Fts5Structure *pRet = 0;
+ Fts5Config *pConfig = p->pConfig;
+ int iCookie; /* Configuration cookie */
+ Fts5Data *pData;
+
+ pData = fts5DataRead(p, FTS5_STRUCTURE_ROWID);
+ if( p->rc==SQLITE_OK ){
+ /* TODO: Do we need this if the leaf-index is appended? Probably... */
+ memset(&pData->p[pData->nn], 0, FTS5_DATA_PADDING);
+ p->rc = fts5StructureDecode(pData->p, pData->nn, &iCookie, &pRet);
+ if( p->rc==SQLITE_OK && pConfig->iCookie!=iCookie ){
+ p->rc = sqlite3Fts5ConfigLoad(pConfig, iCookie);
+ }
+ fts5DataRelease(pData);
+ if( p->rc!=SQLITE_OK ){
+ fts5StructureRelease(pRet);
+ pRet = 0;
+ }
+ }
+
+ return pRet;
+}
+
+static i64 fts5IndexDataVersion(Fts5Index *p){
+ i64 iVersion = 0;
+
+ if( p->rc==SQLITE_OK ){
+ if( p->pDataVersion==0 ){
+ p->rc = fts5IndexPrepareStmt(p, &p->pDataVersion,
+ sqlite3_mprintf("PRAGMA %Q.data_version", p->pConfig->zDb)
+ );
+ if( p->rc ) return 0;
+ }
+
+ if( SQLITE_ROW==sqlite3_step(p->pDataVersion) ){
+ iVersion = sqlite3_column_int64(p->pDataVersion, 0);
+ }
+ p->rc = sqlite3_reset(p->pDataVersion);
+ }
+
+ return iVersion;
+}
+
/*
** Read, deserialize and return the structure record.
**
@@ -965,26 +1030,49 @@ static void fts5StructureExtendLevel(
** is called, it is a no-op.
*/
static Fts5Structure *fts5StructureRead(Fts5Index *p){
- Fts5Config *pConfig = p->pConfig;
- Fts5Structure *pRet = 0; /* Object to return */
- int iCookie; /* Configuration cookie */
- Fts5Data *pData;
- pData = fts5DataRead(p, FTS5_STRUCTURE_ROWID);
- if( p->rc ) return 0;
- /* TODO: Do we need this if the leaf-index is appended? Probably... */
- memset(&pData->p[pData->nn], 0, FTS5_DATA_PADDING);
- p->rc = fts5StructureDecode(pData->p, pData->nn, &iCookie, &pRet);
- if( p->rc==SQLITE_OK && pConfig->iCookie!=iCookie ){
- p->rc = sqlite3Fts5ConfigLoad(pConfig, iCookie);
+ if( p->pStruct==0 ){
+ p->iStructVersion = fts5IndexDataVersion(p);
+ if( p->rc==SQLITE_OK ){
+ p->pStruct = fts5StructureReadUncached(p);
+ }
}
- fts5DataRelease(pData);
- if( p->rc!=SQLITE_OK ){
- fts5StructureRelease(pRet);
- pRet = 0;
+#if 0
+ else{
+ Fts5Structure *pTest = fts5StructureReadUncached(p);
+ if( pTest ){
+ int i, j;
+ assert_nc( p->pStruct->nSegment==pTest->nSegment );
+ assert_nc( p->pStruct->nLevel==pTest->nLevel );
+ for(i=0; i<pTest->nLevel; i++){
+ assert_nc( p->pStruct->aLevel[i].nMerge==pTest->aLevel[i].nMerge );
+ assert_nc( p->pStruct->aLevel[i].nSeg==pTest->aLevel[i].nSeg );
+ for(j=0; j<pTest->aLevel[i].nSeg; j++){
+ Fts5StructureSegment *p1 = &pTest->aLevel[i].aSeg[j];
+ Fts5StructureSegment *p2 = &p->pStruct->aLevel[i].aSeg[j];
+ assert_nc( p1->iSegid==p2->iSegid );
+ assert_nc( p1->pgnoFirst==p2->pgnoFirst );
+ assert_nc( p1->pgnoLast==p2->pgnoLast );
+ }
+ }
+ fts5StructureRelease(pTest);
+ }
+ }
+#endif
+
+ if( p->rc!=SQLITE_OK ) return 0;
+ assert( p->iStructVersion!=0 );
+ assert( p->pStruct!=0 );
+ fts5StructureRef(p->pStruct);
+ return p->pStruct;
+}
+
+static void fts5StructureInvalidate(Fts5Index *p){
+ if( p->pStruct ){
+ fts5StructureRelease(p->pStruct);
+ p->pStruct = 0;
}
- return pRet;
}
/*
@@ -1442,7 +1530,7 @@ static void fts5SegIterNextPage(
pIter->pLeaf = pIter->pNextLeaf;
pIter->pNextLeaf = 0;
}else if( pIter->iLeafPgno<=pSeg->pgnoLast ){
- pIter->pLeaf = fts5DataRead(p,
+ pIter->pLeaf = fts5LeafRead(p,
FTS5_SEGMENT_ROWID(pSeg->iSegid, pIter->iLeafPgno)
);
}else{
@@ -1492,13 +1580,29 @@ static int fts5GetPoslistSize(const u8 *p, int *pnSz, int *pbDel){
static void fts5SegIterLoadNPos(Fts5Index *p, Fts5SegIter *pIter){
if( p->rc==SQLITE_OK ){
int iOff = pIter->iLeafOffset; /* Offset to read at */
- int nSz;
ASSERT_SZLEAF_OK(pIter->pLeaf);
- fts5FastGetVarint32(pIter->pLeaf->p, iOff, nSz);
- pIter->bDel = (nSz & 0x0001);
- pIter->nPos = nSz>>1;
+ if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){
+ int iEod = MIN(pIter->iEndofDoclist, pIter->pLeaf->szLeaf);
+ pIter->bDel = 0;
+ pIter->nPos = 1;
+ if( iOff<iEod && pIter->pLeaf->p[iOff]==0 ){
+ pIter->bDel = 1;
+ iOff++;
+ if( iOff<iEod && pIter->pLeaf->p[iOff]==0 ){
+ pIter->nPos = 1;
+ iOff++;
+ }else{
+ pIter->nPos = 0;
+ }
+ }
+ }else{
+ int nSz;
+ fts5FastGetVarint32(pIter->pLeaf->p, iOff, nSz);
+ pIter->bDel = (nSz & 0x0001);
+ pIter->nPos = nSz>>1;
+ assert_nc( pIter->nPos>=0 );
+ }
pIter->iLeafOffset = iOff;
- assert_nc( pIter->nPos>=0 );
}
}
@@ -1541,6 +1645,10 @@ static void fts5SegIterLoadTerm(Fts5Index *p, Fts5SegIter *pIter, int nKeep){
int nNew; /* Bytes of new data */
iOff += fts5GetVarint32(&a[iOff], nNew);
+ if( iOff+nNew>pIter->pLeaf->nn ){
+ p->rc = FTS5_CORRUPT;
+ return;
+ }
pIter->term.n = nKeep;
fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);
iOff += nNew;
@@ -1559,6 +1667,20 @@ static void fts5SegIterLoadTerm(Fts5Index *p, Fts5SegIter *pIter, int nKeep){
fts5SegIterLoadRowid(p, pIter);
}
+static void fts5SegIterNext(Fts5Index*, Fts5SegIter*, int*);
+static void fts5SegIterNext_Reverse(Fts5Index*, Fts5SegIter*, int*);
+static void fts5SegIterNext_None(Fts5Index*, Fts5SegIter*, int*);
+
+static void fts5SegIterSetNext(Fts5Index *p, Fts5SegIter *pIter){
+ if( pIter->flags & FTS5_SEGITER_REVERSE ){
+ pIter->xNext = fts5SegIterNext_Reverse;
+ }else if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){
+ pIter->xNext = fts5SegIterNext_None;
+ }else{
+ pIter->xNext = fts5SegIterNext;
+ }
+}
+
/*
** Initialize the iterator object pIter to iterate through the entries in
** segment pSeg. The iterator is left pointing to the first entry when
@@ -1584,6 +1706,7 @@ static void fts5SegIterInit(
if( p->rc==SQLITE_OK ){
memset(pIter, 0, sizeof(*pIter));
+ fts5SegIterSetNext(p, pIter);
pIter->pSeg = pSeg;
pIter->iLeafPgno = pSeg->pgnoFirst-1;
fts5SegIterNextPage(p, pIter);
@@ -1615,6 +1738,7 @@ static void fts5SegIterInit(
** byte of the position list content associated with said rowid.
*/
static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){
+ int eDetail = p->pConfig->eDetail;
int n = pIter->pLeaf->szLeaf;
int i = pIter->iLeafOffset;
u8 *a = pIter->pLeaf->p;
@@ -1627,15 +1751,24 @@ static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){
ASSERT_SZLEAF_OK(pIter->pLeaf);
while( 1 ){
i64 iDelta = 0;
- int nPos;
- int bDummy;
- i += fts5GetPoslistSize(&a[i], &nPos, &bDummy);
- i += nPos;
+ if( eDetail==FTS5_DETAIL_NONE ){
+ /* todo */
+ if( i<n && a[i]==0 ){
+ i++;
+ if( i<n && a[i]==0 ) i++;
+ }
+ }else{
+ int nPos;
+ int bDummy;
+ i += fts5GetPoslistSize(&a[i], &nPos, &bDummy);
+ i += nPos;
+ }
if( i>=n ) break;
i += fts5GetVarint(&a[i], (u64*)&iDelta);
pIter->iRowid += iDelta;
+ /* If necessary, grow the pIter->aRowidOffset[] array. */
if( iRowidOffset>=pIter->nRowidOffset ){
int nNew = pIter->nRowidOffset + 8;
int *aNew = (int*)sqlite3_realloc(pIter->aRowidOffset, nNew*sizeof(int));
@@ -1709,12 +1842,116 @@ static void fts5SegIterReverseNewPage(Fts5Index *p, Fts5SegIter *pIter){
** points to a delete marker. A delete marker is an entry with a 0 byte
** position-list.
*/
-static int fts5MultiIterIsEmpty(Fts5Index *p, Fts5IndexIter *pIter){
+static int fts5MultiIterIsEmpty(Fts5Index *p, Fts5Iter *pIter){
Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst];
return (p->rc==SQLITE_OK && pSeg->pLeaf && pSeg->nPos==0);
}
/*
+** Advance iterator pIter to the next entry.
+**
+** This version of fts5SegIterNext() is only used by reverse iterators.
+*/
+static void fts5SegIterNext_Reverse(
+ Fts5Index *p, /* FTS5 backend object */
+ Fts5SegIter *pIter, /* Iterator to advance */
+ int *pbUnused /* Unused */
+){
+ assert( pIter->flags & FTS5_SEGITER_REVERSE );
+ assert( pIter->pNextLeaf==0 );
+ UNUSED_PARAM(pbUnused);
+
+ if( pIter->iRowidOffset>0 ){
+ u8 *a = pIter->pLeaf->p;
+ int iOff;
+ i64 iDelta;
+
+ pIter->iRowidOffset--;
+ pIter->iLeafOffset = pIter->aRowidOffset[pIter->iRowidOffset];
+ fts5SegIterLoadNPos(p, pIter);
+ iOff = pIter->iLeafOffset;
+ if( p->pConfig->eDetail!=FTS5_DETAIL_NONE ){
+ iOff += pIter->nPos;
+ }
+ fts5GetVarint(&a[iOff], (u64*)&iDelta);
+ pIter->iRowid -= iDelta;
+ }else{
+ fts5SegIterReverseNewPage(p, pIter);
+ }
+}
+
+/*
+** Advance iterator pIter to the next entry.
+**
+** This version of fts5SegIterNext() is only used if detail=none and the
+** iterator is not a reverse direction iterator.
+*/
+static void fts5SegIterNext_None(
+ Fts5Index *p, /* FTS5 backend object */
+ Fts5SegIter *pIter, /* Iterator to advance */
+ int *pbNewTerm /* OUT: Set for new term */
+){
+ int iOff;
+
+ assert( p->rc==SQLITE_OK );
+ assert( (pIter->flags & FTS5_SEGITER_REVERSE)==0 );
+ assert( p->pConfig->eDetail==FTS5_DETAIL_NONE );
+
+ ASSERT_SZLEAF_OK(pIter->pLeaf);
+ iOff = pIter->iLeafOffset;
+
+ /* Next entry is on the next page */
+ if( pIter->pSeg && iOff>=pIter->pLeaf->szLeaf ){
+ fts5SegIterNextPage(p, pIter);
+ if( p->rc || pIter->pLeaf==0 ) return;
+ pIter->iRowid = 0;
+ iOff = 4;
+ }
+
+ if( iOff<pIter->iEndofDoclist ){
+ /* Next entry is on the current page */
+ i64 iDelta;
+ iOff += sqlite3Fts5GetVarint(&pIter->pLeaf->p[iOff], (u64*)&iDelta);
+ pIter->iLeafOffset = iOff;
+ pIter->iRowid += iDelta;
+ }else if( (pIter->flags & FTS5_SEGITER_ONETERM)==0 ){
+ if( pIter->pSeg ){
+ int nKeep = 0;
+ if( iOff!=fts5LeafFirstTermOff(pIter->pLeaf) ){
+ iOff += fts5GetVarint32(&pIter->pLeaf->p[iOff], nKeep);
+ }
+ pIter->iLeafOffset = iOff;
+ fts5SegIterLoadTerm(p, pIter, nKeep);
+ }else{
+ const u8 *pList = 0;
+ const char *zTerm = 0;
+ int nList;
+ sqlite3Fts5HashScanNext(p->pHash);
+ sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList);
+ if( pList==0 ) goto next_none_eof;
+ pIter->pLeaf->p = (u8*)pList;
+ pIter->pLeaf->nn = nList;
+ pIter->pLeaf->szLeaf = nList;
+ pIter->iEndofDoclist = nList;
+ sqlite3Fts5BufferSet(&p->rc,&pIter->term, (int)strlen(zTerm), (u8*)zTerm);
+ pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid);
+ }
+
+ if( pbNewTerm ) *pbNewTerm = 1;
+ }else{
+ goto next_none_eof;
+ }
+
+ fts5SegIterLoadNPos(p, pIter);
+
+ return;
+ next_none_eof:
+ fts5DataRelease(pIter->pLeaf);
+ pIter->pLeaf = 0;
+}
+
+
+/*
** Advance iterator pIter to the next entry.
**
** If an error occurs, Fts5Index.rc is set to an appropriate error code. It
@@ -1726,141 +1963,132 @@ static void fts5SegIterNext(
Fts5SegIter *pIter, /* Iterator to advance */
int *pbNewTerm /* OUT: Set for new term */
){
+ Fts5Data *pLeaf = pIter->pLeaf;
+ int iOff;
+ int bNewTerm = 0;
+ int nKeep = 0;
+ u8 *a;
+ int n;
+
assert( pbNewTerm==0 || *pbNewTerm==0 );
- if( p->rc==SQLITE_OK ){
- if( pIter->flags & FTS5_SEGITER_REVERSE ){
- assert( pIter->pNextLeaf==0 );
- if( pIter->iRowidOffset>0 ){
- u8 *a = pIter->pLeaf->p;
- int iOff;
- int nPos;
- int bDummy;
- i64 iDelta;
-
- pIter->iRowidOffset--;
- pIter->iLeafOffset = iOff = pIter->aRowidOffset[pIter->iRowidOffset];
- iOff += fts5GetPoslistSize(&a[iOff], &nPos, &bDummy);
- iOff += nPos;
- fts5GetVarint(&a[iOff], (u64*)&iDelta);
- pIter->iRowid -= iDelta;
- fts5SegIterLoadNPos(p, pIter);
- }else{
- fts5SegIterReverseNewPage(p, pIter);
+ assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE );
+
+ /* Search for the end of the position list within the current page. */
+ a = pLeaf->p;
+ n = pLeaf->szLeaf;
+
+ ASSERT_SZLEAF_OK(pLeaf);
+ iOff = pIter->iLeafOffset + pIter->nPos;
+
+ if( iOff<n ){
+ /* The next entry is on the current page. */
+ assert_nc( iOff<=pIter->iEndofDoclist );
+ if( iOff>=pIter->iEndofDoclist ){
+ bNewTerm = 1;
+ if( iOff!=fts5LeafFirstTermOff(pLeaf) ){
+ iOff += fts5GetVarint32(&a[iOff], nKeep);
}
}else{
- Fts5Data *pLeaf = pIter->pLeaf;
- int iOff;
- int bNewTerm = 0;
- int nKeep = 0;
-
- /* Search for the end of the position list within the current page. */
- u8 *a = pLeaf->p;
- int n = pLeaf->szLeaf;
+ u64 iDelta;
+ iOff += sqlite3Fts5GetVarint(&a[iOff], &iDelta);
+ pIter->iRowid += iDelta;
+ assert_nc( iDelta>0 );
+ }
+ pIter->iLeafOffset = iOff;
+ }else if( pIter->pSeg==0 ){
+ const u8 *pList = 0;
+ const char *zTerm = 0;
+ int nList = 0;
+ assert( (pIter->flags & FTS5_SEGITER_ONETERM) || pbNewTerm );
+ if( 0==(pIter->flags & FTS5_SEGITER_ONETERM) ){
+ sqlite3Fts5HashScanNext(p->pHash);
+ sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList);
+ }
+ if( pList==0 ){
+ fts5DataRelease(pIter->pLeaf);
+ pIter->pLeaf = 0;
+ }else{
+ pIter->pLeaf->p = (u8*)pList;
+ pIter->pLeaf->nn = nList;
+ pIter->pLeaf->szLeaf = nList;
+ pIter->iEndofDoclist = nList+1;
+ sqlite3Fts5BufferSet(&p->rc, &pIter->term, (int)strlen(zTerm),
+ (u8*)zTerm);
+ pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid);
+ *pbNewTerm = 1;
+ }
+ }else{
+ iOff = 0;
+ /* Next entry is not on the current page */
+ while( iOff==0 ){
+ fts5SegIterNextPage(p, pIter);
+ pLeaf = pIter->pLeaf;
+ if( pLeaf==0 ) break;
ASSERT_SZLEAF_OK(pLeaf);
- iOff = pIter->iLeafOffset + pIter->nPos;
-
- if( iOff<n ){
- /* The next entry is on the current page. */
- assert_nc( iOff<=pIter->iEndofDoclist );
- if( iOff>=pIter->iEndofDoclist ){
- bNewTerm = 1;
- if( iOff!=fts5LeafFirstTermOff(pLeaf) ){
- iOff += fts5GetVarint32(&a[iOff], nKeep);
- }
- }else{
- u64 iDelta;
- iOff += sqlite3Fts5GetVarint(&a[iOff], &iDelta);
- pIter->iRowid += iDelta;
- assert_nc( iDelta>0 );
- }
+ if( (iOff = fts5LeafFirstRowidOff(pLeaf)) && iOff<pLeaf->szLeaf ){
+ iOff += sqlite3Fts5GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid);
pIter->iLeafOffset = iOff;
- }else if( pIter->pSeg==0 ){
- const u8 *pList = 0;
- const char *zTerm = 0;
- int nList = 0;
- assert( (pIter->flags & FTS5_SEGITER_ONETERM) || pbNewTerm );
- if( 0==(pIter->flags & FTS5_SEGITER_ONETERM) ){
- sqlite3Fts5HashScanNext(p->pHash);
- sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList);
- }
- if( pList==0 ){
- fts5DataRelease(pIter->pLeaf);
- pIter->pLeaf = 0;
- }else{
- pIter->pLeaf->p = (u8*)pList;
- pIter->pLeaf->nn = nList;
- pIter->pLeaf->szLeaf = nList;
- pIter->iEndofDoclist = nList+1;
- sqlite3Fts5BufferSet(&p->rc, &pIter->term, (int)strlen(zTerm),
- (u8*)zTerm);
- pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid);
- *pbNewTerm = 1;
- }
- }else{
- iOff = 0;
- /* Next entry is not on the current page */
- while( iOff==0 ){
- fts5SegIterNextPage(p, pIter);
- pLeaf = pIter->pLeaf;
- if( pLeaf==0 ) break;
- ASSERT_SZLEAF_OK(pLeaf);
- if( (iOff = fts5LeafFirstRowidOff(pLeaf)) && iOff<pLeaf->szLeaf ){
- iOff += sqlite3Fts5GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid);
- pIter->iLeafOffset = iOff;
-
- if( pLeaf->nn>pLeaf->szLeaf ){
- pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(
- &pLeaf->p[pLeaf->szLeaf], pIter->iEndofDoclist
- );
- }
-
- }
- else if( pLeaf->nn>pLeaf->szLeaf ){
- pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(
- &pLeaf->p[pLeaf->szLeaf], iOff
- );
- pIter->iLeafOffset = iOff;
- pIter->iEndofDoclist = iOff;
- bNewTerm = 1;
- }
- if( iOff>=pLeaf->szLeaf ){
- p->rc = FTS5_CORRUPT;
- return;
- }
+ if( pLeaf->nn>pLeaf->szLeaf ){
+ pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(
+ &pLeaf->p[pLeaf->szLeaf], pIter->iEndofDoclist
+ );
}
}
+ else if( pLeaf->nn>pLeaf->szLeaf ){
+ pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(
+ &pLeaf->p[pLeaf->szLeaf], iOff
+ );
+ pIter->iLeafOffset = iOff;
+ pIter->iEndofDoclist = iOff;
+ bNewTerm = 1;
+ }
+ assert_nc( iOff<pLeaf->szLeaf );
+ if( iOff>pLeaf->szLeaf ){
+ p->rc = FTS5_CORRUPT;
+ return;
+ }
+ }
+ }
- /* Check if the iterator is now at EOF. If so, return early. */
- if( pIter->pLeaf ){
- if( bNewTerm ){
- if( pIter->flags & FTS5_SEGITER_ONETERM ){
- fts5DataRelease(pIter->pLeaf);
- pIter->pLeaf = 0;
- }else{
- fts5SegIterLoadTerm(p, pIter, nKeep);
- fts5SegIterLoadNPos(p, pIter);
- if( pbNewTerm ) *pbNewTerm = 1;
- }
- }else{
- /* The following could be done by calling fts5SegIterLoadNPos(). But
- ** this block is particularly performance critical, so equivalent
- ** code is inlined. */
- int nSz;
- assert( p->rc==SQLITE_OK );
- fts5FastGetVarint32(pIter->pLeaf->p, pIter->iLeafOffset, nSz);
- pIter->bDel = (nSz & 0x0001);
- pIter->nPos = nSz>>1;
- assert_nc( pIter->nPos>=0 );
- }
+ /* Check if the iterator is now at EOF. If so, return early. */
+ if( pIter->pLeaf ){
+ if( bNewTerm ){
+ if( pIter->flags & FTS5_SEGITER_ONETERM ){
+ fts5DataRelease(pIter->pLeaf);
+ pIter->pLeaf = 0;
+ }else{
+ fts5SegIterLoadTerm(p, pIter, nKeep);
+ fts5SegIterLoadNPos(p, pIter);
+ if( pbNewTerm ) *pbNewTerm = 1;
}
+ }else{
+ /* The following could be done by calling fts5SegIterLoadNPos(). But
+ ** this block is particularly performance critical, so equivalent
+ ** code is inlined.
+ **
+ ** Later: Switched back to fts5SegIterLoadNPos() because it supports
+ ** detail=none mode. Not ideal.
+ */
+ int nSz;
+ assert( p->rc==SQLITE_OK );
+ assert( pIter->iLeafOffset<=pIter->pLeaf->nn );
+ fts5FastGetVarint32(pIter->pLeaf->p, pIter->iLeafOffset, nSz);
+ pIter->bDel = (nSz & 0x0001);
+ pIter->nPos = nSz>>1;
+ assert_nc( pIter->nPos>=0 );
}
}
}
#define SWAPVAL(T, a, b) { T tmp; tmp=a; a=b; b=tmp; }
+#define fts5IndexSkipVarint(a, iOff) { \
+ int iEnd = iOff+9; \
+ while( (a[iOff++] & 0x80) && iOff<iEnd ); \
+}
+
/*
** Iterator pIter currently points to the first rowid in a doclist. This
** function sets the iterator up so that iterates in reverse order through
@@ -1881,7 +2109,14 @@ static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){
/* Currently, Fts5SegIter.iLeafOffset points to the first byte of
** position-list content for the current rowid. Back it up so that it
** points to the start of the position-list size field. */
- pIter->iLeafOffset -= sqlite3Fts5GetVarintLen(pIter->nPos*2+pIter->bDel);
+ int iPoslist;
+ if( pIter->iTermLeafPgno==pIter->iLeafPgno ){
+ iPoslist = pIter->iTermLeafOffset;
+ }else{
+ iPoslist = 4;
+ }
+ fts5IndexSkipVarint(pLeaf->p, iPoslist);
+ pIter->iLeafOffset = iPoslist;
/* If this condition is true then the largest rowid for the current
** term may not be stored on the current page. So search forward to
@@ -1965,11 +2200,6 @@ static void fts5SegIterLoadDlidx(Fts5Index *p, Fts5SegIter *pIter){
pIter->pDlidx = fts5DlidxIterInit(p, bRev, iSeg, pIter->iTermLeafPgno);
}
-#define fts5IndexSkipVarint(a, iOff) { \
- int iEnd = iOff+9; \
- while( (a[iOff++] & 0x80) && iOff<iEnd ); \
-}
-
/*
** The iterator object passed as the second argument currently contains
** no valid values except for the Fts5SegIter.pLeaf member variable. This
@@ -2007,6 +2237,10 @@ static void fts5LeafSeek(
iPgidx = szLeaf;
iPgidx += fts5GetVarint32(&a[iPgidx], iTermOff);
iOff = iTermOff;
+ if( iOff>n ){
+ p->rc = FTS5_CORRUPT;
+ return;
+ }
while( 1 ){
@@ -2046,6 +2280,11 @@ static void fts5LeafSeek(
iTermOff += nKeep;
iOff = iTermOff;
+ if( iOff>=n ){
+ p->rc = FTS5_CORRUPT;
+ return;
+ }
+
/* Read the nKeep field of the next term. */
fts5FastGetVarint32(a, iOff, nKeep);
}
@@ -2098,6 +2337,18 @@ static void fts5LeafSeek(
fts5SegIterLoadNPos(p, pIter);
}
+static sqlite3_stmt *fts5IdxSelectStmt(Fts5Index *p){
+ if( p->pIdxSelect==0 ){
+ Fts5Config *pConfig = p->pConfig;
+ fts5IndexPrepareStmt(p, &p->pIdxSelect, sqlite3_mprintf(
+ "SELECT pgno FROM '%q'.'%q_idx' WHERE "
+ "segid=? AND term<=? ORDER BY term DESC LIMIT 1",
+ pConfig->zDb, pConfig->zName
+ ));
+ }
+ return p->pIdxSelect;
+}
+
/*
** Initialize the object pIter to point to term pTerm/nTerm within segment
** pSeg. If there is no such term in the index, the iterator is set to EOF.
@@ -2107,7 +2358,6 @@ static void fts5LeafSeek(
*/
static void fts5SegIterSeekInit(
Fts5Index *p, /* FTS5 backend */
- Fts5Buffer *pBuf, /* Buffer to use for loading pages */
const u8 *pTerm, int nTerm, /* Term to seek to */
int flags, /* Mask of FTS5INDEX_XXX flags */
Fts5StructureSegment *pSeg, /* Description of segment */
@@ -2116,9 +2366,7 @@ static void fts5SegIterSeekInit(
int iPg = 1;
int bGe = (flags & FTS5INDEX_QUERY_SCAN);
int bDlidx = 0; /* True if there is a doclist-index */
-
- static int nCall = 0;
- nCall++;
+ sqlite3_stmt *pIdxSelect = 0;
assert( bGe==0 || (flags & FTS5INDEX_QUERY_DESC)==0 );
assert( pTerm && nTerm );
@@ -2127,23 +2375,16 @@ static void fts5SegIterSeekInit(
/* This block sets stack variable iPg to the leaf page number that may
** contain term (pTerm/nTerm), if it is present in the segment. */
- if( p->pIdxSelect==0 ){
- Fts5Config *pConfig = p->pConfig;
- fts5IndexPrepareStmt(p, &p->pIdxSelect, sqlite3_mprintf(
- "SELECT pgno FROM '%q'.'%q_idx' WHERE "
- "segid=? AND term<=? ORDER BY term DESC LIMIT 1",
- pConfig->zDb, pConfig->zName
- ));
- }
+ pIdxSelect = fts5IdxSelectStmt(p);
if( p->rc ) return;
- sqlite3_bind_int(p->pIdxSelect, 1, pSeg->iSegid);
- sqlite3_bind_blob(p->pIdxSelect, 2, pTerm, nTerm, SQLITE_STATIC);
- if( SQLITE_ROW==sqlite3_step(p->pIdxSelect) ){
- i64 val = sqlite3_column_int(p->pIdxSelect, 0);
+ sqlite3_bind_int(pIdxSelect, 1, pSeg->iSegid);
+ sqlite3_bind_blob(pIdxSelect, 2, pTerm, nTerm, SQLITE_STATIC);
+ if( SQLITE_ROW==sqlite3_step(pIdxSelect) ){
+ i64 val = sqlite3_column_int(pIdxSelect, 0);
iPg = (int)(val>>1);
bDlidx = (val & 0x0001);
}
- p->rc = sqlite3_reset(p->pIdxSelect);
+ p->rc = sqlite3_reset(pIdxSelect);
if( iPg<pSeg->pgnoFirst ){
iPg = pSeg->pgnoFirst;
@@ -2172,6 +2413,8 @@ static void fts5SegIterSeekInit(
}
}
+ fts5SegIterSetNext(p, pIter);
+
/* Either:
**
** 1) an error has occurred, or
@@ -2229,7 +2472,7 @@ static void fts5SegIterHashInit(
pLeaf->nn = pLeaf->szLeaf = nList;
pIter->pLeaf = pLeaf;
pIter->iLeafOffset = fts5GetVarint(pLeaf->p, (u64*)&pIter->iRowid);
- pIter->iEndofDoclist = pLeaf->nn+1;
+ pIter->iEndofDoclist = pLeaf->nn;
if( flags & FTS5INDEX_QUERY_DESC ){
pIter->flags |= FTS5_SEGITER_REVERSE;
@@ -2238,6 +2481,8 @@ static void fts5SegIterHashInit(
fts5SegIterLoadNPos(p, pIter);
}
}
+
+ fts5SegIterSetNext(p, pIter);
}
/*
@@ -2261,7 +2506,7 @@ static void fts5SegIterClear(Fts5SegIter *pIter){
** two iterators.
*/
static void fts5AssertComparisonResult(
- Fts5IndexIter *pIter,
+ Fts5Iter *pIter,
Fts5SegIter *p1,
Fts5SegIter *p2,
Fts5CResult *pRes
@@ -2302,12 +2547,12 @@ static void fts5AssertComparisonResult(
** statement used to verify that the contents of the pIter->aFirst[] array
** are correct.
*/
-static void fts5AssertMultiIterSetup(Fts5Index *p, Fts5IndexIter *pIter){
+static void fts5AssertMultiIterSetup(Fts5Index *p, Fts5Iter *pIter){
if( p->rc==SQLITE_OK ){
Fts5SegIter *pFirst = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
int i;
- assert( (pFirst->pLeaf==0)==pIter->bEof );
+ assert( (pFirst->pLeaf==0)==pIter->base.bEof );
/* Check that pIter->iSwitchRowid is set correctly. */
for(i=0; i<pIter->nSeg; i++){
@@ -2347,7 +2592,7 @@ static void fts5AssertMultiIterSetup(Fts5Index *p, Fts5IndexIter *pIter){
** to a key that is a duplicate of another, higher priority,
** segment-iterator in the pSeg->aSeg[] array.
*/
-static int fts5MultiIterDoCompare(Fts5IndexIter *pIter, int iOut){
+static int fts5MultiIterDoCompare(Fts5Iter *pIter, int iOut){
int i1; /* Index of left-hand Fts5SegIter */
int i2; /* Index of right-hand Fts5SegIter */
int iRes;
@@ -2481,7 +2726,7 @@ static void fts5SegIterNextFrom(
}
do{
- if( bMove ) fts5SegIterNext(p, pIter, 0);
+ if( bMove && p->rc==SQLITE_OK ) pIter->xNext(p, pIter, 0);
if( pIter->pLeaf==0 ) break;
if( bRev==0 && pIter->iRowid>=iMatch ) break;
if( bRev!=0 && pIter->iRowid<=iMatch ) break;
@@ -2493,7 +2738,7 @@ static void fts5SegIterNextFrom(
/*
** Free the iterator object passed as the second argument.
*/
-static void fts5MultiIterFree(Fts5Index *p, Fts5IndexIter *pIter){
+static void fts5MultiIterFree(Fts5Iter *pIter){
if( pIter ){
int i;
for(i=0; i<pIter->nSeg; i++){
@@ -2507,7 +2752,7 @@ static void fts5MultiIterFree(Fts5Index *p, Fts5IndexIter *pIter){
static void fts5MultiIterAdvanced(
Fts5Index *p, /* FTS5 backend to iterate within */
- Fts5IndexIter *pIter, /* Iterator to update aFirst[] array for */
+ Fts5Iter *pIter, /* Iterator to update aFirst[] array for */
int iChanged, /* Index of sub-iterator just advanced */
int iMinset /* Minimum entry in aFirst[] to set */
){
@@ -2515,7 +2760,9 @@ static void fts5MultiIterAdvanced(
for(i=(pIter->nSeg+iChanged)/2; i>=iMinset && p->rc==SQLITE_OK; i=i/2){
int iEq;
if( (iEq = fts5MultiIterDoCompare(pIter, i)) ){
- fts5SegIterNext(p, &pIter->aSeg[iEq], 0);
+ Fts5SegIter *pSeg = &pIter->aSeg[iEq];
+ assert( p->rc==SQLITE_OK );
+ pSeg->xNext(p, pSeg, 0);
i = pIter->nSeg + iEq;
}
}
@@ -2532,9 +2779,9 @@ static void fts5MultiIterAdvanced(
** that it deals with more complicated cases as well.
*/
static int fts5MultiIterAdvanceRowid(
- Fts5Index *p, /* FTS5 backend to iterate within */
- Fts5IndexIter *pIter, /* Iterator to update aFirst[] array for */
- int iChanged /* Index of sub-iterator just advanced */
+ Fts5Iter *pIter, /* Iterator to update aFirst[] array for */
+ int iChanged, /* Index of sub-iterator just advanced */
+ Fts5SegIter **ppFirst
){
Fts5SegIter *pNew = &pIter->aSeg[iChanged];
@@ -2567,15 +2814,16 @@ static int fts5MultiIterAdvanceRowid(
}
}
+ *ppFirst = pNew;
return 0;
}
/*
** Set the pIter->bEof variable based on the state of the sub-iterators.
*/
-static void fts5MultiIterSetEof(Fts5IndexIter *pIter){
+static void fts5MultiIterSetEof(Fts5Iter *pIter){
Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
- pIter->bEof = pSeg->pLeaf==0;
+ pIter->base.bEof = pSeg->pLeaf==0;
pIter->iSwitchRowid = pSeg->iRowid;
}
@@ -2588,39 +2836,45 @@ static void fts5MultiIterSetEof(Fts5IndexIter *pIter){
*/
static void fts5MultiIterNext(
Fts5Index *p,
- Fts5IndexIter *pIter,
+ Fts5Iter *pIter,
int bFrom, /* True if argument iFrom is valid */
i64 iFrom /* Advance at least as far as this */
){
- if( p->rc==SQLITE_OK ){
- int bUseFrom = bFrom;
- do {
- int iFirst = pIter->aFirst[1].iFirst;
- int bNewTerm = 0;
- Fts5SegIter *pSeg = &pIter->aSeg[iFirst];
- assert( p->rc==SQLITE_OK );
- if( bUseFrom && pSeg->pDlidx ){
- fts5SegIterNextFrom(p, pSeg, iFrom);
- }else{
- fts5SegIterNext(p, pSeg, &bNewTerm);
- }
+ int bUseFrom = bFrom;
+ assert( pIter->base.bEof==0 );
+ while( p->rc==SQLITE_OK ){
+ int iFirst = pIter->aFirst[1].iFirst;
+ int bNewTerm = 0;
+ Fts5SegIter *pSeg = &pIter->aSeg[iFirst];
+ assert( p->rc==SQLITE_OK );
+ if( bUseFrom && pSeg->pDlidx ){
+ fts5SegIterNextFrom(p, pSeg, iFrom);
+ }else{
+ pSeg->xNext(p, pSeg, &bNewTerm);
+ }
- if( pSeg->pLeaf==0 || bNewTerm
- || fts5MultiIterAdvanceRowid(p, pIter, iFirst)
- ){
- fts5MultiIterAdvanced(p, pIter, iFirst, 1);
- fts5MultiIterSetEof(pIter);
- }
- fts5AssertMultiIterSetup(p, pIter);
+ if( pSeg->pLeaf==0 || bNewTerm
+ || fts5MultiIterAdvanceRowid(pIter, iFirst, &pSeg)
+ ){
+ fts5MultiIterAdvanced(p, pIter, iFirst, 1);
+ fts5MultiIterSetEof(pIter);
+ pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst];
+ if( pSeg->pLeaf==0 ) return;
+ }
- bUseFrom = 0;
- }while( pIter->bSkipEmpty && fts5MultiIterIsEmpty(p, pIter) );
+ fts5AssertMultiIterSetup(p, pIter);
+ assert( pSeg==&pIter->aSeg[pIter->aFirst[1].iFirst] && pSeg->pLeaf );
+ if( pIter->bSkipEmpty==0 || pSeg->nPos ){
+ pIter->xSetOutputs(pIter, pSeg);
+ return;
+ }
+ bUseFrom = 0;
}
}
static void fts5MultiIterNext2(
Fts5Index *p,
- Fts5IndexIter *pIter,
+ Fts5Iter *pIter,
int *pbNewTerm /* OUT: True if *might* be new term */
){
assert( pIter->bSkipEmpty );
@@ -2630,9 +2884,10 @@ static void fts5MultiIterNext2(
Fts5SegIter *pSeg = &pIter->aSeg[iFirst];
int bNewTerm = 0;
- fts5SegIterNext(p, pSeg, &bNewTerm);
+ assert( p->rc==SQLITE_OK );
+ pSeg->xNext(p, pSeg, &bNewTerm);
if( pSeg->pLeaf==0 || bNewTerm
- || fts5MultiIterAdvanceRowid(p, pIter, iFirst)
+ || fts5MultiIterAdvanceRowid(pIter, iFirst, &pSeg)
){
fts5MultiIterAdvanced(p, pIter, iFirst, 1);
fts5MultiIterSetEof(pIter);
@@ -2646,17 +2901,20 @@ static void fts5MultiIterNext2(
}
}
+static void fts5IterSetOutputs_Noop(Fts5Iter *pUnused1, Fts5SegIter *pUnused2){
+ UNUSED_PARAM2(pUnused1, pUnused2);
+}
-static Fts5IndexIter *fts5MultiIterAlloc(
+static Fts5Iter *fts5MultiIterAlloc(
Fts5Index *p, /* FTS5 backend to iterate within */
int nSeg
){
- Fts5IndexIter *pNew;
+ Fts5Iter *pNew;
int nSlot; /* Power of two >= nSeg */
for(nSlot=2; nSlot<nSeg; nSlot=nSlot*2);
pNew = fts5IdxMalloc(p,
- sizeof(Fts5IndexIter) + /* pNew */
+ sizeof(Fts5Iter) + /* pNew */
sizeof(Fts5SegIter) * (nSlot-1) + /* pNew->aSeg[] */
sizeof(Fts5CResult) * nSlot /* pNew->aFirst[] */
);
@@ -2664,12 +2922,433 @@ static Fts5IndexIter *fts5MultiIterAlloc(
pNew->nSeg = nSlot;
pNew->aFirst = (Fts5CResult*)&pNew->aSeg[nSlot];
pNew->pIndex = p;
+ pNew->xSetOutputs = fts5IterSetOutputs_Noop;
}
return pNew;
}
+static void fts5PoslistCallback(
+ Fts5Index *pUnused,
+ void *pContext,
+ const u8 *pChunk, int nChunk
+){
+ UNUSED_PARAM(pUnused);
+ assert_nc( nChunk>=0 );
+ if( nChunk>0 ){
+ fts5BufferSafeAppendBlob((Fts5Buffer*)pContext, pChunk, nChunk);
+ }
+}
+
+typedef struct PoslistCallbackCtx PoslistCallbackCtx;
+struct PoslistCallbackCtx {
+ Fts5Buffer *pBuf; /* Append to this buffer */
+ Fts5Colset *pColset; /* Restrict matches to this column */
+ int eState; /* See above */
+};
+
+typedef struct PoslistOffsetsCtx PoslistOffsetsCtx;
+struct PoslistOffsetsCtx {
+ Fts5Buffer *pBuf; /* Append to this buffer */
+ Fts5Colset *pColset; /* Restrict matches to this column */
+ int iRead;
+ int iWrite;
+};
+
/*
-** Allocate a new Fts5IndexIter object.
+** TODO: Make this more efficient!
+*/
+static int fts5IndexColsetTest(Fts5Colset *pColset, int iCol){
+ int i;
+ for(i=0; i<pColset->nCol; i++){
+ if( pColset->aiCol[i]==iCol ) return 1;
+ }
+ return 0;
+}
+
+static void fts5PoslistOffsetsCallback(
+ Fts5Index *pUnused,
+ void *pContext,
+ const u8 *pChunk, int nChunk
+){
+ PoslistOffsetsCtx *pCtx = (PoslistOffsetsCtx*)pContext;
+ UNUSED_PARAM(pUnused);
+ assert_nc( nChunk>=0 );
+ if( nChunk>0 ){
+ int i = 0;
+ while( i<nChunk ){
+ int iVal;
+ i += fts5GetVarint32(&pChunk[i], iVal);
+ iVal += pCtx->iRead - 2;
+ pCtx->iRead = iVal;
+ if( fts5IndexColsetTest(pCtx->pColset, iVal) ){
+ fts5BufferSafeAppendVarint(pCtx->pBuf, iVal + 2 - pCtx->iWrite);
+ pCtx->iWrite = iVal;
+ }
+ }
+ }
+}
+
+static void fts5PoslistFilterCallback(
+ Fts5Index *pUnused,
+ void *pContext,
+ const u8 *pChunk, int nChunk
+){
+ PoslistCallbackCtx *pCtx = (PoslistCallbackCtx*)pContext;
+ UNUSED_PARAM(pUnused);
+ assert_nc( nChunk>=0 );
+ if( nChunk>0 ){
+ /* Search through to find the first varint with value 1. This is the
+ ** start of the next columns hits. */
+ int i = 0;
+ int iStart = 0;
+
+ if( pCtx->eState==2 ){
+ int iCol;
+ fts5FastGetVarint32(pChunk, i, iCol);
+ if( fts5IndexColsetTest(pCtx->pColset, iCol) ){
+ pCtx->eState = 1;
+ fts5BufferSafeAppendVarint(pCtx->pBuf, 1);
+ }else{
+ pCtx->eState = 0;
+ }
+ }
+
+ do {
+ while( i<nChunk && pChunk[i]!=0x01 ){
+ while( pChunk[i] & 0x80 ) i++;
+ i++;
+ }
+ if( pCtx->eState ){
+ fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart);
+ }
+ if( i<nChunk ){
+ int iCol;
+ iStart = i;
+ i++;
+ if( i>=nChunk ){
+ pCtx->eState = 2;
+ }else{
+ fts5FastGetVarint32(pChunk, i, iCol);
+ pCtx->eState = fts5IndexColsetTest(pCtx->pColset, iCol);
+ if( pCtx->eState ){
+ fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart);
+ iStart = i;
+ }
+ }
+ }
+ }while( i<nChunk );
+ }
+}
+
+static void fts5ChunkIterate(
+ Fts5Index *p, /* Index object */
+ Fts5SegIter *pSeg, /* Poslist of this iterator */
+ void *pCtx, /* Context pointer for xChunk callback */
+ void (*xChunk)(Fts5Index*, void*, const u8*, int)
+){
+ int nRem = pSeg->nPos; /* Number of bytes still to come */
+ Fts5Data *pData = 0;
+ u8 *pChunk = &pSeg->pLeaf->p[pSeg->iLeafOffset];
+ int nChunk = MIN(nRem, pSeg->pLeaf->szLeaf - pSeg->iLeafOffset);
+ int pgno = pSeg->iLeafPgno;
+ int pgnoSave = 0;
+
+ /* This function does notmwork with detail=none databases. */
+ assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE );
+
+ if( (pSeg->flags & FTS5_SEGITER_REVERSE)==0 ){
+ pgnoSave = pgno+1;
+ }
+
+ while( 1 ){
+ xChunk(p, pCtx, pChunk, nChunk);
+ nRem -= nChunk;
+ fts5DataRelease(pData);
+ if( nRem<=0 ){
+ break;
+ }else{
+ pgno++;
+ pData = fts5LeafRead(p, FTS5_SEGMENT_ROWID(pSeg->pSeg->iSegid, pgno));
+ if( pData==0 ) break;
+ pChunk = &pData->p[4];
+ nChunk = MIN(nRem, pData->szLeaf - 4);
+ if( pgno==pgnoSave ){
+ assert( pSeg->pNextLeaf==0 );
+ pSeg->pNextLeaf = pData;
+ pData = 0;
+ }
+ }
+ }
+}
+
+/*
+** Iterator pIter currently points to a valid entry (not EOF). This
+** function appends the position list data for the current entry to
+** buffer pBuf. It does not make a copy of the position-list size
+** field.
+*/
+static void fts5SegiterPoslist(
+ Fts5Index *p,
+ Fts5SegIter *pSeg,
+ Fts5Colset *pColset,
+ Fts5Buffer *pBuf
+){
+ if( 0==fts5BufferGrow(&p->rc, pBuf, pSeg->nPos) ){
+ if( pColset==0 ){
+ fts5ChunkIterate(p, pSeg, (void*)pBuf, fts5PoslistCallback);
+ }else{
+ if( p->pConfig->eDetail==FTS5_DETAIL_FULL ){
+ PoslistCallbackCtx sCtx;
+ sCtx.pBuf = pBuf;
+ sCtx.pColset = pColset;
+ sCtx.eState = fts5IndexColsetTest(pColset, 0);
+ assert( sCtx.eState==0 || sCtx.eState==1 );
+ fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistFilterCallback);
+ }else{
+ PoslistOffsetsCtx sCtx;
+ memset(&sCtx, 0, sizeof(sCtx));
+ sCtx.pBuf = pBuf;
+ sCtx.pColset = pColset;
+ fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistOffsetsCallback);
+ }
+ }
+ }
+}
+
+/*
+** IN/OUT parameter (*pa) points to a position list n bytes in size. If
+** the position list contains entries for column iCol, then (*pa) is set
+** to point to the sub-position-list for that column and the number of
+** bytes in it returned. Or, if the argument position list does not
+** contain any entries for column iCol, return 0.
+*/
+static int fts5IndexExtractCol(
+ const u8 **pa, /* IN/OUT: Pointer to poslist */
+ int n, /* IN: Size of poslist in bytes */
+ int iCol /* Column to extract from poslist */
+){
+ int iCurrent = 0; /* Anything before the first 0x01 is col 0 */
+ const u8 *p = *pa;
+ const u8 *pEnd = &p[n]; /* One byte past end of position list */
+
+ while( iCol>iCurrent ){
+ /* Advance pointer p until it points to pEnd or an 0x01 byte that is
+ ** not part of a varint. Note that it is not possible for a negative
+ ** or extremely large varint to occur within an uncorrupted position
+ ** list. So the last byte of each varint may be assumed to have a clear
+ ** 0x80 bit. */
+ while( *p!=0x01 ){
+ while( *p++ & 0x80 );
+ if( p>=pEnd ) return 0;
+ }
+ *pa = p++;
+ iCurrent = *p++;
+ if( iCurrent & 0x80 ){
+ p--;
+ p += fts5GetVarint32(p, iCurrent);
+ }
+ }
+ if( iCol!=iCurrent ) return 0;
+
+ /* Advance pointer p until it points to pEnd or an 0x01 byte that is
+ ** not part of a varint */
+ while( p<pEnd && *p!=0x01 ){
+ while( *p++ & 0x80 );
+ }
+
+ return p - (*pa);
+}
+
+static int fts5IndexExtractColset (
+ Fts5Colset *pColset, /* Colset to filter on */
+ const u8 *pPos, int nPos, /* Position list */
+ Fts5Buffer *pBuf /* Output buffer */
+){
+ int rc = SQLITE_OK;
+ int i;
+
+ fts5BufferZero(pBuf);
+ for(i=0; i<pColset->nCol; i++){
+ const u8 *pSub = pPos;
+ int nSub = fts5IndexExtractCol(&pSub, nPos, pColset->aiCol[i]);
+ if( nSub ){
+ fts5BufferAppendBlob(&rc, pBuf, nSub, pSub);
+ }
+ }
+ return rc;
+}
+
+/*
+** xSetOutputs callback used by detail=none tables.
+*/
+static void fts5IterSetOutputs_None(Fts5Iter *pIter, Fts5SegIter *pSeg){
+ assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_NONE );
+ pIter->base.iRowid = pSeg->iRowid;
+ pIter->base.nData = pSeg->nPos;
+}
+
+/*
+** xSetOutputs callback used by detail=full and detail=col tables when no
+** column filters are specified.
+*/
+static void fts5IterSetOutputs_Nocolset(Fts5Iter *pIter, Fts5SegIter *pSeg){
+ pIter->base.iRowid = pSeg->iRowid;
+ pIter->base.nData = pSeg->nPos;
+
+ assert( pIter->pIndex->pConfig->eDetail!=FTS5_DETAIL_NONE );
+ assert( pIter->pColset==0 );
+
+ if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){
+ /* All data is stored on the current page. Populate the output
+ ** variables to point into the body of the page object. */
+ pIter->base.pData = &pSeg->pLeaf->p[pSeg->iLeafOffset];
+ }else{
+ /* The data is distributed over two or more pages. Copy it into the
+ ** Fts5Iter.poslist buffer and then set the output pointer to point
+ ** to this buffer. */
+ fts5BufferZero(&pIter->poslist);
+ fts5SegiterPoslist(pIter->pIndex, pSeg, 0, &pIter->poslist);
+ pIter->base.pData = pIter->poslist.p;
+ }
+}
+
+/*
+** xSetOutputs callback used when the Fts5Colset object has nCol==0 (match
+** against no columns at all).
+*/
+static void fts5IterSetOutputs_ZeroColset(Fts5Iter *pIter, Fts5SegIter *pSeg){
+ UNUSED_PARAM(pSeg);
+ pIter->base.nData = 0;
+}
+
+/*
+** xSetOutputs callback used by detail=col when there is a column filter
+** and there are 100 or more columns. Also called as a fallback from
+** fts5IterSetOutputs_Col100 if the column-list spans more than one page.
+*/
+static void fts5IterSetOutputs_Col(Fts5Iter *pIter, Fts5SegIter *pSeg){
+ fts5BufferZero(&pIter->poslist);
+ fts5SegiterPoslist(pIter->pIndex, pSeg, pIter->pColset, &pIter->poslist);
+ pIter->base.iRowid = pSeg->iRowid;
+ pIter->base.pData = pIter->poslist.p;
+ pIter->base.nData = pIter->poslist.n;
+}
+
+/*
+** xSetOutputs callback used when:
+**
+** * detail=col,
+** * there is a column filter, and
+** * the table contains 100 or fewer columns.
+**
+** The last point is to ensure all column numbers are stored as
+** single-byte varints.
+*/
+static void fts5IterSetOutputs_Col100(Fts5Iter *pIter, Fts5SegIter *pSeg){
+
+ assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_COLUMNS );
+ assert( pIter->pColset );
+
+ if( pSeg->iLeafOffset+pSeg->nPos>pSeg->pLeaf->szLeaf ){
+ fts5IterSetOutputs_Col(pIter, pSeg);
+ }else{
+ u8 *a = (u8*)&pSeg->pLeaf->p[pSeg->iLeafOffset];
+ u8 *pEnd = (u8*)&a[pSeg->nPos];
+ int iPrev = 0;
+ int *aiCol = pIter->pColset->aiCol;
+ int *aiColEnd = &aiCol[pIter->pColset->nCol];
+
+ u8 *aOut = pIter->poslist.p;
+ int iPrevOut = 0;
+
+ pIter->base.iRowid = pSeg->iRowid;
+
+ while( a<pEnd ){
+ iPrev += (int)a++[0] - 2;
+ while( *aiCol<iPrev ){
+ aiCol++;
+ if( aiCol==aiColEnd ) goto setoutputs_col_out;
+ }
+ if( *aiCol==iPrev ){
+ *aOut++ = (u8)((iPrev - iPrevOut) + 2);
+ iPrevOut = iPrev;
+ }
+ }
+
+setoutputs_col_out:
+ pIter->base.pData = pIter->poslist.p;
+ pIter->base.nData = aOut - pIter->poslist.p;
+ }
+}
+
+/*
+** xSetOutputs callback used by detail=full when there is a column filter.
+*/
+static void fts5IterSetOutputs_Full(Fts5Iter *pIter, Fts5SegIter *pSeg){
+ Fts5Colset *pColset = pIter->pColset;
+ pIter->base.iRowid = pSeg->iRowid;
+
+ assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_FULL );
+ assert( pColset );
+
+ if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){
+ /* All data is stored on the current page. Populate the output
+ ** variables to point into the body of the page object. */
+ const u8 *a = &pSeg->pLeaf->p[pSeg->iLeafOffset];
+ if( pColset->nCol==1 ){
+ pIter->base.nData = fts5IndexExtractCol(&a, pSeg->nPos,pColset->aiCol[0]);
+ pIter->base.pData = a;
+ }else{
+ fts5BufferZero(&pIter->poslist);
+ fts5IndexExtractColset(pColset, a, pSeg->nPos, &pIter->poslist);
+ pIter->base.pData = pIter->poslist.p;
+ pIter->base.nData = pIter->poslist.n;
+ }
+ }else{
+ /* The data is distributed over two or more pages. Copy it into the
+ ** Fts5Iter.poslist buffer and then set the output pointer to point
+ ** to this buffer. */
+ fts5BufferZero(&pIter->poslist);
+ fts5SegiterPoslist(pIter->pIndex, pSeg, pColset, &pIter->poslist);
+ pIter->base.pData = pIter->poslist.p;
+ pIter->base.nData = pIter->poslist.n;
+ }
+}
+
+static void fts5IterSetOutputCb(int *pRc, Fts5Iter *pIter){
+ if( *pRc==SQLITE_OK ){
+ Fts5Config *pConfig = pIter->pIndex->pConfig;
+ if( pConfig->eDetail==FTS5_DETAIL_NONE ){
+ pIter->xSetOutputs = fts5IterSetOutputs_None;
+ }
+
+ else if( pIter->pColset==0 ){
+ pIter->xSetOutputs = fts5IterSetOutputs_Nocolset;
+ }
+
+ else if( pIter->pColset->nCol==0 ){
+ pIter->xSetOutputs = fts5IterSetOutputs_ZeroColset;
+ }
+
+ else if( pConfig->eDetail==FTS5_DETAIL_FULL ){
+ pIter->xSetOutputs = fts5IterSetOutputs_Full;
+ }
+
+ else{
+ assert( pConfig->eDetail==FTS5_DETAIL_COLUMNS );
+ if( pConfig->nCol<=100 ){
+ pIter->xSetOutputs = fts5IterSetOutputs_Col100;
+ sqlite3Fts5BufferSize(pRc, &pIter->poslist, pConfig->nCol);
+ }else{
+ pIter->xSetOutputs = fts5IterSetOutputs_Col;
+ }
+ }
+ }
+}
+
+
+/*
+** Allocate a new Fts5Iter object.
**
** The new object will be used to iterate through data in structure pStruct.
** If iLevel is -ve, then all data in all segments is merged. Or, if iLevel
@@ -2682,19 +3361,18 @@ static Fts5IndexIter *fts5MultiIterAlloc(
static void fts5MultiIterNew(
Fts5Index *p, /* FTS5 backend to iterate within */
Fts5Structure *pStruct, /* Structure of specific index */
- int bSkipEmpty, /* True to ignore delete-keys */
int flags, /* FTS5INDEX_QUERY_XXX flags */
+ Fts5Colset *pColset, /* Colset to filter on (or NULL) */
const u8 *pTerm, int nTerm, /* Term to seek to (or NULL/0) */
int iLevel, /* Level to iterate (-1 for all) */
int nSegment, /* Number of segments to merge (iLevel>=0) */
- Fts5IndexIter **ppOut /* New object */
+ Fts5Iter **ppOut /* New object */
){
int nSeg = 0; /* Number of segment-iters in use */
int iIter = 0; /* */
int iSeg; /* Used to iterate through segments */
- Fts5Buffer buf = {0,0,0}; /* Buffer used by fts5SegIterSeekInit() */
Fts5StructureLevel *pLvl;
- Fts5IndexIter *pNew;
+ Fts5Iter *pNew;
assert( (pTerm==0 && nTerm==0) || iLevel<0 );
@@ -2711,36 +3389,42 @@ static void fts5MultiIterNew(
*ppOut = pNew = fts5MultiIterAlloc(p, nSeg);
if( pNew==0 ) return;
pNew->bRev = (0!=(flags & FTS5INDEX_QUERY_DESC));
- pNew->bSkipEmpty = (u8)bSkipEmpty;
+ pNew->bSkipEmpty = (0!=(flags & FTS5INDEX_QUERY_SKIPEMPTY));
pNew->pStruct = pStruct;
+ pNew->pColset = pColset;
fts5StructureRef(pStruct);
+ if( (flags & FTS5INDEX_QUERY_NOOUTPUT)==0 ){
+ fts5IterSetOutputCb(&p->rc, pNew);
+ }
/* Initialize each of the component segment iterators. */
- if( iLevel<0 ){
- Fts5StructureLevel *pEnd = &pStruct->aLevel[pStruct->nLevel];
- if( p->pHash ){
- /* Add a segment iterator for the current contents of the hash table. */
- Fts5SegIter *pIter = &pNew->aSeg[iIter++];
- fts5SegIterHashInit(p, pTerm, nTerm, flags, pIter);
- }
- for(pLvl=&pStruct->aLevel[0]; pLvl<pEnd; pLvl++){
- for(iSeg=pLvl->nSeg-1; iSeg>=0; iSeg--){
- Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
+ if( p->rc==SQLITE_OK ){
+ if( iLevel<0 ){
+ Fts5StructureLevel *pEnd = &pStruct->aLevel[pStruct->nLevel];
+ if( p->pHash ){
+ /* Add a segment iterator for the current contents of the hash table. */
Fts5SegIter *pIter = &pNew->aSeg[iIter++];
- if( pTerm==0 ){
- fts5SegIterInit(p, pSeg, pIter);
- }else{
- fts5SegIterSeekInit(p, &buf, pTerm, nTerm, flags, pSeg, pIter);
+ fts5SegIterHashInit(p, pTerm, nTerm, flags, pIter);
+ }
+ for(pLvl=&pStruct->aLevel[0]; pLvl<pEnd; pLvl++){
+ for(iSeg=pLvl->nSeg-1; iSeg>=0; iSeg--){
+ Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
+ Fts5SegIter *pIter = &pNew->aSeg[iIter++];
+ if( pTerm==0 ){
+ fts5SegIterInit(p, pSeg, pIter);
+ }else{
+ fts5SegIterSeekInit(p, pTerm, nTerm, flags, pSeg, pIter);
+ }
}
}
+ }else{
+ pLvl = &pStruct->aLevel[iLevel];
+ for(iSeg=nSeg-1; iSeg>=0; iSeg--){
+ fts5SegIterInit(p, &pLvl->aSeg[iSeg], &pNew->aSeg[iIter++]);
+ }
}
- }else{
- pLvl = &pStruct->aLevel[iLevel];
- for(iSeg=nSeg-1; iSeg>=0; iSeg--){
- fts5SegIterInit(p, &pLvl->aSeg[iSeg], &pNew->aSeg[iIter++]);
- }
+ assert( iIter==nSeg );
}
- assert( iIter==nSeg );
/* If the above was successful, each component iterators now points
** to the first entry in its segment. In this case initialize the
@@ -2750,7 +3434,8 @@ static void fts5MultiIterNew(
for(iIter=pNew->nSeg-1; iIter>0; iIter--){
int iEq;
if( (iEq = fts5MultiIterDoCompare(pNew, iIter)) ){
- fts5SegIterNext(p, &pNew->aSeg[iEq], 0);
+ Fts5SegIter *pSeg = &pNew->aSeg[iEq];
+ if( p->rc==SQLITE_OK ) pSeg->xNext(p, pSeg, 0);
fts5MultiIterAdvanced(p, pNew, iEq, iIter);
}
}
@@ -2759,30 +3444,32 @@ static void fts5MultiIterNew(
if( pNew->bSkipEmpty && fts5MultiIterIsEmpty(p, pNew) ){
fts5MultiIterNext(p, pNew, 0, 0);
+ }else if( pNew->base.bEof==0 ){
+ Fts5SegIter *pSeg = &pNew->aSeg[pNew->aFirst[1].iFirst];
+ pNew->xSetOutputs(pNew, pSeg);
}
+
}else{
- fts5MultiIterFree(p, pNew);
+ fts5MultiIterFree(pNew);
*ppOut = 0;
}
- fts5BufferFree(&buf);
}
/*
-** Create an Fts5IndexIter that iterates through the doclist provided
+** Create an Fts5Iter that iterates through the doclist provided
** as the second argument.
*/
static void fts5MultiIterNew2(
Fts5Index *p, /* FTS5 backend to iterate within */
Fts5Data *pData, /* Doclist to iterate through */
int bDesc, /* True for descending rowid order */
- Fts5IndexIter **ppOut /* New object */
+ Fts5Iter **ppOut /* New object */
){
- Fts5IndexIter *pNew;
+ Fts5Iter *pNew;
pNew = fts5MultiIterAlloc(p, 2);
if( pNew ){
Fts5SegIter *pIter = &pNew->aSeg[1];
- pNew->bFiltered = 1;
pIter->flags = FTS5_SEGITER_ONETERM;
if( pData->szLeaf>0 ){
pIter->pLeaf = pData;
@@ -2798,8 +3485,9 @@ static void fts5MultiIterNew2(
}
pData = 0;
}else{
- pNew->bEof = 1;
+ pNew->base.bEof = 1;
}
+ fts5SegIterSetNext(p, pIter);
*ppOut = pNew;
}
@@ -2811,11 +3499,11 @@ static void fts5MultiIterNew2(
** Return true if the iterator is at EOF or if an error has occurred.
** False otherwise.
*/
-static int fts5MultiIterEof(Fts5Index *p, Fts5IndexIter *pIter){
+static int fts5MultiIterEof(Fts5Index *p, Fts5Iter *pIter){
assert( p->rc
- || (pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf==0)==pIter->bEof
+ || (pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf==0)==pIter->base.bEof
);
- return (p->rc || pIter->bEof);
+ return (p->rc || pIter->base.bEof);
}
/*
@@ -2823,7 +3511,7 @@ static int fts5MultiIterEof(Fts5Index *p, Fts5IndexIter *pIter){
** to. If the iterator points to EOF when this function is called the
** results are undefined.
*/
-static i64 fts5MultiIterRowid(Fts5IndexIter *pIter){
+static i64 fts5MultiIterRowid(Fts5Iter *pIter){
assert( pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf );
return pIter->aSeg[ pIter->aFirst[1].iFirst ].iRowid;
}
@@ -2833,7 +3521,7 @@ static i64 fts5MultiIterRowid(Fts5IndexIter *pIter){
*/
static void fts5MultiIterNextFrom(
Fts5Index *p,
- Fts5IndexIter *pIter,
+ Fts5Iter *pIter,
i64 iMatch
){
while( 1 ){
@@ -2850,52 +3538,12 @@ static void fts5MultiIterNextFrom(
** Return a pointer to a buffer containing the term associated with the
** entry that the iterator currently points to.
*/
-static const u8 *fts5MultiIterTerm(Fts5IndexIter *pIter, int *pn){
+static const u8 *fts5MultiIterTerm(Fts5Iter *pIter, int *pn){
Fts5SegIter *p = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
*pn = p->term.n;
return p->term.p;
}
-static void fts5ChunkIterate(
- Fts5Index *p, /* Index object */
- Fts5SegIter *pSeg, /* Poslist of this iterator */
- void *pCtx, /* Context pointer for xChunk callback */
- void (*xChunk)(Fts5Index*, void*, const u8*, int)
-){
- int nRem = pSeg->nPos; /* Number of bytes still to come */
- Fts5Data *pData = 0;
- u8 *pChunk = &pSeg->pLeaf->p[pSeg->iLeafOffset];
- int nChunk = MIN(nRem, pSeg->pLeaf->szLeaf - pSeg->iLeafOffset);
- int pgno = pSeg->iLeafPgno;
- int pgnoSave = 0;
-
- if( (pSeg->flags & FTS5_SEGITER_REVERSE)==0 ){
- pgnoSave = pgno+1;
- }
-
- while( 1 ){
- xChunk(p, pCtx, pChunk, nChunk);
- nRem -= nChunk;
- fts5DataRelease(pData);
- if( nRem<=0 ){
- break;
- }else{
- pgno++;
- pData = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->pSeg->iSegid, pgno));
- if( pData==0 ) break;
- pChunk = &pData->p[4];
- nChunk = MIN(nRem, pData->szLeaf - 4);
- if( pgno==pgnoSave ){
- assert( pSeg->pNextLeaf==0 );
- pSeg->pNextLeaf = pData;
- pData = 0;
- }
- }
- }
-}
-
-
-
/*
** Allocate a new segment-id for the structure pStruct. The new segment
** id must be between 1 and 65335 inclusive, and must not be used by
@@ -2912,18 +3560,46 @@ static int fts5AllocateSegid(Fts5Index *p, Fts5Structure *pStruct){
if( pStruct->nSegment>=FTS5_MAX_SEGMENT ){
p->rc = SQLITE_FULL;
}else{
- while( iSegid==0 ){
- int iLvl, iSeg;
- sqlite3_randomness(sizeof(u32), (void*)&iSegid);
- iSegid = iSegid & ((1 << FTS5_DATA_ID_B)-1);
- for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
- for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
- if( iSegid==pStruct->aLevel[iLvl].aSeg[iSeg].iSegid ){
- iSegid = 0;
- }
+ /* FTS5_MAX_SEGMENT is currently defined as 2000. So the following
+ ** array is 63 elements, or 252 bytes, in size. */
+ u32 aUsed[(FTS5_MAX_SEGMENT+31) / 32];
+ int iLvl, iSeg;
+ int i;
+ u32 mask;
+ memset(aUsed, 0, sizeof(aUsed));
+ for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
+ for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
+ int iId = pStruct->aLevel[iLvl].aSeg[iSeg].iSegid;
+ if( iId<=FTS5_MAX_SEGMENT ){
+ aUsed[(iId-1) / 32] |= 1 << ((iId-1) % 32);
}
}
}
+
+ for(i=0; aUsed[i]==0xFFFFFFFF; i++);
+ mask = aUsed[i];
+ for(iSegid=0; mask & (1 << iSegid); iSegid++);
+ iSegid += 1 + i*32;
+
+#ifdef SQLITE_DEBUG
+ for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
+ for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
+ assert( iSegid!=pStruct->aLevel[iLvl].aSeg[iSeg].iSegid );
+ }
+ }
+ assert( iSegid>0 && iSegid<=FTS5_MAX_SEGMENT );
+
+ {
+ sqlite3_stmt *pIdxSelect = fts5IdxSelectStmt(p);
+ if( p->rc==SQLITE_OK ){
+ u8 aBlob[2] = {0xff, 0xff};
+ sqlite3_bind_int(pIdxSelect, 1, iSegid);
+ sqlite3_bind_blob(pIdxSelect, 2, aBlob, 2, SQLITE_STATIC);
+ assert( sqlite3_step(pIdxSelect)!=SQLITE_ROW );
+ p->rc = sqlite3_reset(pIdxSelect);
+ }
+ }
+#endif
}
}
@@ -2942,15 +3618,14 @@ static void fts5IndexDiscardData(Fts5Index *p){
}
/*
-** Return the size of the prefix, in bytes, that buffer (nNew/pNew) shares
-** with buffer (nOld/pOld).
+** Return the size of the prefix, in bytes, that buffer
+** (pNew/<length-unknown>) shares with buffer (pOld/nOld).
+**
+** Buffer (pNew/<length-unknown>) is guaranteed to be greater
+** than buffer (pOld/nOld).
*/
-static int fts5PrefixCompress(
- int nOld, const u8 *pOld,
- int nNew, const u8 *pNew
-){
+static int fts5PrefixCompress(int nOld, const u8 *pOld, const u8 *pNew){
int i;
- assert( fts5BlobCompare(pOld, nOld, pNew, nNew)<0 );
for(i=0; i<nOld; i++){
if( pOld[i]!=pNew[i] ) break;
}
@@ -3170,6 +3845,9 @@ static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){
Fts5PageWriter *pPage = &pWriter->writer;
i64 iRowid;
+static int nCall = 0;
+nCall++;
+
assert( (pPage->pgidx.n==0)==(pWriter->bFirstTermInPage) );
/* Set the szLeaf header field. */
@@ -3260,13 +3938,13 @@ static void fts5WriteAppendTerm(
** inefficient, but still correct. */
int n = nTerm;
if( pPage->term.n ){
- n = 1 + fts5PrefixCompress(pPage->term.n, pPage->term.p, nTerm, pTerm);
+ n = 1 + fts5PrefixCompress(pPage->term.n, pPage->term.p, pTerm);
}
fts5WriteBtreeTerm(p, pWriter, n, pTerm);
pPage = &pWriter->writer;
}
}else{
- nPrefix = fts5PrefixCompress(pPage->term.n, pPage->term.p, nTerm, pTerm);
+ nPrefix = fts5PrefixCompress(pPage->term.n, pPage->term.p, pTerm);
fts5BufferAppendVarint(&p->rc, &pPage->buf, nPrefix);
}
@@ -3292,8 +3970,7 @@ static void fts5WriteAppendTerm(
static void fts5WriteAppendRowid(
Fts5Index *p,
Fts5SegWriter *pWriter,
- i64 iRowid,
- int nPos
+ i64 iRowid
){
if( p->rc==SQLITE_OK ){
Fts5PageWriter *pPage = &pWriter->writer;
@@ -3320,8 +3997,6 @@ static void fts5WriteAppendRowid(
pWriter->iPrevRowid = iRowid;
pWriter->bFirstRowidInDoclist = 0;
pWriter->bFirstRowidInPage = 0;
-
- fts5BufferAppendVarint(&p->rc, &pPage->buf, nPos);
}
}
@@ -3372,7 +4047,9 @@ static void fts5WriteFinish(
fts5WriteFlushLeaf(p, pWriter);
}
*pnLeaf = pLeaf->pgno-1;
- fts5WriteFlushBtree(p, pWriter);
+ if( pLeaf->pgno>1 ){
+ fts5WriteFlushBtree(p, pWriter);
+ }
}
fts5BufferFree(&pLeaf->term);
fts5BufferFree(&pLeaf->buf);
@@ -3432,7 +4109,7 @@ static void fts5WriteInit(
** incremental merge operation. This function is called if the incremental
** merge step has finished but the input has not been completely exhausted.
*/
-static void fts5TrimSegments(Fts5Index *p, Fts5IndexIter *pIter){
+static void fts5TrimSegments(Fts5Index *p, Fts5Iter *pIter){
int i;
Fts5Buffer buf;
memset(&buf, 0, sizeof(Fts5Buffer));
@@ -3510,13 +4187,15 @@ static void fts5IndexMergeLevel(
Fts5Structure *pStruct = *ppStruct;
Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
Fts5StructureLevel *pLvlOut;
- Fts5IndexIter *pIter = 0; /* Iterator to read input data */
+ Fts5Iter *pIter = 0; /* Iterator to read input data */
int nRem = pnRem ? *pnRem : 0; /* Output leaf pages left to write */
int nInput; /* Number of input segments */
Fts5SegWriter writer; /* Writer object */
Fts5StructureSegment *pSeg; /* Output segment */
Fts5Buffer term;
int bOldest; /* True if the output segment is the oldest */
+ int eDetail = p->pConfig->eDetail;
+ const int flags = FTS5INDEX_QUERY_NOOUTPUT;
assert( iLvl<pStruct->nLevel );
assert( pLvl->nMerge<=pLvl->nSeg );
@@ -3561,7 +4240,7 @@ static void fts5IndexMergeLevel(
bOldest = (pLvlOut->nSeg==1 && pStruct->nLevel==iLvl+2);
assert( iLvl>=0 );
- for(fts5MultiIterNew(p, pStruct, 0, 0, 0, 0, iLvl, nInput, &pIter);
+ for(fts5MultiIterNew(p, pStruct, flags, 0, 0, 0, iLvl, nInput, &pIter);
fts5MultiIterEof(p, pIter)==0;
fts5MultiIterNext(p, pIter, 0, 0)
){
@@ -3586,11 +4265,21 @@ static void fts5IndexMergeLevel(
/* Append the rowid to the output */
/* WRITEPOSLISTSIZE */
- nPos = pSegIter->nPos*2 + pSegIter->bDel;
- fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter), nPos);
+ fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter));
- /* Append the position-list data to the output */
- fts5ChunkIterate(p, pSegIter, (void*)&writer, fts5MergeChunkCallback);
+ if( eDetail==FTS5_DETAIL_NONE ){
+ if( pSegIter->bDel ){
+ fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0);
+ if( pSegIter->nPos>0 ){
+ fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0);
+ }
+ }
+ }else{
+ /* Append the position-list data to the output */
+ nPos = pSegIter->nPos*2 + pSegIter->bDel;
+ fts5BufferAppendVarint(&p->rc, &writer.writer.buf, nPos);
+ fts5ChunkIterate(p, pSegIter, (void*)&writer, fts5MergeChunkCallback);
+ }
}
/* Flush the last leaf page to disk. Set the output segment b-tree height
@@ -3623,20 +4312,24 @@ static void fts5IndexMergeLevel(
pLvl->nMerge = nInput;
}
- fts5MultiIterFree(p, pIter);
+ fts5MultiIterFree(pIter);
fts5BufferFree(&term);
if( pnRem ) *pnRem -= writer.nLeafWritten;
}
/*
** Do up to nPg pages of automerge work on the index.
+**
+** Return true if any changes were actually made, or false otherwise.
*/
-static void fts5IndexMerge(
+static int fts5IndexMerge(
Fts5Index *p, /* FTS5 backend object */
Fts5Structure **ppStruct, /* IN/OUT: Current structure of index */
- int nPg /* Pages of work to do */
+ int nPg, /* Pages of work to do */
+ int nMin /* Minimum number of segments to merge */
){
int nRem = nPg;
+ int bRet = 0;
Fts5Structure *pStruct = *ppStruct;
while( nRem>0 && p->rc==SQLITE_OK ){
int iLvl; /* To iterate through levels */
@@ -3667,17 +4360,17 @@ static void fts5IndexMerge(
}
#endif
- if( nBest<p->pConfig->nAutomerge
- && pStruct->aLevel[iBestLvl].nMerge==0
- ){
+ if( nBest<nMin && pStruct->aLevel[iBestLvl].nMerge==0 ){
break;
}
+ bRet = 1;
fts5IndexMergeLevel(p, &pStruct, iBestLvl, &nRem);
if( p->rc==SQLITE_OK && pStruct->aLevel[iBestLvl].nMerge==0 ){
fts5StructurePromote(p, iBestLvl+1, pStruct);
}
}
*ppStruct = pStruct;
+ return bRet;
}
/*
@@ -3705,7 +4398,7 @@ static void fts5IndexAutomerge(
pStruct->nWriteCounter += nLeaf;
nRem = (int)(p->nWorkUnit * nWork * pStruct->nLevel);
- fts5IndexMerge(p, ppStruct, nRem);
+ fts5IndexMerge(p, ppStruct, nRem, p->pConfig->nAutomerge);
}
}
@@ -3775,10 +4468,11 @@ static void fts5FlushOneHash(Fts5Index *p){
** for the new level-0 segment. */
pStruct = fts5StructureRead(p);
iSegid = fts5AllocateSegid(p, pStruct);
+ fts5StructureInvalidate(p);
if( iSegid ){
const int pgsz = p->pConfig->pgsz;
-
+ int eDetail = p->pConfig->eDetail;
Fts5StructureSegment *pSeg; /* New segment within pStruct */
Fts5Buffer *pBuf; /* Buffer in which to assemble leaf page */
Fts5Buffer *pPgidx; /* Buffer in which to assemble pgidx */
@@ -3821,12 +4515,7 @@ static void fts5FlushOneHash(Fts5Index *p){
** loop iterates through the poslists that make up the current
** doclist. */
while( p->rc==SQLITE_OK && iOff<nDoclist ){
- int nPos;
- int nCopy;
- int bDummy;
iOff += fts5GetVarint(&pDoclist[iOff], (u64*)&iDelta);
- nCopy = fts5GetPoslistSize(&pDoclist[iOff], &nPos, &bDummy);
- nCopy += nPos;
iRowid += iDelta;
if( writer.bFirstRowidInPage ){
@@ -3839,34 +4528,52 @@ static void fts5FlushOneHash(Fts5Index *p){
}
assert( pBuf->n<=pBuf->nSpace );
- if( (pBuf->n + pPgidx->n + nCopy) <= pgsz ){
- /* The entire poslist will fit on the current leaf. So copy
- ** it in one go. */
- fts5BufferSafeAppendBlob(pBuf, &pDoclist[iOff], nCopy);
- }else{
- /* The entire poslist will not fit on this leaf. So it needs
- ** to be broken into sections. The only qualification being
- ** that each varint must be stored contiguously. */
- const u8 *pPoslist = &pDoclist[iOff];
- int iPos = 0;
- while( p->rc==SQLITE_OK ){
- int nSpace = pgsz - pBuf->n - pPgidx->n;
- int n = 0;
- if( (nCopy - iPos)<=nSpace ){
- n = nCopy - iPos;
- }else{
- n = fts5PoslistPrefix(&pPoslist[iPos], nSpace);
+ if( eDetail==FTS5_DETAIL_NONE ){
+ if( iOff<nDoclist && pDoclist[iOff]==0 ){
+ pBuf->p[pBuf->n++] = 0;
+ iOff++;
+ if( iOff<nDoclist && pDoclist[iOff]==0 ){
+ pBuf->p[pBuf->n++] = 0;
+ iOff++;
}
- assert( n>0 );
- fts5BufferSafeAppendBlob(pBuf, &pPoslist[iPos], n);
- iPos += n;
- if( (pBuf->n + pPgidx->n)>=pgsz ){
- fts5WriteFlushLeaf(p, &writer);
+ }
+ if( (pBuf->n + pPgidx->n)>=pgsz ){
+ fts5WriteFlushLeaf(p, &writer);
+ }
+ }else{
+ int bDummy;
+ int nPos;
+ int nCopy = fts5GetPoslistSize(&pDoclist[iOff], &nPos, &bDummy);
+ nCopy += nPos;
+ if( (pBuf->n + pPgidx->n + nCopy) <= pgsz ){
+ /* The entire poslist will fit on the current leaf. So copy
+ ** it in one go. */
+ fts5BufferSafeAppendBlob(pBuf, &pDoclist[iOff], nCopy);
+ }else{
+ /* The entire poslist will not fit on this leaf. So it needs
+ ** to be broken into sections. The only qualification being
+ ** that each varint must be stored contiguously. */
+ const u8 *pPoslist = &pDoclist[iOff];
+ int iPos = 0;
+ while( p->rc==SQLITE_OK ){
+ int nSpace = pgsz - pBuf->n - pPgidx->n;
+ int n = 0;
+ if( (nCopy - iPos)<=nSpace ){
+ n = nCopy - iPos;
+ }else{
+ n = fts5PoslistPrefix(&pPoslist[iPos], nSpace);
+ }
+ assert( n>0 );
+ fts5BufferSafeAppendBlob(pBuf, &pPoslist[iPos], n);
+ iPos += n;
+ if( (pBuf->n + pPgidx->n)>=pgsz ){
+ fts5WriteFlushLeaf(p, &writer);
+ }
+ if( iPos>=nCopy ) break;
}
- if( iPos>=nCopy ) break;
}
+ iOff += nCopy;
}
- iOff += nCopy;
}
}
@@ -3912,28 +4619,41 @@ static void fts5IndexFlush(Fts5Index *p){
}
}
-
-int sqlite3Fts5IndexOptimize(Fts5Index *p){
- Fts5Structure *pStruct;
+static Fts5Structure *fts5IndexOptimizeStruct(
+ Fts5Index *p,
+ Fts5Structure *pStruct
+){
Fts5Structure *pNew = 0;
- int nSeg = 0;
-
- assert( p->rc==SQLITE_OK );
- fts5IndexFlush(p);
- pStruct = fts5StructureRead(p);
+ int nByte = sizeof(Fts5Structure);
+ int nSeg = pStruct->nSegment;
+ int i;
- if( pStruct ){
- assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) );
- nSeg = pStruct->nSegment;
- if( nSeg>1 ){
- int nByte = sizeof(Fts5Structure);
- nByte += (pStruct->nLevel+1) * sizeof(Fts5StructureLevel);
- pNew = (Fts5Structure*)sqlite3Fts5MallocZero(&p->rc, nByte);
+ /* Figure out if this structure requires optimization. A structure does
+ ** not require optimization if either:
+ **
+ ** + it consists of fewer than two segments, or
+ ** + all segments are on the same level, or
+ ** + all segments except one are currently inputs to a merge operation.
+ **
+ ** In the first case, return NULL. In the second, increment the ref-count
+ ** on *pStruct and return a copy of the pointer to it.
+ */
+ if( nSeg<2 ) return 0;
+ for(i=0; i<pStruct->nLevel; i++){
+ int nThis = pStruct->aLevel[i].nSeg;
+ if( nThis==nSeg || (nThis==nSeg-1 && pStruct->aLevel[i].nMerge==nThis) ){
+ fts5StructureRef(pStruct);
+ return pStruct;
}
+ assert( pStruct->aLevel[i].nMerge<=nThis );
}
+
+ nByte += (pStruct->nLevel+1) * sizeof(Fts5StructureLevel);
+ pNew = (Fts5Structure*)sqlite3Fts5MallocZero(&p->rc, nByte);
+
if( pNew ){
Fts5StructureLevel *pLvl;
- int nByte = nSeg * sizeof(Fts5StructureSegment);
+ nByte = nSeg * sizeof(Fts5StructureSegment);
pNew->nLevel = pStruct->nLevel+1;
pNew->nRef = 1;
pNew->nWriteCounter = pStruct->nWriteCounter;
@@ -3942,7 +4662,10 @@ int sqlite3Fts5IndexOptimize(Fts5Index *p){
if( pLvl->aSeg ){
int iLvl, iSeg;
int iSegOut = 0;
- for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
+ /* Iterate through all segments, from oldest to newest. Add them to
+ ** the new Fts5Level object so that pLvl->aSeg[0] is the oldest
+ ** segment in the data structure. */
+ for(iLvl=pStruct->nLevel-1; iLvl>=0; iLvl--){
for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
pLvl->aSeg[iSegOut] = pStruct->aLevel[iLvl].aSeg[iSeg];
iSegOut++;
@@ -3955,258 +4678,91 @@ int sqlite3Fts5IndexOptimize(Fts5Index *p){
}
}
- if( pNew ){
- int iLvl = pNew->nLevel-1;
- while( p->rc==SQLITE_OK && pNew->aLevel[iLvl].nSeg>0 ){
- int nRem = FTS5_OPT_WORK_UNIT;
- fts5IndexMergeLevel(p, &pNew, iLvl, &nRem);
- }
-
- fts5StructureWrite(p, pNew);
- fts5StructureRelease(pNew);
- }
-
- fts5StructureRelease(pStruct);
- return fts5IndexReturn(p);
+ return pNew;
}
-int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge){
+int sqlite3Fts5IndexOptimize(Fts5Index *p){
Fts5Structure *pStruct;
+ Fts5Structure *pNew = 0;
+ assert( p->rc==SQLITE_OK );
+ fts5IndexFlush(p);
pStruct = fts5StructureRead(p);
- if( pStruct && pStruct->nLevel ){
- fts5IndexMerge(p, &pStruct, nMerge);
- fts5StructureWrite(p, pStruct);
- }
- fts5StructureRelease(pStruct);
-
- return fts5IndexReturn(p);
-}
-
-static void fts5PoslistCallback(
- Fts5Index *p,
- void *pContext,
- const u8 *pChunk, int nChunk
-){
- assert_nc( nChunk>=0 );
- if( nChunk>0 ){
- fts5BufferSafeAppendBlob((Fts5Buffer*)pContext, pChunk, nChunk);
- }
-}
-
-typedef struct PoslistCallbackCtx PoslistCallbackCtx;
-struct PoslistCallbackCtx {
- Fts5Buffer *pBuf; /* Append to this buffer */
- Fts5Colset *pColset; /* Restrict matches to this column */
- int eState; /* See above */
-};
-
-/*
-** TODO: Make this more efficient!
-*/
-static int fts5IndexColsetTest(Fts5Colset *pColset, int iCol){
- int i;
- for(i=0; i<pColset->nCol; i++){
- if( pColset->aiCol[i]==iCol ) return 1;
- }
- return 0;
-}
-
-static void fts5PoslistFilterCallback(
- Fts5Index *p,
- void *pContext,
- const u8 *pChunk, int nChunk
-){
- PoslistCallbackCtx *pCtx = (PoslistCallbackCtx*)pContext;
- assert_nc( nChunk>=0 );
- if( nChunk>0 ){
- /* Search through to find the first varint with value 1. This is the
- ** start of the next columns hits. */
- int i = 0;
- int iStart = 0;
-
- if( pCtx->eState==2 ){
- int iCol;
- fts5FastGetVarint32(pChunk, i, iCol);
- if( fts5IndexColsetTest(pCtx->pColset, iCol) ){
- pCtx->eState = 1;
- fts5BufferSafeAppendVarint(pCtx->pBuf, 1);
- }else{
- pCtx->eState = 0;
- }
- }
+ fts5StructureInvalidate(p);
- do {
- while( i<nChunk && pChunk[i]!=0x01 ){
- while( pChunk[i] & 0x80 ) i++;
- i++;
- }
- if( pCtx->eState ){
- fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart);
- }
- if( i<nChunk ){
- int iCol;
- iStart = i;
- i++;
- if( i>=nChunk ){
- pCtx->eState = 2;
- }else{
- fts5FastGetVarint32(pChunk, i, iCol);
- pCtx->eState = fts5IndexColsetTest(pCtx->pColset, iCol);
- if( pCtx->eState ){
- fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart);
- iStart = i;
- }
- }
- }
- }while( i<nChunk );
+ if( pStruct ){
+ pNew = fts5IndexOptimizeStruct(p, pStruct);
}
-}
+ fts5StructureRelease(pStruct);
-/*
-** Iterator pIter currently points to a valid entry (not EOF). This
-** function appends the position list data for the current entry to
-** buffer pBuf. It does not make a copy of the position-list size
-** field.
-*/
-static void fts5SegiterPoslist(
- Fts5Index *p,
- Fts5SegIter *pSeg,
- Fts5Colset *pColset,
- Fts5Buffer *pBuf
-){
- if( 0==fts5BufferGrow(&p->rc, pBuf, pSeg->nPos) ){
- if( pColset==0 ){
- fts5ChunkIterate(p, pSeg, (void*)pBuf, fts5PoslistCallback);
- }else{
- PoslistCallbackCtx sCtx;
- sCtx.pBuf = pBuf;
- sCtx.pColset = pColset;
- sCtx.eState = fts5IndexColsetTest(pColset, 0);
- assert( sCtx.eState==0 || sCtx.eState==1 );
- fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistFilterCallback);
+ assert( pNew==0 || pNew->nSegment>0 );
+ if( pNew ){
+ int iLvl;
+ for(iLvl=0; pNew->aLevel[iLvl].nSeg==0; iLvl++){}
+ while( p->rc==SQLITE_OK && pNew->aLevel[iLvl].nSeg>0 ){
+ int nRem = FTS5_OPT_WORK_UNIT;
+ fts5IndexMergeLevel(p, &pNew, iLvl, &nRem);
}
+
+ fts5StructureWrite(p, pNew);
+ fts5StructureRelease(pNew);
}
+
+ return fts5IndexReturn(p);
}
/*
-** IN/OUT parameter (*pa) points to a position list n bytes in size. If
-** the position list contains entries for column iCol, then (*pa) is set
-** to point to the sub-position-list for that column and the number of
-** bytes in it returned. Or, if the argument position list does not
-** contain any entries for column iCol, return 0.
+** This is called to implement the special "VALUES('merge', $nMerge)"
+** INSERT command.
*/
-static int fts5IndexExtractCol(
- const u8 **pa, /* IN/OUT: Pointer to poslist */
- int n, /* IN: Size of poslist in bytes */
- int iCol /* Column to extract from poslist */
-){
- int iCurrent = 0; /* Anything before the first 0x01 is col 0 */
- const u8 *p = *pa;
- const u8 *pEnd = &p[n]; /* One byte past end of position list */
- u8 prev = 0;
-
- while( iCol>iCurrent ){
- /* Advance pointer p until it points to pEnd or an 0x01 byte that is
- ** not part of a varint */
- while( (prev & 0x80) || *p!=0x01 ){
- prev = *p++;
- if( p==pEnd ) return 0;
+int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge){
+ Fts5Structure *pStruct = fts5StructureRead(p);
+ if( pStruct ){
+ int nMin = p->pConfig->nUsermerge;
+ fts5StructureInvalidate(p);
+ if( nMerge<0 ){
+ Fts5Structure *pNew = fts5IndexOptimizeStruct(p, pStruct);
+ fts5StructureRelease(pStruct);
+ pStruct = pNew;
+ nMin = 2;
+ nMerge = nMerge*-1;
+ }
+ if( pStruct && pStruct->nLevel ){
+ if( fts5IndexMerge(p, &pStruct, nMerge, nMin) ){
+ fts5StructureWrite(p, pStruct);
+ }
}
- *pa = p++;
- p += fts5GetVarint32(p, iCurrent);
- }
- if( iCol!=iCurrent ) return 0;
-
- /* Advance pointer p until it points to pEnd or an 0x01 byte that is
- ** not part of a varint */
- assert( (prev & 0x80)==0 );
- while( p<pEnd && ((prev & 0x80) || *p!=0x01) ){
- prev = *p++;
+ fts5StructureRelease(pStruct);
}
- return p - (*pa);
+ return fts5IndexReturn(p);
}
-
-/*
-** Iterator pMulti currently points to a valid entry (not EOF). This
-** function appends the following to buffer pBuf:
-**
-** * The varint iDelta, and
-** * the position list that currently points to, including the size field.
-**
-** If argument pColset is NULL, then the position list is filtered according
-** to pColset before being appended to the buffer. If this means there are
-** no entries in the position list, nothing is appended to the buffer (not
-** even iDelta).
-**
-** If an error occurs, an error code is left in p->rc.
-*/
-static int fts5AppendPoslist(
+static void fts5AppendRowid(
Fts5Index *p,
i64 iDelta,
- Fts5IndexIter *pMulti,
- Fts5Colset *pColset,
+ Fts5Iter *pUnused,
Fts5Buffer *pBuf
){
- if( p->rc==SQLITE_OK ){
- Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ];
- assert( fts5MultiIterEof(p, pMulti)==0 );
- assert( pSeg->nPos>0 );
- if( 0==fts5BufferGrow(&p->rc, pBuf, pSeg->nPos+9+9) ){
-
- if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf
- && (pColset==0 || pColset->nCol==1)
- ){
- const u8 *pPos = &pSeg->pLeaf->p[pSeg->iLeafOffset];
- int nPos;
- if( pColset ){
- nPos = fts5IndexExtractCol(&pPos, pSeg->nPos, pColset->aiCol[0]);
- if( nPos==0 ) return 1;
- }else{
- nPos = pSeg->nPos;
- }
- assert( nPos>0 );
- fts5BufferSafeAppendVarint(pBuf, iDelta);
- fts5BufferSafeAppendVarint(pBuf, nPos*2);
- fts5BufferSafeAppendBlob(pBuf, pPos, nPos);
- }else{
- int iSv1;
- int iSv2;
- int iData;
-
- /* Append iDelta */
- iSv1 = pBuf->n;
- fts5BufferSafeAppendVarint(pBuf, iDelta);
-
- /* WRITEPOSLISTSIZE */
- iSv2 = pBuf->n;
- fts5BufferSafeAppendVarint(pBuf, pSeg->nPos*2);
- iData = pBuf->n;
-
- fts5SegiterPoslist(p, pSeg, pColset, pBuf);
-
- if( pColset ){
- int nActual = pBuf->n - iData;
- if( nActual!=pSeg->nPos ){
- if( nActual==0 ){
- pBuf->n = iSv1;
- return 1;
- }else{
- int nReq = sqlite3Fts5GetVarintLen((u32)(nActual*2));
- while( iSv2<(iData-nReq) ){ pBuf->p[iSv2++] = 0x80; }
- sqlite3Fts5PutVarint(&pBuf->p[iSv2], nActual*2);
- }
- }
- }
- }
+ UNUSED_PARAM(pUnused);
+ fts5BufferAppendVarint(&p->rc, pBuf, iDelta);
+}
- }
+static void fts5AppendPoslist(
+ Fts5Index *p,
+ i64 iDelta,
+ Fts5Iter *pMulti,
+ Fts5Buffer *pBuf
+){
+ int nData = pMulti->base.nData;
+ assert( nData>0 );
+ if( p->rc==SQLITE_OK && 0==fts5BufferGrow(&p->rc, pBuf, nData+9+9) ){
+ fts5BufferSafeAppendVarint(pBuf, iDelta);
+ fts5BufferSafeAppendVarint(pBuf, nData*2);
+ fts5BufferSafeAppendBlob(pBuf, pMulti->base.pData, nData);
}
-
- return 0;
}
+
static void fts5DoclistIterNext(Fts5DoclistIter *pIter){
u8 *p = pIter->aPoslist + pIter->nSize + pIter->nPoslist;
@@ -4268,6 +4824,69 @@ static void fts5MergeAppendDocid(
}
/*
+** Swap the contents of buffer *p1 with that of *p2.
+*/
+static void fts5BufferSwap(Fts5Buffer *p1, Fts5Buffer *p2){
+ Fts5Buffer tmp = *p1;
+ *p1 = *p2;
+ *p2 = tmp;
+}
+
+static void fts5NextRowid(Fts5Buffer *pBuf, int *piOff, i64 *piRowid){
+ int i = *piOff;
+ if( i>=pBuf->n ){
+ *piOff = -1;
+ }else{
+ u64 iVal;
+ *piOff = i + sqlite3Fts5GetVarint(&pBuf->p[i], &iVal);
+ *piRowid += iVal;
+ }
+}
+
+/*
+** This is the equivalent of fts5MergePrefixLists() for detail=none mode.
+** In this case the buffers consist of a delta-encoded list of rowids only.
+*/
+static void fts5MergeRowidLists(
+ Fts5Index *p, /* FTS5 backend object */
+ Fts5Buffer *p1, /* First list to merge */
+ Fts5Buffer *p2 /* Second list to merge */
+){
+ int i1 = 0;
+ int i2 = 0;
+ i64 iRowid1 = 0;
+ i64 iRowid2 = 0;
+ i64 iOut = 0;
+
+ Fts5Buffer out;
+ memset(&out, 0, sizeof(out));
+ sqlite3Fts5BufferSize(&p->rc, &out, p1->n + p2->n);
+ if( p->rc ) return;
+
+ fts5NextRowid(p1, &i1, &iRowid1);
+ fts5NextRowid(p2, &i2, &iRowid2);
+ while( i1>=0 || i2>=0 ){
+ if( i1>=0 && (i2<0 || iRowid1<iRowid2) ){
+ assert( iOut==0 || iRowid1>iOut );
+ fts5BufferSafeAppendVarint(&out, iRowid1 - iOut);
+ iOut = iRowid1;
+ fts5NextRowid(p1, &i1, &iRowid1);
+ }else{
+ assert( iOut==0 || iRowid2>iOut );
+ fts5BufferSafeAppendVarint(&out, iRowid2 - iOut);
+ iOut = iRowid2;
+ if( i1>=0 && iRowid1==iRowid2 ){
+ fts5NextRowid(p1, &i1, &iRowid1);
+ }
+ fts5NextRowid(p2, &i2, &iRowid2);
+ }
+ }
+
+ fts5BufferSwap(&out, p1);
+ fts5BufferFree(&out);
+}
+
+/*
** Buffers p1 and p2 contain doclists. This function merges the content
** of the two doclists together and sets buffer p1 to the result before
** returning.
@@ -4284,28 +4903,30 @@ static void fts5MergePrefixLists(
i64 iLastRowid = 0;
Fts5DoclistIter i1;
Fts5DoclistIter i2;
- Fts5Buffer out;
- Fts5Buffer tmp;
- memset(&out, 0, sizeof(out));
- memset(&tmp, 0, sizeof(tmp));
+ Fts5Buffer out = {0, 0, 0};
+ Fts5Buffer tmp = {0, 0, 0};
- sqlite3Fts5BufferSize(&p->rc, &out, p1->n + p2->n);
+ if( sqlite3Fts5BufferSize(&p->rc, &out, p1->n + p2->n) ) return;
fts5DoclistIterInit(p1, &i1);
fts5DoclistIterInit(p2, &i2);
- while( p->rc==SQLITE_OK && (i1.aPoslist!=0 || i2.aPoslist!=0) ){
- if( i2.aPoslist==0 || (i1.aPoslist && i1.iRowid<i2.iRowid) ){
+
+ while( 1 ){
+ if( i1.iRowid<i2.iRowid ){
/* Copy entry from i1 */
fts5MergeAppendDocid(&out, iLastRowid, i1.iRowid);
fts5BufferSafeAppendBlob(&out, i1.aPoslist, i1.nPoslist+i1.nSize);
fts5DoclistIterNext(&i1);
+ if( i1.aPoslist==0 ) break;
}
- else if( i1.aPoslist==0 || i2.iRowid!=i1.iRowid ){
+ else if( i2.iRowid!=i1.iRowid ){
/* Copy entry from i2 */
fts5MergeAppendDocid(&out, iLastRowid, i2.iRowid);
fts5BufferSafeAppendBlob(&out, i2.aPoslist, i2.nPoslist+i2.nSize);
fts5DoclistIterNext(&i2);
+ if( i2.aPoslist==0 ) break;
}
else{
+ /* Merge the two position lists. */
i64 iPos1 = 0;
i64 iPos2 = 0;
int iOff1 = 0;
@@ -4313,29 +4934,53 @@ static void fts5MergePrefixLists(
u8 *a1 = &i1.aPoslist[i1.nSize];
u8 *a2 = &i2.aPoslist[i2.nSize];
+ i64 iPrev = 0;
Fts5PoslistWriter writer;
memset(&writer, 0, sizeof(writer));
- /* Merge the two position lists. */
fts5MergeAppendDocid(&out, iLastRowid, i2.iRowid);
fts5BufferZero(&tmp);
+ sqlite3Fts5BufferSize(&p->rc, &tmp, i1.nPoslist + i2.nPoslist);
+ if( p->rc ) break;
sqlite3Fts5PoslistNext64(a1, i1.nPoslist, &iOff1, &iPos1);
sqlite3Fts5PoslistNext64(a2, i2.nPoslist, &iOff2, &iPos2);
+ assert( iPos1>=0 && iPos2>=0 );
- while( p->rc==SQLITE_OK && (iPos1>=0 || iPos2>=0) ){
- i64 iNew;
- if( iPos2<0 || (iPos1>=0 && iPos1<iPos2) ){
- iNew = iPos1;
- sqlite3Fts5PoslistNext64(a1, i1.nPoslist, &iOff1, &iPos1);
- }else{
- iNew = iPos2;
- sqlite3Fts5PoslistNext64(a2, i2.nPoslist, &iOff2, &iPos2);
- if( iPos1==iPos2 ){
- sqlite3Fts5PoslistNext64(a1, i1.nPoslist, &iOff1,&iPos1);
+ if( iPos1<iPos2 ){
+ sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos1);
+ sqlite3Fts5PoslistNext64(a1, i1.nPoslist, &iOff1, &iPos1);
+ }else{
+ sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos2);
+ sqlite3Fts5PoslistNext64(a2, i2.nPoslist, &iOff2, &iPos2);
+ }
+
+ if( iPos1>=0 && iPos2>=0 ){
+ while( 1 ){
+ if( iPos1<iPos2 ){
+ if( iPos1!=iPrev ){
+ sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos1);
+ }
+ sqlite3Fts5PoslistNext64(a1, i1.nPoslist, &iOff1, &iPos1);
+ if( iPos1<0 ) break;
+ }else{
+ assert( iPos2!=iPrev );
+ sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos2);
+ sqlite3Fts5PoslistNext64(a2, i2.nPoslist, &iOff2, &iPos2);
+ if( iPos2<0 ) break;
}
}
- p->rc = sqlite3Fts5PoslistWriterAppend(&tmp, &writer, iNew);
+ }
+
+ if( iPos1>=0 ){
+ if( iPos1!=iPrev ){
+ sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos1);
+ }
+ fts5BufferSafeAppendBlob(&tmp, &a1[iOff1], i1.nPoslist-iOff1);
+ }else{
+ assert( iPos2>=0 && iPos2!=iPrev );
+ sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos2);
+ fts5BufferSafeAppendBlob(&tmp, &a2[iOff2], i2.nPoslist-iOff2);
}
/* WRITEPOSLISTSIZE */
@@ -4343,84 +4988,105 @@ static void fts5MergePrefixLists(
fts5BufferSafeAppendBlob(&out, tmp.p, tmp.n);
fts5DoclistIterNext(&i1);
fts5DoclistIterNext(&i2);
+ if( i1.aPoslist==0 || i2.aPoslist==0 ) break;
}
}
+ if( i1.aPoslist ){
+ fts5MergeAppendDocid(&out, iLastRowid, i1.iRowid);
+ fts5BufferSafeAppendBlob(&out, i1.aPoslist, i1.aEof - i1.aPoslist);
+ }
+ else if( i2.aPoslist ){
+ fts5MergeAppendDocid(&out, iLastRowid, i2.iRowid);
+ fts5BufferSafeAppendBlob(&out, i2.aPoslist, i2.aEof - i2.aPoslist);
+ }
+
fts5BufferSet(&p->rc, p1, out.n, out.p);
fts5BufferFree(&tmp);
fts5BufferFree(&out);
}
}
-static void fts5BufferSwap(Fts5Buffer *p1, Fts5Buffer *p2){
- Fts5Buffer tmp = *p1;
- *p1 = *p2;
- *p2 = tmp;
-}
-
static void fts5SetupPrefixIter(
Fts5Index *p, /* Index to read from */
int bDesc, /* True for "ORDER BY rowid DESC" */
const u8 *pToken, /* Buffer containing prefix to match */
int nToken, /* Size of buffer pToken in bytes */
Fts5Colset *pColset, /* Restrict matches to these columns */
- Fts5IndexIter **ppIter /* OUT: New iterator */
+ Fts5Iter **ppIter /* OUT: New iterator */
){
Fts5Structure *pStruct;
Fts5Buffer *aBuf;
const int nBuf = 32;
+ void (*xMerge)(Fts5Index*, Fts5Buffer*, Fts5Buffer*);
+ void (*xAppend)(Fts5Index*, i64, Fts5Iter*, Fts5Buffer*);
+ if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){
+ xMerge = fts5MergeRowidLists;
+ xAppend = fts5AppendRowid;
+ }else{
+ xMerge = fts5MergePrefixLists;
+ xAppend = fts5AppendPoslist;
+ }
+
aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*nBuf);
pStruct = fts5StructureRead(p);
if( aBuf && pStruct ){
- const int flags = FTS5INDEX_QUERY_SCAN;
+ const int flags = FTS5INDEX_QUERY_SCAN
+ | FTS5INDEX_QUERY_SKIPEMPTY
+ | FTS5INDEX_QUERY_NOOUTPUT;
int i;
i64 iLastRowid = 0;
- Fts5IndexIter *p1 = 0; /* Iterator used to gather data from index */
+ Fts5Iter *p1 = 0; /* Iterator used to gather data from index */
Fts5Data *pData;
Fts5Buffer doclist;
int bNewTerm = 1;
memset(&doclist, 0, sizeof(doclist));
- for(fts5MultiIterNew(p, pStruct, 1, flags, pToken, nToken, -1, 0, &p1);
+ fts5MultiIterNew(p, pStruct, flags, pColset, pToken, nToken, -1, 0, &p1);
+ fts5IterSetOutputCb(&p->rc, p1);
+ for( /* no-op */ ;
fts5MultiIterEof(p, p1)==0;
fts5MultiIterNext2(p, p1, &bNewTerm)
){
- i64 iRowid = fts5MultiIterRowid(p1);
- int nTerm;
- const u8 *pTerm = fts5MultiIterTerm(p1, &nTerm);
+ Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ];
+ int nTerm = pSeg->term.n;
+ const u8 *pTerm = pSeg->term.p;
+ p1->xSetOutputs(p1, pSeg);
+
assert_nc( memcmp(pToken, pTerm, MIN(nToken, nTerm))<=0 );
if( bNewTerm ){
if( nTerm<nToken || memcmp(pToken, pTerm, nToken) ) break;
}
- if( doclist.n>0 && iRowid<=iLastRowid ){
+ if( p1->base.nData==0 ) continue;
+
+ if( p1->base.iRowid<=iLastRowid && doclist.n>0 ){
for(i=0; p->rc==SQLITE_OK && doclist.n; i++){
assert( i<nBuf );
if( aBuf[i].n==0 ){
fts5BufferSwap(&doclist, &aBuf[i]);
fts5BufferZero(&doclist);
}else{
- fts5MergePrefixLists(p, &doclist, &aBuf[i]);
+ xMerge(p, &doclist, &aBuf[i]);
fts5BufferZero(&aBuf[i]);
}
}
iLastRowid = 0;
}
- if( !fts5AppendPoslist(p, iRowid-iLastRowid, p1, pColset, &doclist) ){
- iLastRowid = iRowid;
- }
+ xAppend(p, p1->base.iRowid-iLastRowid, p1, &doclist);
+ iLastRowid = p1->base.iRowid;
}
for(i=0; i<nBuf; i++){
if( p->rc==SQLITE_OK ){
- fts5MergePrefixLists(p, &doclist, &aBuf[i]);
+ xMerge(p, &doclist, &aBuf[i]);
}
fts5BufferFree(&aBuf[i]);
}
- fts5MultiIterFree(p, p1);
+ fts5MultiIterFree(p1);
pData = fts5IdxMalloc(p, sizeof(Fts5Data) + doclist.n);
if( pData ){
@@ -4446,7 +5112,7 @@ int sqlite3Fts5IndexBeginWrite(Fts5Index *p, int bDelete, i64 iRowid){
/* Allocate the hash table if it has not already been allocated */
if( p->pHash==0 ){
- p->rc = sqlite3Fts5HashNew(&p->pHash, &p->nPendingData);
+ p->rc = sqlite3Fts5HashNew(p->pConfig, &p->pHash, &p->nPendingData);
}
/* Flush the hash table to disk if required */
@@ -4481,7 +5147,8 @@ int sqlite3Fts5IndexSync(Fts5Index *p, int bCommit){
int sqlite3Fts5IndexRollback(Fts5Index *p){
fts5CloseReader(p);
fts5IndexDiscardData(p);
- assert( p->rc==SQLITE_OK );
+ fts5StructureInvalidate(p);
+ /* assert( p->rc==SQLITE_OK ); */
return SQLITE_OK;
}
@@ -4492,6 +5159,7 @@ int sqlite3Fts5IndexRollback(Fts5Index *p){
*/
int sqlite3Fts5IndexReinit(Fts5Index *p){
Fts5Structure s;
+ fts5StructureInvalidate(p);
memset(&s, 0, sizeof(Fts5Structure));
fts5DataWrite(p, FTS5_AVERAGES_ROWID, (const u8*)"", 0);
fts5StructureWrite(p, &s);
@@ -4550,11 +5218,13 @@ int sqlite3Fts5IndexClose(Fts5Index *p){
int rc = SQLITE_OK;
if( p ){
assert( p->pReader==0 );
+ fts5StructureInvalidate(p);
sqlite3_finalize(p->pWriter);
sqlite3_finalize(p->pDeleter);
sqlite3_finalize(p->pIdxWriter);
sqlite3_finalize(p->pIdxDeleter);
sqlite3_finalize(p->pIdxSelect);
+ sqlite3_finalize(p->pDataVersion);
sqlite3Fts5HashFree(p->pHash);
sqlite3_free(p->zDataTbl);
sqlite3_free(p);
@@ -4567,7 +5237,11 @@ int sqlite3Fts5IndexClose(Fts5Index *p){
** size. Return the number of bytes in the nChar character prefix of the
** buffer, or 0 if there are less than nChar characters in total.
*/
-static int fts5IndexCharlenToBytelen(const char *p, int nByte, int nChar){
+int sqlite3Fts5IndexCharlenToBytelen(
+ const char *p,
+ int nByte,
+ int nChar
+){
int n = 0;
int i;
for(i=0; i<nChar; i++){
@@ -4624,7 +5298,8 @@ int sqlite3Fts5IndexWrite(
);
for(i=0; i<pConfig->nPrefix && rc==SQLITE_OK; i++){
- int nByte = fts5IndexCharlenToBytelen(pToken, nToken, pConfig->aPrefix[i]);
+ const int nChar = pConfig->aPrefix[i];
+ int nByte = sqlite3Fts5IndexCharlenToBytelen(pToken, nToken, nChar);
if( nByte ){
rc = sqlite3Fts5HashWrite(p->pHash,
p->iWriteRowid, iCol, iPos, (char)(FTS5_MAIN_PREFIX+i+1), pToken,
@@ -4648,22 +5323,27 @@ int sqlite3Fts5IndexQuery(
Fts5IndexIter **ppIter /* OUT: New iterator object */
){
Fts5Config *pConfig = p->pConfig;
- Fts5IndexIter *pRet = 0;
- int iIdx = 0;
+ Fts5Iter *pRet = 0;
Fts5Buffer buf = {0, 0, 0};
/* If the QUERY_SCAN flag is set, all other flags must be clear. */
assert( (flags & FTS5INDEX_QUERY_SCAN)==0 || flags==FTS5INDEX_QUERY_SCAN );
if( sqlite3Fts5BufferSize(&p->rc, &buf, nToken+1)==0 ){
+ int iIdx = 0; /* Index to search */
memcpy(&buf.p[1], pToken, nToken);
-#ifdef SQLITE_DEBUG
- /* If the QUERY_TEST_NOIDX flag was specified, then this must be a
+ /* Figure out which index to search and set iIdx accordingly. If this
+ ** is a prefix query for which there is no prefix index, set iIdx to
+ ** greater than pConfig->nPrefix to indicate that the query will be
+ ** satisfied by scanning multiple terms in the main index.
+ **
+ ** If the QUERY_TEST_NOIDX flag was specified, then this must be a
** prefix-query. Instead of using a prefix-index (if one exists),
** evaluate the prefix query using the main FTS index. This is used
** for internal sanity checking by the integrity-check in debug
** mode only. */
+#ifdef SQLITE_DEBUG
if( pConfig->bPrefixIndex==0 || (flags & FTS5INDEX_QUERY_TEST_NOIDX) ){
assert( flags & FTS5INDEX_QUERY_PREFIX );
iIdx = 1+pConfig->nPrefix;
@@ -4677,24 +5357,35 @@ int sqlite3Fts5IndexQuery(
}
if( iIdx<=pConfig->nPrefix ){
+ /* Straight index lookup */
Fts5Structure *pStruct = fts5StructureRead(p);
buf.p[0] = (u8)(FTS5_MAIN_PREFIX + iIdx);
if( pStruct ){
- fts5MultiIterNew(p, pStruct, 1, flags, buf.p, nToken+1, -1, 0, &pRet);
+ fts5MultiIterNew(p, pStruct, flags | FTS5INDEX_QUERY_SKIPEMPTY,
+ pColset, buf.p, nToken+1, -1, 0, &pRet
+ );
fts5StructureRelease(pStruct);
}
}else{
+ /* Scan multiple terms in the main index */
int bDesc = (flags & FTS5INDEX_QUERY_DESC)!=0;
buf.p[0] = FTS5_MAIN_PREFIX;
fts5SetupPrefixIter(p, bDesc, buf.p, nToken+1, pColset, &pRet);
+ assert( p->rc!=SQLITE_OK || pRet->pColset==0 );
+ fts5IterSetOutputCb(&p->rc, pRet);
+ if( p->rc==SQLITE_OK ){
+ Fts5SegIter *pSeg = &pRet->aSeg[pRet->aFirst[1].iFirst];
+ if( pSeg->pLeaf ) pRet->xSetOutputs(pRet, pSeg);
+ }
}
if( p->rc ){
- sqlite3Fts5IterClose(pRet);
+ sqlite3Fts5IterClose(&pRet->base);
pRet = 0;
fts5CloseReader(p);
}
- *ppIter = pRet;
+
+ *ppIter = &pRet->base;
sqlite3Fts5BufferFree(&buf);
}
return fts5IndexReturn(p);
@@ -4703,15 +5394,11 @@ int sqlite3Fts5IndexQuery(
/*
** Return true if the iterator passed as the only argument is at EOF.
*/
-int sqlite3Fts5IterEof(Fts5IndexIter *pIter){
- assert( pIter->pIndex->rc==SQLITE_OK );
- return pIter->bEof;
-}
-
/*
** Move to the next matching rowid.
*/
-int sqlite3Fts5IterNext(Fts5IndexIter *pIter){
+int sqlite3Fts5IterNext(Fts5IndexIter *pIndexIter){
+ Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
assert( pIter->pIndex->rc==SQLITE_OK );
fts5MultiIterNext(pIter->pIndex, pIter, 0, 0);
return fts5IndexReturn(pIter->pIndex);
@@ -4720,7 +5407,8 @@ int sqlite3Fts5IterNext(Fts5IndexIter *pIter){
/*
** Move to the next matching term/rowid. Used by the fts5vocab module.
*/
-int sqlite3Fts5IterNextScan(Fts5IndexIter *pIter){
+int sqlite3Fts5IterNextScan(Fts5IndexIter *pIndexIter){
+ Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
Fts5Index *p = pIter->pIndex;
assert( pIter->pIndex->rc==SQLITE_OK );
@@ -4731,7 +5419,7 @@ int sqlite3Fts5IterNextScan(Fts5IndexIter *pIter){
if( pSeg->pLeaf && pSeg->term.p[0]!=FTS5_MAIN_PREFIX ){
fts5DataRelease(pSeg->pLeaf);
pSeg->pLeaf = 0;
- pIter->bEof = 1;
+ pIter->base.bEof = 1;
}
}
@@ -4743,111 +5431,30 @@ int sqlite3Fts5IterNextScan(Fts5IndexIter *pIter){
** definition of "at or after" depends on whether this iterator iterates
** in ascending or descending rowid order.
*/
-int sqlite3Fts5IterNextFrom(Fts5IndexIter *pIter, i64 iMatch){
+int sqlite3Fts5IterNextFrom(Fts5IndexIter *pIndexIter, i64 iMatch){
+ Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
fts5MultiIterNextFrom(pIter->pIndex, pIter, iMatch);
return fts5IndexReturn(pIter->pIndex);
}
/*
-** Return the current rowid.
-*/
-i64 sqlite3Fts5IterRowid(Fts5IndexIter *pIter){
- return fts5MultiIterRowid(pIter);
-}
-
-/*
** Return the current term.
*/
-const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIter, int *pn){
+const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIndexIter, int *pn){
int n;
- const char *z = (const char*)fts5MultiIterTerm(pIter, &n);
+ const char *z = (const char*)fts5MultiIterTerm((Fts5Iter*)pIndexIter, &n);
*pn = n-1;
return &z[1];
}
-
-static int fts5IndexExtractColset (
- Fts5Colset *pColset, /* Colset to filter on */
- const u8 *pPos, int nPos, /* Position list */
- Fts5Buffer *pBuf /* Output buffer */
-){
- int rc = SQLITE_OK;
- int i;
-
- fts5BufferZero(pBuf);
- for(i=0; i<pColset->nCol; i++){
- const u8 *pSub = pPos;
- int nSub = fts5IndexExtractCol(&pSub, nPos, pColset->aiCol[i]);
- if( nSub ){
- fts5BufferAppendBlob(&rc, pBuf, nSub, pSub);
- }
- }
- return rc;
-}
-
-
-/*
-** Return a pointer to a buffer containing a copy of the position list for
-** the current entry. Output variable *pn is set to the size of the buffer
-** in bytes before returning.
-**
-** The returned position list does not include the "number of bytes" varint
-** field that starts the position list on disk.
-*/
-int sqlite3Fts5IterPoslist(
- Fts5IndexIter *pIter,
- Fts5Colset *pColset, /* Column filter (or NULL) */
- const u8 **pp, /* OUT: Pointer to position-list data */
- int *pn, /* OUT: Size of position-list in bytes */
- i64 *piRowid /* OUT: Current rowid */
-){
- Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
- assert( pIter->pIndex->rc==SQLITE_OK );
- *piRowid = pSeg->iRowid;
- if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){
- u8 *pPos = &pSeg->pLeaf->p[pSeg->iLeafOffset];
- if( pColset==0 || pIter->bFiltered ){
- *pn = pSeg->nPos;
- *pp = pPos;
- }else if( pColset->nCol==1 ){
- *pp = pPos;
- *pn = fts5IndexExtractCol(pp, pSeg->nPos, pColset->aiCol[0]);
- }else{
- fts5BufferZero(&pIter->poslist);
- fts5IndexExtractColset(pColset, pPos, pSeg->nPos, &pIter->poslist);
- *pp = pIter->poslist.p;
- *pn = pIter->poslist.n;
- }
- }else{
- fts5BufferZero(&pIter->poslist);
- fts5SegiterPoslist(pIter->pIndex, pSeg, pColset, &pIter->poslist);
- *pp = pIter->poslist.p;
- *pn = pIter->poslist.n;
- }
- return fts5IndexReturn(pIter->pIndex);
-}
-
-/*
-** This function is similar to sqlite3Fts5IterPoslist(), except that it
-** copies the position list into the buffer supplied as the second
-** argument.
-*/
-int sqlite3Fts5IterPoslistBuffer(Fts5IndexIter *pIter, Fts5Buffer *pBuf){
- Fts5Index *p = pIter->pIndex;
- Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
- assert( p->rc==SQLITE_OK );
- fts5BufferZero(pBuf);
- fts5SegiterPoslist(p, pSeg, 0, pBuf);
- return fts5IndexReturn(p);
-}
-
/*
** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery().
*/
-void sqlite3Fts5IterClose(Fts5IndexIter *pIter){
- if( pIter ){
+void sqlite3Fts5IterClose(Fts5IndexIter *pIndexIter){
+ if( pIndexIter ){
+ Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
Fts5Index *pIndex = pIter->pIndex;
- fts5MultiIterFree(pIter->pIndex, pIter);
+ fts5MultiIterFree(pIter);
fts5CloseReader(pIndex);
}
}
@@ -4940,7 +5547,7 @@ int sqlite3Fts5IndexLoadConfig(Fts5Index *p){
/*
** Return a simple checksum value based on the arguments.
*/
-static u64 fts5IndexEntryCksum(
+u64 sqlite3Fts5IndexEntryCksum(
i64 iRowid,
int iCol,
int iPos,
@@ -5010,30 +5617,32 @@ static int fts5QueryCksum(
int flags, /* Flags for Fts5IndexQuery */
u64 *pCksum /* IN/OUT: Checksum value */
){
+ int eDetail = p->pConfig->eDetail;
u64 cksum = *pCksum;
- Fts5IndexIter *pIdxIter = 0;
- int rc = sqlite3Fts5IndexQuery(p, z, n, flags, 0, &pIdxIter);
+ Fts5IndexIter *pIter = 0;
+ int rc = sqlite3Fts5IndexQuery(p, z, n, flags, 0, &pIter);
- while( rc==SQLITE_OK && 0==sqlite3Fts5IterEof(pIdxIter) ){
- i64 dummy;
- const u8 *pPos;
- int nPos;
- i64 rowid = sqlite3Fts5IterRowid(pIdxIter);
- rc = sqlite3Fts5IterPoslist(pIdxIter, 0, &pPos, &nPos, &dummy);
- if( rc==SQLITE_OK ){
+ while( rc==SQLITE_OK && 0==sqlite3Fts5IterEof(pIter) ){
+ i64 rowid = pIter->iRowid;
+
+ if( eDetail==FTS5_DETAIL_NONE ){
+ cksum ^= sqlite3Fts5IndexEntryCksum(rowid, 0, 0, iIdx, z, n);
+ }else{
Fts5PoslistReader sReader;
- for(sqlite3Fts5PoslistReaderInit(pPos, nPos, &sReader);
+ for(sqlite3Fts5PoslistReaderInit(pIter->pData, pIter->nData, &sReader);
sReader.bEof==0;
sqlite3Fts5PoslistReaderNext(&sReader)
){
int iCol = FTS5_POS2COLUMN(sReader.iPos);
int iOff = FTS5_POS2OFFSET(sReader.iPos);
- cksum ^= fts5IndexEntryCksum(rowid, iCol, iOff, iIdx, z, n);
+ cksum ^= sqlite3Fts5IndexEntryCksum(rowid, iCol, iOff, iIdx, z, n);
}
- rc = sqlite3Fts5IterNext(pIdxIter);
+ }
+ if( rc==SQLITE_OK ){
+ rc = sqlite3Fts5IterNext(pIter);
}
}
- sqlite3Fts5IterClose(pIdxIter);
+ sqlite3Fts5IterClose(pIter);
*pCksum = cksum;
return rc;
@@ -5221,7 +5830,7 @@ static void fts5IndexIntegrityCheckSegment(
** ignore this b-tree entry. Otherwise, load it into memory. */
if( iIdxLeaf<pSeg->pgnoFirst ) continue;
iRow = FTS5_SEGMENT_ROWID(pSeg->iSegid, iIdxLeaf);
- pLeaf = fts5DataRead(p, iRow);
+ pLeaf = fts5LeafRead(p, iRow);
if( pLeaf==0 ) break;
/* Check that the leaf contains at least one term, and that it is equal
@@ -5327,7 +5936,7 @@ static void fts5IndexIntegrityCheckSegment(
/*
** Run internal checks to ensure that the FTS index (a) is internally
** consistent and (b) contains entries for which the XOR of the checksums
-** as calculated by fts5IndexEntryCksum() is cksum.
+** as calculated by sqlite3Fts5IndexEntryCksum() is cksum.
**
** Return SQLITE_CORRUPT if any of the internal checks fail, or if the
** checksum does not match. Return SQLITE_OK if all checks pass without
@@ -5335,9 +5944,10 @@ static void fts5IndexIntegrityCheckSegment(
** occurs.
*/
int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){
+ int eDetail = p->pConfig->eDetail;
u64 cksum2 = 0; /* Checksum based on contents of indexes */
Fts5Buffer poslist = {0,0,0}; /* Buffer used to hold a poslist */
- Fts5IndexIter *pIter; /* Used to iterate through entire index */
+ Fts5Iter *pIter; /* Used to iterate through entire index */
Fts5Structure *pStruct; /* Index structure */
#ifdef SQLITE_DEBUG
@@ -5345,6 +5955,7 @@ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){
u64 cksum3 = 0; /* Checksum based on contents of indexes */
Fts5Buffer term = {0,0,0}; /* Buffer used to hold most recent term */
#endif
+ const int flags = FTS5INDEX_QUERY_NOOUTPUT;
/* Load the FTS index structure */
pStruct = fts5StructureRead(p);
@@ -5373,7 +5984,7 @@ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){
** same term is performed. cksum3 is calculated based on the entries
** extracted by these queries.
*/
- for(fts5MultiIterNew(p, pStruct, 0, 0, 0, 0, -1, 0, &pIter);
+ for(fts5MultiIterNew(p, pStruct, flags, 0, 0, 0, -1, 0, &pIter);
fts5MultiIterEof(p, pIter)==0;
fts5MultiIterNext(p, pIter, 0, 0)
){
@@ -5386,17 +5997,23 @@ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){
/* If this is a new term, query for it. Update cksum3 with the results. */
fts5TestTerm(p, &term, z, n, cksum2, &cksum3);
- poslist.n = 0;
- fts5SegiterPoslist(p, &pIter->aSeg[pIter->aFirst[1].iFirst] , 0, &poslist);
- while( 0==sqlite3Fts5PoslistNext64(poslist.p, poslist.n, &iOff, &iPos) ){
- int iCol = FTS5_POS2COLUMN(iPos);
- int iTokOff = FTS5_POS2OFFSET(iPos);
- cksum2 ^= fts5IndexEntryCksum(iRowid, iCol, iTokOff, -1, z, n);
+ if( eDetail==FTS5_DETAIL_NONE ){
+ if( 0==fts5MultiIterIsEmpty(p, pIter) ){
+ cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, 0, 0, -1, z, n);
+ }
+ }else{
+ poslist.n = 0;
+ fts5SegiterPoslist(p, &pIter->aSeg[pIter->aFirst[1].iFirst], 0, &poslist);
+ while( 0==sqlite3Fts5PoslistNext64(poslist.p, poslist.n, &iOff, &iPos) ){
+ int iCol = FTS5_POS2COLUMN(iPos);
+ int iTokOff = FTS5_POS2OFFSET(iPos);
+ cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, iCol, iTokOff, -1, z, n);
+ }
}
}
fts5TestTerm(p, &term, 0, 0, cksum2, &cksum3);
- fts5MultiIterFree(p, pIter);
+ fts5MultiIterFree(pIter);
if( p->rc==SQLITE_OK && cksum!=cksum2 ) p->rc = FTS5_CORRUPT;
fts5StructureRelease(pStruct);
@@ -5407,34 +6024,6 @@ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){
return fts5IndexReturn(p);
}
-
-/*
-** Calculate and return a checksum that is the XOR of the index entry
-** checksum of all entries that would be generated by the token specified
-** by the final 5 arguments.
-*/
-u64 sqlite3Fts5IndexCksum(
- Fts5Config *pConfig, /* Configuration object */
- i64 iRowid, /* Document term appears in */
- int iCol, /* Column term appears in */
- int iPos, /* Position term appears in */
- const char *pTerm, int nTerm /* Term at iPos */
-){
- u64 ret = 0; /* Return value */
- int iIdx; /* For iterating through indexes */
-
- ret = fts5IndexEntryCksum(iRowid, iCol, iPos, 0, pTerm, nTerm);
-
- for(iIdx=0; iIdx<pConfig->nPrefix; iIdx++){
- int nByte = fts5IndexCharlenToBytelen(pTerm, nTerm, pConfig->aPrefix[iIdx]);
- if( nByte ){
- ret ^= fts5IndexEntryCksum(iRowid, iCol, iPos, iIdx+1, pTerm, nByte);
- }
- }
-
- return ret;
-}
-
/*************************************************************************
**************************************************************************
** Below this point is the implementation of the fts5_decode() scalar
@@ -5603,6 +6192,47 @@ static int fts5DecodeDoclist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){
}
/*
+** This function is part of the fts5_decode() debugging function. It is
+** only ever used with detail=none tables.
+**
+** Buffer (pData/nData) contains a doclist in the format used by detail=none
+** tables. This function appends a human-readable version of that list to
+** buffer pBuf.
+**
+** If *pRc is other than SQLITE_OK when this function is called, it is a
+** no-op. If an OOM or other error occurs within this function, *pRc is
+** set to an SQLite error code before returning. The final state of buffer
+** pBuf is undefined in this case.
+*/
+static void fts5DecodeRowidList(
+ int *pRc, /* IN/OUT: Error code */
+ Fts5Buffer *pBuf, /* Buffer to append text to */
+ const u8 *pData, int nData /* Data to decode list-of-rowids from */
+){
+ int i = 0;
+ i64 iRowid = 0;
+
+ while( i<nData ){
+ const char *zApp = "";
+ u64 iVal;
+ i += sqlite3Fts5GetVarint(&pData[i], &iVal);
+ iRowid += iVal;
+
+ if( i<nData && pData[i]==0x00 ){
+ i++;
+ if( i<nData && pData[i]==0x00 ){
+ i++;
+ zApp = "+";
+ }else{
+ zApp = "*";
+ }
+ }
+
+ sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " %lld%s", iRowid, zApp);
+ }
+}
+
+/*
** The implementation of user-defined scalar function fts5_decode().
*/
static void fts5DecodeFunction(
@@ -5617,8 +6247,10 @@ static void fts5DecodeFunction(
Fts5Buffer s; /* Build up text to return here */
int rc = SQLITE_OK; /* Return code */
int nSpace = 0;
+ int eDetailNone = (sqlite3_user_data(pCtx)!=0);
assert( nArg==2 );
+ UNUSED_PARAM(nArg);
memset(&s, 0, sizeof(Fts5Buffer));
iRowid = sqlite3_value_int64(apVal[0]);
@@ -5658,6 +6290,54 @@ static void fts5DecodeFunction(
}else{
fts5DecodeStructure(&rc, &s, a, n);
}
+ }else if( eDetailNone ){
+ Fts5Buffer term; /* Current term read from page */
+ int szLeaf;
+ int iPgidxOff = szLeaf = fts5GetU16(&a[2]);
+ int iTermOff;
+ int nKeep = 0;
+ int iOff;
+
+ memset(&term, 0, sizeof(Fts5Buffer));
+
+ /* Decode any entries that occur before the first term. */
+ if( szLeaf<n ){
+ iPgidxOff += fts5GetVarint32(&a[iPgidxOff], iTermOff);
+ }else{
+ iTermOff = szLeaf;
+ }
+ fts5DecodeRowidList(&rc, &s, &a[4], iTermOff-4);
+
+ iOff = iTermOff;
+ while( iOff<szLeaf ){
+ int nAppend;
+
+ /* Read the term data for the next term*/
+ iOff += fts5GetVarint32(&a[iOff], nAppend);
+ term.n = nKeep;
+ fts5BufferAppendBlob(&rc, &term, nAppend, &a[iOff]);
+ sqlite3Fts5BufferAppendPrintf(
+ &rc, &s, " term=%.*s", term.n, (const char*)term.p
+ );
+ iOff += nAppend;
+
+ /* Figure out where the doclist for this term ends */
+ if( iPgidxOff<n ){
+ int nIncr;
+ iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nIncr);
+ iTermOff += nIncr;
+ }else{
+ iTermOff = szLeaf;
+ }
+
+ fts5DecodeRowidList(&rc, &s, &a[iOff], iTermOff-iOff);
+ iOff = iTermOff;
+ if( iOff<szLeaf ){
+ iOff += fts5GetVarint32(&a[iOff], nKeep);
+ }
+ }
+
+ fts5BufferFree(&term);
}else{
Fts5Buffer term; /* Current term read from page */
int szLeaf; /* Offset of pgidx in a[] */
@@ -5785,6 +6465,14 @@ int sqlite3Fts5IndexInit(sqlite3 *db){
int rc = sqlite3_create_function(
db, "fts5_decode", 2, SQLITE_UTF8, 0, fts5DecodeFunction, 0, 0
);
+
+ if( rc==SQLITE_OK ){
+ rc = sqlite3_create_function(
+ db, "fts5_decode_none", 2,
+ SQLITE_UTF8, (void*)db, fts5DecodeFunction, 0, 0
+ );
+ }
+
if( rc==SQLITE_OK ){
rc = sqlite3_create_function(
db, "fts5_rowid", -1, SQLITE_UTF8, 0, fts5RowidFunction, 0, 0
@@ -5793,3 +6481,11 @@ int sqlite3Fts5IndexInit(sqlite3 *db){
return rc;
}
+
+int sqlite3Fts5IndexReset(Fts5Index *p){
+ assert( p->pStruct==0 || p->iStructVersion!=0 );
+ if( fts5IndexDataVersion(p)!=p->iStructVersion ){
+ fts5StructureInvalidate(p);
+ }
+ return fts5IndexReturn(p);
+}
« no previous file with comments | « third_party/sqlite/src/ext/fts5/fts5_hash.c ('k') | third_party/sqlite/src/ext/fts5/fts5_main.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698