| Index: third_party/sqlite/sqlite-src-3100200/ext/fts5/fts5_hash.c
|
| diff --git a/third_party/sqlite/sqlite-src-3100200/ext/fts5/fts5_hash.c b/third_party/sqlite/sqlite-src-3100200/ext/fts5/fts5_hash.c
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..f184957af6d2320ce5536a21005b4cf1a85b63e8
|
| --- /dev/null
|
| +++ b/third_party/sqlite/sqlite-src-3100200/ext/fts5/fts5_hash.c
|
| @@ -0,0 +1,472 @@
|
| +/*
|
| +** 2014 August 11
|
| +**
|
| +** The author disclaims copyright to this source code. In place of
|
| +** a legal notice, here is a blessing:
|
| +**
|
| +** May you do good and not evil.
|
| +** May you find forgiveness for yourself and forgive others.
|
| +** May you share freely, never taking more than you give.
|
| +**
|
| +******************************************************************************
|
| +**
|
| +*/
|
| +
|
| +
|
| +
|
| +#include "fts5Int.h"
|
| +
|
| +typedef struct Fts5HashEntry Fts5HashEntry;
|
| +
|
| +/*
|
| +** This file contains the implementation of an in-memory hash table used
|
| +** to accumuluate "term -> doclist" content before it is flused to a level-0
|
| +** segment.
|
| +*/
|
| +
|
| +
|
| +struct Fts5Hash {
|
| + int *pnByte; /* Pointer to bytes counter */
|
| + int nEntry; /* Number of entries currently in hash */
|
| + int nSlot; /* Size of aSlot[] array */
|
| + Fts5HashEntry *pScan; /* Current ordered scan item */
|
| + Fts5HashEntry **aSlot; /* Array of hash slots */
|
| +};
|
| +
|
| +/*
|
| +** Each entry in the hash table is represented by an object of the
|
| +** following type. Each object, its key (zKey[]) and its current data
|
| +** are stored in a single memory allocation. The position list data
|
| +** immediately follows the key data in memory.
|
| +**
|
| +** The data that follows the key is in a similar, but not identical format
|
| +** to the doclist data stored in the database. It is:
|
| +**
|
| +** * Rowid, as a varint
|
| +** * Position list, without 0x00 terminator.
|
| +** * Size of previous position list and rowid, as a 4 byte
|
| +** big-endian integer.
|
| +**
|
| +** iRowidOff:
|
| +** Offset of last rowid written to data area. Relative to first byte of
|
| +** structure.
|
| +**
|
| +** nData:
|
| +** Bytes of data written since iRowidOff.
|
| +*/
|
| +struct Fts5HashEntry {
|
| + Fts5HashEntry *pHashNext; /* Next hash entry with same hash-key */
|
| + Fts5HashEntry *pScanNext; /* Next entry in sorted order */
|
| +
|
| + int nAlloc; /* Total size of allocation */
|
| + int iSzPoslist; /* Offset of space for 4-byte poslist size */
|
| + int nData; /* Total bytes of data (incl. structure) */
|
| + u8 bDel; /* Set delete-flag @ iSzPoslist */
|
| +
|
| + int iCol; /* Column of last value written */
|
| + int iPos; /* Position of last value written */
|
| + i64 iRowid; /* Rowid of last value written */
|
| + char zKey[8]; /* Nul-terminated entry key */
|
| +};
|
| +
|
| +/*
|
| +** Size of Fts5HashEntry without the zKey[] array.
|
| +*/
|
| +#define FTS5_HASHENTRYSIZE (sizeof(Fts5HashEntry)-8)
|
| +
|
| +
|
| +
|
| +/*
|
| +** Allocate a new hash table.
|
| +*/
|
| +int sqlite3Fts5HashNew(Fts5Hash **ppNew, int *pnByte){
|
| + int rc = SQLITE_OK;
|
| + Fts5Hash *pNew;
|
| +
|
| + *ppNew = pNew = (Fts5Hash*)sqlite3_malloc(sizeof(Fts5Hash));
|
| + if( pNew==0 ){
|
| + rc = SQLITE_NOMEM;
|
| + }else{
|
| + int nByte;
|
| + memset(pNew, 0, sizeof(Fts5Hash));
|
| + pNew->pnByte = pnByte;
|
| +
|
| + pNew->nSlot = 1024;
|
| + nByte = sizeof(Fts5HashEntry*) * pNew->nSlot;
|
| + pNew->aSlot = (Fts5HashEntry**)sqlite3_malloc(nByte);
|
| + if( pNew->aSlot==0 ){
|
| + sqlite3_free(pNew);
|
| + *ppNew = 0;
|
| + rc = SQLITE_NOMEM;
|
| + }else{
|
| + memset(pNew->aSlot, 0, nByte);
|
| + }
|
| + }
|
| + return rc;
|
| +}
|
| +
|
| +/*
|
| +** Free a hash table object.
|
| +*/
|
| +void sqlite3Fts5HashFree(Fts5Hash *pHash){
|
| + if( pHash ){
|
| + sqlite3Fts5HashClear(pHash);
|
| + sqlite3_free(pHash->aSlot);
|
| + sqlite3_free(pHash);
|
| + }
|
| +}
|
| +
|
| +/*
|
| +** Empty (but do not delete) a hash table.
|
| +*/
|
| +void sqlite3Fts5HashClear(Fts5Hash *pHash){
|
| + int i;
|
| + for(i=0; i<pHash->nSlot; i++){
|
| + Fts5HashEntry *pNext;
|
| + Fts5HashEntry *pSlot;
|
| + for(pSlot=pHash->aSlot[i]; pSlot; pSlot=pNext){
|
| + pNext = pSlot->pHashNext;
|
| + sqlite3_free(pSlot);
|
| + }
|
| + }
|
| + memset(pHash->aSlot, 0, pHash->nSlot * sizeof(Fts5HashEntry*));
|
| + pHash->nEntry = 0;
|
| +}
|
| +
|
| +static unsigned int fts5HashKey(int nSlot, const u8 *p, int n){
|
| + int i;
|
| + unsigned int h = 13;
|
| + for(i=n-1; i>=0; i--){
|
| + h = (h << 3) ^ h ^ p[i];
|
| + }
|
| + return (h % nSlot);
|
| +}
|
| +
|
| +static unsigned int fts5HashKey2(int nSlot, u8 b, const u8 *p, int n){
|
| + int i;
|
| + unsigned int h = 13;
|
| + for(i=n-1; i>=0; i--){
|
| + h = (h << 3) ^ h ^ p[i];
|
| + }
|
| + h = (h << 3) ^ h ^ b;
|
| + return (h % nSlot);
|
| +}
|
| +
|
| +/*
|
| +** Resize the hash table by doubling the number of slots.
|
| +*/
|
| +static int fts5HashResize(Fts5Hash *pHash){
|
| + int nNew = pHash->nSlot*2;
|
| + int i;
|
| + Fts5HashEntry **apNew;
|
| + Fts5HashEntry **apOld = pHash->aSlot;
|
| +
|
| + apNew = (Fts5HashEntry**)sqlite3_malloc(nNew*sizeof(Fts5HashEntry*));
|
| + if( !apNew ) return SQLITE_NOMEM;
|
| + memset(apNew, 0, nNew*sizeof(Fts5HashEntry*));
|
| +
|
| + for(i=0; i<pHash->nSlot; i++){
|
| + while( apOld[i] ){
|
| + int iHash;
|
| + Fts5HashEntry *p = apOld[i];
|
| + apOld[i] = p->pHashNext;
|
| + iHash = fts5HashKey(nNew, (u8*)p->zKey, (int)strlen(p->zKey));
|
| + p->pHashNext = apNew[iHash];
|
| + apNew[iHash] = p;
|
| + }
|
| + }
|
| +
|
| + sqlite3_free(apOld);
|
| + pHash->nSlot = nNew;
|
| + pHash->aSlot = apNew;
|
| + return SQLITE_OK;
|
| +}
|
| +
|
| +static void fts5HashAddPoslistSize(Fts5HashEntry *p){
|
| + if( p->iSzPoslist ){
|
| + u8 *pPtr = (u8*)p;
|
| + int nSz = (p->nData - p->iSzPoslist - 1); /* Size in bytes */
|
| + int nPos = nSz*2 + p->bDel; /* Value of nPos field */
|
| +
|
| + assert( p->bDel==0 || p->bDel==1 );
|
| + if( nPos<=127 ){
|
| + pPtr[p->iSzPoslist] = (u8)nPos;
|
| + }else{
|
| + int nByte = sqlite3Fts5GetVarintLen((u32)nPos);
|
| + memmove(&pPtr[p->iSzPoslist + nByte], &pPtr[p->iSzPoslist + 1], nSz);
|
| + sqlite3Fts5PutVarint(&pPtr[p->iSzPoslist], nPos);
|
| + p->nData += (nByte-1);
|
| + }
|
| + p->bDel = 0;
|
| + p->iSzPoslist = 0;
|
| + }
|
| +}
|
| +
|
| +int sqlite3Fts5HashWrite(
|
| + Fts5Hash *pHash,
|
| + i64 iRowid, /* Rowid for this entry */
|
| + int iCol, /* Column token appears in (-ve -> delete) */
|
| + int iPos, /* Position of token within column */
|
| + char bByte, /* First byte of token */
|
| + const char *pToken, int nToken /* Token to add or remove to or from index */
|
| +){
|
| + unsigned int iHash;
|
| + Fts5HashEntry *p;
|
| + u8 *pPtr;
|
| + int nIncr = 0; /* Amount to increment (*pHash->pnByte) by */
|
| +
|
| + /* Attempt to locate an existing hash entry */
|
| + iHash = fts5HashKey2(pHash->nSlot, (u8)bByte, (const u8*)pToken, nToken);
|
| + for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){
|
| + if( p->zKey[0]==bByte
|
| + && memcmp(&p->zKey[1], pToken, nToken)==0
|
| + && p->zKey[nToken+1]==0
|
| + ){
|
| + break;
|
| + }
|
| + }
|
| +
|
| + /* If an existing hash entry cannot be found, create a new one. */
|
| + if( p==0 ){
|
| + int nByte = FTS5_HASHENTRYSIZE + (nToken+1) + 1 + 64;
|
| + if( nByte<128 ) nByte = 128;
|
| +
|
| + if( (pHash->nEntry*2)>=pHash->nSlot ){
|
| + int rc = fts5HashResize(pHash);
|
| + if( rc!=SQLITE_OK ) return rc;
|
| + iHash = fts5HashKey2(pHash->nSlot, (u8)bByte, (const u8*)pToken, nToken);
|
| + }
|
| +
|
| + p = (Fts5HashEntry*)sqlite3_malloc(nByte);
|
| + if( !p ) return SQLITE_NOMEM;
|
| + memset(p, 0, FTS5_HASHENTRYSIZE);
|
| + p->nAlloc = nByte;
|
| + p->zKey[0] = bByte;
|
| + memcpy(&p->zKey[1], pToken, nToken);
|
| + assert( iHash==fts5HashKey(pHash->nSlot, (u8*)p->zKey, nToken+1) );
|
| + p->zKey[nToken+1] = '\0';
|
| + p->nData = nToken+1 + 1 + FTS5_HASHENTRYSIZE;
|
| + p->nData += sqlite3Fts5PutVarint(&((u8*)p)[p->nData], iRowid);
|
| + p->iSzPoslist = p->nData;
|
| + p->nData += 1;
|
| + p->iRowid = iRowid;
|
| + p->pHashNext = pHash->aSlot[iHash];
|
| + pHash->aSlot[iHash] = p;
|
| + pHash->nEntry++;
|
| + nIncr += p->nData;
|
| + }
|
| +
|
| + /* Check there is enough space to append a new entry. Worst case scenario
|
| + ** is:
|
| + **
|
| + ** + 9 bytes for a new rowid,
|
| + ** + 4 byte reserved for the "poslist size" varint.
|
| + ** + 1 byte for a "new column" byte,
|
| + ** + 3 bytes for a new column number (16-bit max) as a varint,
|
| + ** + 5 bytes for the new position offset (32-bit max).
|
| + */
|
| + if( (p->nAlloc - p->nData) < (9 + 4 + 1 + 3 + 5) ){
|
| + int nNew = p->nAlloc * 2;
|
| + Fts5HashEntry *pNew;
|
| + Fts5HashEntry **pp;
|
| + pNew = (Fts5HashEntry*)sqlite3_realloc(p, nNew);
|
| + if( pNew==0 ) return SQLITE_NOMEM;
|
| + pNew->nAlloc = nNew;
|
| + for(pp=&pHash->aSlot[iHash]; *pp!=p; pp=&(*pp)->pHashNext);
|
| + *pp = pNew;
|
| + p = pNew;
|
| + }
|
| + pPtr = (u8*)p;
|
| + nIncr -= p->nData;
|
| +
|
| + /* If this is a new rowid, append the 4-byte size field for the previous
|
| + ** entry, and the new rowid for this entry. */
|
| + if( iRowid!=p->iRowid ){
|
| + fts5HashAddPoslistSize(p);
|
| + p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iRowid - p->iRowid);
|
| + p->iSzPoslist = p->nData;
|
| + p->nData += 1;
|
| + p->iCol = 0;
|
| + p->iPos = 0;
|
| + p->iRowid = iRowid;
|
| + }
|
| +
|
| + if( iCol>=0 ){
|
| + /* Append a new column value, if necessary */
|
| + assert( iCol>=p->iCol );
|
| + if( iCol!=p->iCol ){
|
| + pPtr[p->nData++] = 0x01;
|
| + p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iCol);
|
| + p->iCol = iCol;
|
| + p->iPos = 0;
|
| + }
|
| +
|
| + /* Append the new position offset */
|
| + p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iPos - p->iPos + 2);
|
| + p->iPos = iPos;
|
| + }else{
|
| + /* This is a delete. Set the delete flag. */
|
| + p->bDel = 1;
|
| + }
|
| + nIncr += p->nData;
|
| +
|
| + *pHash->pnByte += nIncr;
|
| + return SQLITE_OK;
|
| +}
|
| +
|
| +
|
| +/*
|
| +** Arguments pLeft and pRight point to linked-lists of hash-entry objects,
|
| +** each sorted in key order. This function merges the two lists into a
|
| +** single list and returns a pointer to its first element.
|
| +*/
|
| +static Fts5HashEntry *fts5HashEntryMerge(
|
| + Fts5HashEntry *pLeft,
|
| + Fts5HashEntry *pRight
|
| +){
|
| + Fts5HashEntry *p1 = pLeft;
|
| + Fts5HashEntry *p2 = pRight;
|
| + Fts5HashEntry *pRet = 0;
|
| + Fts5HashEntry **ppOut = &pRet;
|
| +
|
| + while( p1 || p2 ){
|
| + if( p1==0 ){
|
| + *ppOut = p2;
|
| + p2 = 0;
|
| + }else if( p2==0 ){
|
| + *ppOut = p1;
|
| + p1 = 0;
|
| + }else{
|
| + int i = 0;
|
| + while( p1->zKey[i]==p2->zKey[i] ) i++;
|
| +
|
| + if( ((u8)p1->zKey[i])>((u8)p2->zKey[i]) ){
|
| + /* p2 is smaller */
|
| + *ppOut = p2;
|
| + ppOut = &p2->pScanNext;
|
| + p2 = p2->pScanNext;
|
| + }else{
|
| + /* p1 is smaller */
|
| + *ppOut = p1;
|
| + ppOut = &p1->pScanNext;
|
| + p1 = p1->pScanNext;
|
| + }
|
| + *ppOut = 0;
|
| + }
|
| + }
|
| +
|
| + return pRet;
|
| +}
|
| +
|
| +/*
|
| +** Extract all tokens from hash table iHash and link them into a list
|
| +** in sorted order. The hash table is cleared before returning. It is
|
| +** the responsibility of the caller to free the elements of the returned
|
| +** list.
|
| +*/
|
| +static int fts5HashEntrySort(
|
| + Fts5Hash *pHash,
|
| + const char *pTerm, int nTerm, /* Query prefix, if any */
|
| + Fts5HashEntry **ppSorted
|
| +){
|
| + const int nMergeSlot = 32;
|
| + Fts5HashEntry **ap;
|
| + Fts5HashEntry *pList;
|
| + int iSlot;
|
| + int i;
|
| +
|
| + *ppSorted = 0;
|
| + ap = sqlite3_malloc(sizeof(Fts5HashEntry*) * nMergeSlot);
|
| + if( !ap ) return SQLITE_NOMEM;
|
| + memset(ap, 0, sizeof(Fts5HashEntry*) * nMergeSlot);
|
| +
|
| + for(iSlot=0; iSlot<pHash->nSlot; iSlot++){
|
| + Fts5HashEntry *pIter;
|
| + for(pIter=pHash->aSlot[iSlot]; pIter; pIter=pIter->pHashNext){
|
| + if( pTerm==0 || 0==memcmp(pIter->zKey, pTerm, nTerm) ){
|
| + Fts5HashEntry *pEntry = pIter;
|
| + pEntry->pScanNext = 0;
|
| + for(i=0; ap[i]; i++){
|
| + pEntry = fts5HashEntryMerge(pEntry, ap[i]);
|
| + ap[i] = 0;
|
| + }
|
| + ap[i] = pEntry;
|
| + }
|
| + }
|
| + }
|
| +
|
| + pList = 0;
|
| + for(i=0; i<nMergeSlot; i++){
|
| + pList = fts5HashEntryMerge(pList, ap[i]);
|
| + }
|
| +
|
| + pHash->nEntry = 0;
|
| + sqlite3_free(ap);
|
| + *ppSorted = pList;
|
| + return SQLITE_OK;
|
| +}
|
| +
|
| +/*
|
| +** Query the hash table for a doclist associated with term pTerm/nTerm.
|
| +*/
|
| +int sqlite3Fts5HashQuery(
|
| + Fts5Hash *pHash, /* Hash table to query */
|
| + const char *pTerm, int nTerm, /* Query term */
|
| + const u8 **ppDoclist, /* OUT: Pointer to doclist for pTerm */
|
| + int *pnDoclist /* OUT: Size of doclist in bytes */
|
| +){
|
| + unsigned int iHash = fts5HashKey(pHash->nSlot, (const u8*)pTerm, nTerm);
|
| + Fts5HashEntry *p;
|
| +
|
| + for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){
|
| + if( memcmp(p->zKey, pTerm, nTerm)==0 && p->zKey[nTerm]==0 ) break;
|
| + }
|
| +
|
| + if( p ){
|
| + fts5HashAddPoslistSize(p);
|
| + *ppDoclist = (const u8*)&p->zKey[nTerm+1];
|
| + *pnDoclist = p->nData - (FTS5_HASHENTRYSIZE + nTerm + 1);
|
| + }else{
|
| + *ppDoclist = 0;
|
| + *pnDoclist = 0;
|
| + }
|
| +
|
| + return SQLITE_OK;
|
| +}
|
| +
|
| +int sqlite3Fts5HashScanInit(
|
| + Fts5Hash *p, /* Hash table to query */
|
| + const char *pTerm, int nTerm /* Query prefix */
|
| +){
|
| + return fts5HashEntrySort(p, pTerm, nTerm, &p->pScan);
|
| +}
|
| +
|
| +void sqlite3Fts5HashScanNext(Fts5Hash *p){
|
| + assert( !sqlite3Fts5HashScanEof(p) );
|
| + p->pScan = p->pScan->pScanNext;
|
| +}
|
| +
|
| +int sqlite3Fts5HashScanEof(Fts5Hash *p){
|
| + return (p->pScan==0);
|
| +}
|
| +
|
| +void sqlite3Fts5HashScanEntry(
|
| + Fts5Hash *pHash,
|
| + const char **pzTerm, /* OUT: term (nul-terminated) */
|
| + const u8 **ppDoclist, /* OUT: pointer to doclist */
|
| + int *pnDoclist /* OUT: size of doclist in bytes */
|
| +){
|
| + Fts5HashEntry *p;
|
| + if( (p = pHash->pScan) ){
|
| + int nTerm = (int)strlen(p->zKey);
|
| + fts5HashAddPoslistSize(p);
|
| + *pzTerm = p->zKey;
|
| + *ppDoclist = (const u8*)&p->zKey[nTerm+1];
|
| + *pnDoclist = p->nData - (FTS5_HASHENTRYSIZE + nTerm + 1);
|
| + }else{
|
| + *pzTerm = 0;
|
| + *ppDoclist = 0;
|
| + *pnDoclist = 0;
|
| + }
|
| +}
|
| +
|
|
|