OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ** 2014 August 11 |
| 3 ** |
| 4 ** The author disclaims copyright to this source code. In place of |
| 5 ** a legal notice, here is a blessing: |
| 6 ** |
| 7 ** May you do good and not evil. |
| 8 ** May you find forgiveness for yourself and forgive others. |
| 9 ** May you share freely, never taking more than you give. |
| 10 ** |
| 11 ****************************************************************************** |
| 12 ** |
| 13 */ |
| 14 |
| 15 |
| 16 |
| 17 #include "fts5Int.h" |
| 18 |
| 19 typedef struct Fts5HashEntry Fts5HashEntry; |
| 20 |
| 21 /* |
| 22 ** This file contains the implementation of an in-memory hash table used |
| 23 ** to accumuluate "term -> doclist" content before it is flused to a level-0 |
| 24 ** segment. |
| 25 */ |
| 26 |
| 27 |
| 28 struct Fts5Hash { |
| 29 int eDetail; /* Copy of Fts5Config.eDetail */ |
| 30 int *pnByte; /* Pointer to bytes counter */ |
| 31 int nEntry; /* Number of entries currently in hash */ |
| 32 int nSlot; /* Size of aSlot[] array */ |
| 33 Fts5HashEntry *pScan; /* Current ordered scan item */ |
| 34 Fts5HashEntry **aSlot; /* Array of hash slots */ |
| 35 }; |
| 36 |
| 37 /* |
| 38 ** Each entry in the hash table is represented by an object of the |
| 39 ** following type. Each object, its key (zKey[]) and its current data |
| 40 ** are stored in a single memory allocation. The position list data |
| 41 ** immediately follows the key data in memory. |
| 42 ** |
| 43 ** The data that follows the key is in a similar, but not identical format |
| 44 ** to the doclist data stored in the database. It is: |
| 45 ** |
| 46 ** * Rowid, as a varint |
| 47 ** * Position list, without 0x00 terminator. |
| 48 ** * Size of previous position list and rowid, as a 4 byte |
| 49 ** big-endian integer. |
| 50 ** |
| 51 ** iRowidOff: |
| 52 ** Offset of last rowid written to data area. Relative to first byte of |
| 53 ** structure. |
| 54 ** |
| 55 ** nData: |
| 56 ** Bytes of data written since iRowidOff. |
| 57 */ |
| 58 struct Fts5HashEntry { |
| 59 Fts5HashEntry *pHashNext; /* Next hash entry with same hash-key */ |
| 60 Fts5HashEntry *pScanNext; /* Next entry in sorted order */ |
| 61 |
| 62 int nAlloc; /* Total size of allocation */ |
| 63 int iSzPoslist; /* Offset of space for 4-byte poslist size */ |
| 64 int nData; /* Total bytes of data (incl. structure) */ |
| 65 int nKey; /* Length of zKey[] in bytes */ |
| 66 u8 bDel; /* Set delete-flag @ iSzPoslist */ |
| 67 u8 bContent; /* Set content-flag (detail=none mode) */ |
| 68 i16 iCol; /* Column of last value written */ |
| 69 int iPos; /* Position of last value written */ |
| 70 i64 iRowid; /* Rowid of last value written */ |
| 71 char zKey[8]; /* Nul-terminated entry key */ |
| 72 }; |
| 73 |
| 74 /* |
| 75 ** Size of Fts5HashEntry without the zKey[] array. |
| 76 */ |
| 77 #define FTS5_HASHENTRYSIZE (sizeof(Fts5HashEntry)-8) |
| 78 |
| 79 |
| 80 |
| 81 /* |
| 82 ** Allocate a new hash table. |
| 83 */ |
| 84 int sqlite3Fts5HashNew(Fts5Config *pConfig, Fts5Hash **ppNew, int *pnByte){ |
| 85 int rc = SQLITE_OK; |
| 86 Fts5Hash *pNew; |
| 87 |
| 88 *ppNew = pNew = (Fts5Hash*)sqlite3_malloc(sizeof(Fts5Hash)); |
| 89 if( pNew==0 ){ |
| 90 rc = SQLITE_NOMEM; |
| 91 }else{ |
| 92 int nByte; |
| 93 memset(pNew, 0, sizeof(Fts5Hash)); |
| 94 pNew->pnByte = pnByte; |
| 95 pNew->eDetail = pConfig->eDetail; |
| 96 |
| 97 pNew->nSlot = 1024; |
| 98 nByte = sizeof(Fts5HashEntry*) * pNew->nSlot; |
| 99 pNew->aSlot = (Fts5HashEntry**)sqlite3_malloc(nByte); |
| 100 if( pNew->aSlot==0 ){ |
| 101 sqlite3_free(pNew); |
| 102 *ppNew = 0; |
| 103 rc = SQLITE_NOMEM; |
| 104 }else{ |
| 105 memset(pNew->aSlot, 0, nByte); |
| 106 } |
| 107 } |
| 108 return rc; |
| 109 } |
| 110 |
| 111 /* |
| 112 ** Free a hash table object. |
| 113 */ |
| 114 void sqlite3Fts5HashFree(Fts5Hash *pHash){ |
| 115 if( pHash ){ |
| 116 sqlite3Fts5HashClear(pHash); |
| 117 sqlite3_free(pHash->aSlot); |
| 118 sqlite3_free(pHash); |
| 119 } |
| 120 } |
| 121 |
| 122 /* |
| 123 ** Empty (but do not delete) a hash table. |
| 124 */ |
| 125 void sqlite3Fts5HashClear(Fts5Hash *pHash){ |
| 126 int i; |
| 127 for(i=0; i<pHash->nSlot; i++){ |
| 128 Fts5HashEntry *pNext; |
| 129 Fts5HashEntry *pSlot; |
| 130 for(pSlot=pHash->aSlot[i]; pSlot; pSlot=pNext){ |
| 131 pNext = pSlot->pHashNext; |
| 132 sqlite3_free(pSlot); |
| 133 } |
| 134 } |
| 135 memset(pHash->aSlot, 0, pHash->nSlot * sizeof(Fts5HashEntry*)); |
| 136 pHash->nEntry = 0; |
| 137 } |
| 138 |
| 139 static unsigned int fts5HashKey(int nSlot, const u8 *p, int n){ |
| 140 int i; |
| 141 unsigned int h = 13; |
| 142 for(i=n-1; i>=0; i--){ |
| 143 h = (h << 3) ^ h ^ p[i]; |
| 144 } |
| 145 return (h % nSlot); |
| 146 } |
| 147 |
| 148 static unsigned int fts5HashKey2(int nSlot, u8 b, const u8 *p, int n){ |
| 149 int i; |
| 150 unsigned int h = 13; |
| 151 for(i=n-1; i>=0; i--){ |
| 152 h = (h << 3) ^ h ^ p[i]; |
| 153 } |
| 154 h = (h << 3) ^ h ^ b; |
| 155 return (h % nSlot); |
| 156 } |
| 157 |
| 158 /* |
| 159 ** Resize the hash table by doubling the number of slots. |
| 160 */ |
| 161 static int fts5HashResize(Fts5Hash *pHash){ |
| 162 int nNew = pHash->nSlot*2; |
| 163 int i; |
| 164 Fts5HashEntry **apNew; |
| 165 Fts5HashEntry **apOld = pHash->aSlot; |
| 166 |
| 167 apNew = (Fts5HashEntry**)sqlite3_malloc(nNew*sizeof(Fts5HashEntry*)); |
| 168 if( !apNew ) return SQLITE_NOMEM; |
| 169 memset(apNew, 0, nNew*sizeof(Fts5HashEntry*)); |
| 170 |
| 171 for(i=0; i<pHash->nSlot; i++){ |
| 172 while( apOld[i] ){ |
| 173 int iHash; |
| 174 Fts5HashEntry *p = apOld[i]; |
| 175 apOld[i] = p->pHashNext; |
| 176 iHash = fts5HashKey(nNew, (u8*)p->zKey, (int)strlen(p->zKey)); |
| 177 p->pHashNext = apNew[iHash]; |
| 178 apNew[iHash] = p; |
| 179 } |
| 180 } |
| 181 |
| 182 sqlite3_free(apOld); |
| 183 pHash->nSlot = nNew; |
| 184 pHash->aSlot = apNew; |
| 185 return SQLITE_OK; |
| 186 } |
| 187 |
| 188 static void fts5HashAddPoslistSize(Fts5Hash *pHash, Fts5HashEntry *p){ |
| 189 if( p->iSzPoslist ){ |
| 190 u8 *pPtr = (u8*)p; |
| 191 if( pHash->eDetail==FTS5_DETAIL_NONE ){ |
| 192 assert( p->nData==p->iSzPoslist ); |
| 193 if( p->bDel ){ |
| 194 pPtr[p->nData++] = 0x00; |
| 195 if( p->bContent ){ |
| 196 pPtr[p->nData++] = 0x00; |
| 197 } |
| 198 } |
| 199 }else{ |
| 200 int nSz = (p->nData - p->iSzPoslist - 1); /* Size in bytes */ |
| 201 int nPos = nSz*2 + p->bDel; /* Value of nPos field */ |
| 202 |
| 203 assert( p->bDel==0 || p->bDel==1 ); |
| 204 if( nPos<=127 ){ |
| 205 pPtr[p->iSzPoslist] = (u8)nPos; |
| 206 }else{ |
| 207 int nByte = sqlite3Fts5GetVarintLen((u32)nPos); |
| 208 memmove(&pPtr[p->iSzPoslist + nByte], &pPtr[p->iSzPoslist + 1], nSz); |
| 209 sqlite3Fts5PutVarint(&pPtr[p->iSzPoslist], nPos); |
| 210 p->nData += (nByte-1); |
| 211 } |
| 212 } |
| 213 |
| 214 p->iSzPoslist = 0; |
| 215 p->bDel = 0; |
| 216 p->bContent = 0; |
| 217 } |
| 218 } |
| 219 |
| 220 /* |
| 221 ** Add an entry to the in-memory hash table. The key is the concatenation |
| 222 ** of bByte and (pToken/nToken). The value is (iRowid/iCol/iPos). |
| 223 ** |
| 224 ** (bByte || pToken) -> (iRowid,iCol,iPos) |
| 225 ** |
| 226 ** Or, if iCol is negative, then the value is a delete marker. |
| 227 */ |
| 228 int sqlite3Fts5HashWrite( |
| 229 Fts5Hash *pHash, |
| 230 i64 iRowid, /* Rowid for this entry */ |
| 231 int iCol, /* Column token appears in (-ve -> delete) */ |
| 232 int iPos, /* Position of token within column */ |
| 233 char bByte, /* First byte of token */ |
| 234 const char *pToken, int nToken /* Token to add or remove to or from index */ |
| 235 ){ |
| 236 unsigned int iHash; |
| 237 Fts5HashEntry *p; |
| 238 u8 *pPtr; |
| 239 int nIncr = 0; /* Amount to increment (*pHash->pnByte) by */ |
| 240 int bNew; /* If non-delete entry should be written */ |
| 241 |
| 242 bNew = (pHash->eDetail==FTS5_DETAIL_FULL); |
| 243 |
| 244 /* Attempt to locate an existing hash entry */ |
| 245 iHash = fts5HashKey2(pHash->nSlot, (u8)bByte, (const u8*)pToken, nToken); |
| 246 for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){ |
| 247 if( p->zKey[0]==bByte |
| 248 && p->nKey==nToken |
| 249 && memcmp(&p->zKey[1], pToken, nToken)==0 |
| 250 ){ |
| 251 break; |
| 252 } |
| 253 } |
| 254 |
| 255 /* If an existing hash entry cannot be found, create a new one. */ |
| 256 if( p==0 ){ |
| 257 /* Figure out how much space to allocate */ |
| 258 int nByte = FTS5_HASHENTRYSIZE + (nToken+1) + 1 + 64; |
| 259 if( nByte<128 ) nByte = 128; |
| 260 |
| 261 /* Grow the Fts5Hash.aSlot[] array if necessary. */ |
| 262 if( (pHash->nEntry*2)>=pHash->nSlot ){ |
| 263 int rc = fts5HashResize(pHash); |
| 264 if( rc!=SQLITE_OK ) return rc; |
| 265 iHash = fts5HashKey2(pHash->nSlot, (u8)bByte, (const u8*)pToken, nToken); |
| 266 } |
| 267 |
| 268 /* Allocate new Fts5HashEntry and add it to the hash table. */ |
| 269 p = (Fts5HashEntry*)sqlite3_malloc(nByte); |
| 270 if( !p ) return SQLITE_NOMEM; |
| 271 memset(p, 0, FTS5_HASHENTRYSIZE); |
| 272 p->nAlloc = nByte; |
| 273 p->zKey[0] = bByte; |
| 274 memcpy(&p->zKey[1], pToken, nToken); |
| 275 assert( iHash==fts5HashKey(pHash->nSlot, (u8*)p->zKey, nToken+1) ); |
| 276 p->nKey = nToken; |
| 277 p->zKey[nToken+1] = '\0'; |
| 278 p->nData = nToken+1 + 1 + FTS5_HASHENTRYSIZE; |
| 279 p->pHashNext = pHash->aSlot[iHash]; |
| 280 pHash->aSlot[iHash] = p; |
| 281 pHash->nEntry++; |
| 282 |
| 283 /* Add the first rowid field to the hash-entry */ |
| 284 p->nData += sqlite3Fts5PutVarint(&((u8*)p)[p->nData], iRowid); |
| 285 p->iRowid = iRowid; |
| 286 |
| 287 p->iSzPoslist = p->nData; |
| 288 if( pHash->eDetail!=FTS5_DETAIL_NONE ){ |
| 289 p->nData += 1; |
| 290 p->iCol = (pHash->eDetail==FTS5_DETAIL_FULL ? 0 : -1); |
| 291 } |
| 292 |
| 293 nIncr += p->nData; |
| 294 }else{ |
| 295 |
| 296 /* Appending to an existing hash-entry. Check that there is enough |
| 297 ** space to append the largest possible new entry. Worst case scenario |
| 298 ** is: |
| 299 ** |
| 300 ** + 9 bytes for a new rowid, |
| 301 ** + 4 byte reserved for the "poslist size" varint. |
| 302 ** + 1 byte for a "new column" byte, |
| 303 ** + 3 bytes for a new column number (16-bit max) as a varint, |
| 304 ** + 5 bytes for the new position offset (32-bit max). |
| 305 */ |
| 306 if( (p->nAlloc - p->nData) < (9 + 4 + 1 + 3 + 5) ){ |
| 307 int nNew = p->nAlloc * 2; |
| 308 Fts5HashEntry *pNew; |
| 309 Fts5HashEntry **pp; |
| 310 pNew = (Fts5HashEntry*)sqlite3_realloc(p, nNew); |
| 311 if( pNew==0 ) return SQLITE_NOMEM; |
| 312 pNew->nAlloc = nNew; |
| 313 for(pp=&pHash->aSlot[iHash]; *pp!=p; pp=&(*pp)->pHashNext); |
| 314 *pp = pNew; |
| 315 p = pNew; |
| 316 } |
| 317 nIncr -= p->nData; |
| 318 } |
| 319 assert( (p->nAlloc - p->nData) >= (9 + 4 + 1 + 3 + 5) ); |
| 320 |
| 321 pPtr = (u8*)p; |
| 322 |
| 323 /* If this is a new rowid, append the 4-byte size field for the previous |
| 324 ** entry, and the new rowid for this entry. */ |
| 325 if( iRowid!=p->iRowid ){ |
| 326 fts5HashAddPoslistSize(pHash, p); |
| 327 p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iRowid - p->iRowid); |
| 328 p->iRowid = iRowid; |
| 329 bNew = 1; |
| 330 p->iSzPoslist = p->nData; |
| 331 if( pHash->eDetail!=FTS5_DETAIL_NONE ){ |
| 332 p->nData += 1; |
| 333 p->iCol = (pHash->eDetail==FTS5_DETAIL_FULL ? 0 : -1); |
| 334 p->iPos = 0; |
| 335 } |
| 336 } |
| 337 |
| 338 if( iCol>=0 ){ |
| 339 if( pHash->eDetail==FTS5_DETAIL_NONE ){ |
| 340 p->bContent = 1; |
| 341 }else{ |
| 342 /* Append a new column value, if necessary */ |
| 343 assert( iCol>=p->iCol ); |
| 344 if( iCol!=p->iCol ){ |
| 345 if( pHash->eDetail==FTS5_DETAIL_FULL ){ |
| 346 pPtr[p->nData++] = 0x01; |
| 347 p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iCol); |
| 348 p->iCol = (i16)iCol; |
| 349 p->iPos = 0; |
| 350 }else{ |
| 351 bNew = 1; |
| 352 p->iCol = (i16)(iPos = iCol); |
| 353 } |
| 354 } |
| 355 |
| 356 /* Append the new position offset, if necessary */ |
| 357 if( bNew ){ |
| 358 p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iPos - p->iPos + 2); |
| 359 p->iPos = iPos; |
| 360 } |
| 361 } |
| 362 }else{ |
| 363 /* This is a delete. Set the delete flag. */ |
| 364 p->bDel = 1; |
| 365 } |
| 366 |
| 367 nIncr += p->nData; |
| 368 *pHash->pnByte += nIncr; |
| 369 return SQLITE_OK; |
| 370 } |
| 371 |
| 372 |
| 373 /* |
| 374 ** Arguments pLeft and pRight point to linked-lists of hash-entry objects, |
| 375 ** each sorted in key order. This function merges the two lists into a |
| 376 ** single list and returns a pointer to its first element. |
| 377 */ |
| 378 static Fts5HashEntry *fts5HashEntryMerge( |
| 379 Fts5HashEntry *pLeft, |
| 380 Fts5HashEntry *pRight |
| 381 ){ |
| 382 Fts5HashEntry *p1 = pLeft; |
| 383 Fts5HashEntry *p2 = pRight; |
| 384 Fts5HashEntry *pRet = 0; |
| 385 Fts5HashEntry **ppOut = &pRet; |
| 386 |
| 387 while( p1 || p2 ){ |
| 388 if( p1==0 ){ |
| 389 *ppOut = p2; |
| 390 p2 = 0; |
| 391 }else if( p2==0 ){ |
| 392 *ppOut = p1; |
| 393 p1 = 0; |
| 394 }else{ |
| 395 int i = 0; |
| 396 while( p1->zKey[i]==p2->zKey[i] ) i++; |
| 397 |
| 398 if( ((u8)p1->zKey[i])>((u8)p2->zKey[i]) ){ |
| 399 /* p2 is smaller */ |
| 400 *ppOut = p2; |
| 401 ppOut = &p2->pScanNext; |
| 402 p2 = p2->pScanNext; |
| 403 }else{ |
| 404 /* p1 is smaller */ |
| 405 *ppOut = p1; |
| 406 ppOut = &p1->pScanNext; |
| 407 p1 = p1->pScanNext; |
| 408 } |
| 409 *ppOut = 0; |
| 410 } |
| 411 } |
| 412 |
| 413 return pRet; |
| 414 } |
| 415 |
| 416 /* |
| 417 ** Extract all tokens from hash table iHash and link them into a list |
| 418 ** in sorted order. The hash table is cleared before returning. It is |
| 419 ** the responsibility of the caller to free the elements of the returned |
| 420 ** list. |
| 421 */ |
| 422 static int fts5HashEntrySort( |
| 423 Fts5Hash *pHash, |
| 424 const char *pTerm, int nTerm, /* Query prefix, if any */ |
| 425 Fts5HashEntry **ppSorted |
| 426 ){ |
| 427 const int nMergeSlot = 32; |
| 428 Fts5HashEntry **ap; |
| 429 Fts5HashEntry *pList; |
| 430 int iSlot; |
| 431 int i; |
| 432 |
| 433 *ppSorted = 0; |
| 434 ap = sqlite3_malloc(sizeof(Fts5HashEntry*) * nMergeSlot); |
| 435 if( !ap ) return SQLITE_NOMEM; |
| 436 memset(ap, 0, sizeof(Fts5HashEntry*) * nMergeSlot); |
| 437 |
| 438 for(iSlot=0; iSlot<pHash->nSlot; iSlot++){ |
| 439 Fts5HashEntry *pIter; |
| 440 for(pIter=pHash->aSlot[iSlot]; pIter; pIter=pIter->pHashNext){ |
| 441 if( pTerm==0 || 0==memcmp(pIter->zKey, pTerm, nTerm) ){ |
| 442 Fts5HashEntry *pEntry = pIter; |
| 443 pEntry->pScanNext = 0; |
| 444 for(i=0; ap[i]; i++){ |
| 445 pEntry = fts5HashEntryMerge(pEntry, ap[i]); |
| 446 ap[i] = 0; |
| 447 } |
| 448 ap[i] = pEntry; |
| 449 } |
| 450 } |
| 451 } |
| 452 |
| 453 pList = 0; |
| 454 for(i=0; i<nMergeSlot; i++){ |
| 455 pList = fts5HashEntryMerge(pList, ap[i]); |
| 456 } |
| 457 |
| 458 pHash->nEntry = 0; |
| 459 sqlite3_free(ap); |
| 460 *ppSorted = pList; |
| 461 return SQLITE_OK; |
| 462 } |
| 463 |
| 464 /* |
| 465 ** Query the hash table for a doclist associated with term pTerm/nTerm. |
| 466 */ |
| 467 int sqlite3Fts5HashQuery( |
| 468 Fts5Hash *pHash, /* Hash table to query */ |
| 469 const char *pTerm, int nTerm, /* Query term */ |
| 470 const u8 **ppDoclist, /* OUT: Pointer to doclist for pTerm */ |
| 471 int *pnDoclist /* OUT: Size of doclist in bytes */ |
| 472 ){ |
| 473 unsigned int iHash = fts5HashKey(pHash->nSlot, (const u8*)pTerm, nTerm); |
| 474 Fts5HashEntry *p; |
| 475 |
| 476 for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){ |
| 477 if( memcmp(p->zKey, pTerm, nTerm)==0 && p->zKey[nTerm]==0 ) break; |
| 478 } |
| 479 |
| 480 if( p ){ |
| 481 fts5HashAddPoslistSize(pHash, p); |
| 482 *ppDoclist = (const u8*)&p->zKey[nTerm+1]; |
| 483 *pnDoclist = p->nData - (FTS5_HASHENTRYSIZE + nTerm + 1); |
| 484 }else{ |
| 485 *ppDoclist = 0; |
| 486 *pnDoclist = 0; |
| 487 } |
| 488 |
| 489 return SQLITE_OK; |
| 490 } |
| 491 |
| 492 int sqlite3Fts5HashScanInit( |
| 493 Fts5Hash *p, /* Hash table to query */ |
| 494 const char *pTerm, int nTerm /* Query prefix */ |
| 495 ){ |
| 496 return fts5HashEntrySort(p, pTerm, nTerm, &p->pScan); |
| 497 } |
| 498 |
| 499 void sqlite3Fts5HashScanNext(Fts5Hash *p){ |
| 500 assert( !sqlite3Fts5HashScanEof(p) ); |
| 501 p->pScan = p->pScan->pScanNext; |
| 502 } |
| 503 |
| 504 int sqlite3Fts5HashScanEof(Fts5Hash *p){ |
| 505 return (p->pScan==0); |
| 506 } |
| 507 |
| 508 void sqlite3Fts5HashScanEntry( |
| 509 Fts5Hash *pHash, |
| 510 const char **pzTerm, /* OUT: term (nul-terminated) */ |
| 511 const u8 **ppDoclist, /* OUT: pointer to doclist */ |
| 512 int *pnDoclist /* OUT: size of doclist in bytes */ |
| 513 ){ |
| 514 Fts5HashEntry *p; |
| 515 if( (p = pHash->pScan) ){ |
| 516 int nTerm = (int)strlen(p->zKey); |
| 517 fts5HashAddPoslistSize(pHash, p); |
| 518 *pzTerm = p->zKey; |
| 519 *ppDoclist = (const u8*)&p->zKey[nTerm+1]; |
| 520 *pnDoclist = p->nData - (FTS5_HASHENTRYSIZE + nTerm + 1); |
| 521 }else{ |
| 522 *pzTerm = 0; |
| 523 *ppDoclist = 0; |
| 524 *pnDoclist = 0; |
| 525 } |
| 526 } |
| 527 |
OLD | NEW |