Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(17)

Side by Side Diff: third_party/sqlite/sqlite-src-3100200/ext/fts5/fts5_hash.c

Issue 1610543003: [sql] Import reference version of SQLite 3.10.2. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 /*
2 ** 2014 August 11
3 **
4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing:
6 **
7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give.
10 **
11 ******************************************************************************
12 **
13 */
14
15
16
17 #include "fts5Int.h"
18
19 typedef struct Fts5HashEntry Fts5HashEntry;
20
21 /*
22 ** This file contains the implementation of an in-memory hash table used
23 ** to accumuluate "term -> doclist" content before it is flused to a level-0
24 ** segment.
25 */
26
27
28 struct Fts5Hash {
29 int *pnByte; /* Pointer to bytes counter */
30 int nEntry; /* Number of entries currently in hash */
31 int nSlot; /* Size of aSlot[] array */
32 Fts5HashEntry *pScan; /* Current ordered scan item */
33 Fts5HashEntry **aSlot; /* Array of hash slots */
34 };
35
36 /*
37 ** Each entry in the hash table is represented by an object of the
38 ** following type. Each object, its key (zKey[]) and its current data
39 ** are stored in a single memory allocation. The position list data
40 ** immediately follows the key data in memory.
41 **
42 ** The data that follows the key is in a similar, but not identical format
43 ** to the doclist data stored in the database. It is:
44 **
45 ** * Rowid, as a varint
46 ** * Position list, without 0x00 terminator.
47 ** * Size of previous position list and rowid, as a 4 byte
48 ** big-endian integer.
49 **
50 ** iRowidOff:
51 ** Offset of last rowid written to data area. Relative to first byte of
52 ** structure.
53 **
54 ** nData:
55 ** Bytes of data written since iRowidOff.
56 */
57 struct Fts5HashEntry {
58 Fts5HashEntry *pHashNext; /* Next hash entry with same hash-key */
59 Fts5HashEntry *pScanNext; /* Next entry in sorted order */
60
61 int nAlloc; /* Total size of allocation */
62 int iSzPoslist; /* Offset of space for 4-byte poslist size */
63 int nData; /* Total bytes of data (incl. structure) */
64 u8 bDel; /* Set delete-flag @ iSzPoslist */
65
66 int iCol; /* Column of last value written */
67 int iPos; /* Position of last value written */
68 i64 iRowid; /* Rowid of last value written */
69 char zKey[8]; /* Nul-terminated entry key */
70 };
71
72 /*
73 ** Size of Fts5HashEntry without the zKey[] array.
74 */
75 #define FTS5_HASHENTRYSIZE (sizeof(Fts5HashEntry)-8)
76
77
78
79 /*
80 ** Allocate a new hash table.
81 */
82 int sqlite3Fts5HashNew(Fts5Hash **ppNew, int *pnByte){
83 int rc = SQLITE_OK;
84 Fts5Hash *pNew;
85
86 *ppNew = pNew = (Fts5Hash*)sqlite3_malloc(sizeof(Fts5Hash));
87 if( pNew==0 ){
88 rc = SQLITE_NOMEM;
89 }else{
90 int nByte;
91 memset(pNew, 0, sizeof(Fts5Hash));
92 pNew->pnByte = pnByte;
93
94 pNew->nSlot = 1024;
95 nByte = sizeof(Fts5HashEntry*) * pNew->nSlot;
96 pNew->aSlot = (Fts5HashEntry**)sqlite3_malloc(nByte);
97 if( pNew->aSlot==0 ){
98 sqlite3_free(pNew);
99 *ppNew = 0;
100 rc = SQLITE_NOMEM;
101 }else{
102 memset(pNew->aSlot, 0, nByte);
103 }
104 }
105 return rc;
106 }
107
108 /*
109 ** Free a hash table object.
110 */
111 void sqlite3Fts5HashFree(Fts5Hash *pHash){
112 if( pHash ){
113 sqlite3Fts5HashClear(pHash);
114 sqlite3_free(pHash->aSlot);
115 sqlite3_free(pHash);
116 }
117 }
118
119 /*
120 ** Empty (but do not delete) a hash table.
121 */
122 void sqlite3Fts5HashClear(Fts5Hash *pHash){
123 int i;
124 for(i=0; i<pHash->nSlot; i++){
125 Fts5HashEntry *pNext;
126 Fts5HashEntry *pSlot;
127 for(pSlot=pHash->aSlot[i]; pSlot; pSlot=pNext){
128 pNext = pSlot->pHashNext;
129 sqlite3_free(pSlot);
130 }
131 }
132 memset(pHash->aSlot, 0, pHash->nSlot * sizeof(Fts5HashEntry*));
133 pHash->nEntry = 0;
134 }
135
136 static unsigned int fts5HashKey(int nSlot, const u8 *p, int n){
137 int i;
138 unsigned int h = 13;
139 for(i=n-1; i>=0; i--){
140 h = (h << 3) ^ h ^ p[i];
141 }
142 return (h % nSlot);
143 }
144
145 static unsigned int fts5HashKey2(int nSlot, u8 b, const u8 *p, int n){
146 int i;
147 unsigned int h = 13;
148 for(i=n-1; i>=0; i--){
149 h = (h << 3) ^ h ^ p[i];
150 }
151 h = (h << 3) ^ h ^ b;
152 return (h % nSlot);
153 }
154
155 /*
156 ** Resize the hash table by doubling the number of slots.
157 */
158 static int fts5HashResize(Fts5Hash *pHash){
159 int nNew = pHash->nSlot*2;
160 int i;
161 Fts5HashEntry **apNew;
162 Fts5HashEntry **apOld = pHash->aSlot;
163
164 apNew = (Fts5HashEntry**)sqlite3_malloc(nNew*sizeof(Fts5HashEntry*));
165 if( !apNew ) return SQLITE_NOMEM;
166 memset(apNew, 0, nNew*sizeof(Fts5HashEntry*));
167
168 for(i=0; i<pHash->nSlot; i++){
169 while( apOld[i] ){
170 int iHash;
171 Fts5HashEntry *p = apOld[i];
172 apOld[i] = p->pHashNext;
173 iHash = fts5HashKey(nNew, (u8*)p->zKey, (int)strlen(p->zKey));
174 p->pHashNext = apNew[iHash];
175 apNew[iHash] = p;
176 }
177 }
178
179 sqlite3_free(apOld);
180 pHash->nSlot = nNew;
181 pHash->aSlot = apNew;
182 return SQLITE_OK;
183 }
184
185 static void fts5HashAddPoslistSize(Fts5HashEntry *p){
186 if( p->iSzPoslist ){
187 u8 *pPtr = (u8*)p;
188 int nSz = (p->nData - p->iSzPoslist - 1); /* Size in bytes */
189 int nPos = nSz*2 + p->bDel; /* Value of nPos field */
190
191 assert( p->bDel==0 || p->bDel==1 );
192 if( nPos<=127 ){
193 pPtr[p->iSzPoslist] = (u8)nPos;
194 }else{
195 int nByte = sqlite3Fts5GetVarintLen((u32)nPos);
196 memmove(&pPtr[p->iSzPoslist + nByte], &pPtr[p->iSzPoslist + 1], nSz);
197 sqlite3Fts5PutVarint(&pPtr[p->iSzPoslist], nPos);
198 p->nData += (nByte-1);
199 }
200 p->bDel = 0;
201 p->iSzPoslist = 0;
202 }
203 }
204
205 int sqlite3Fts5HashWrite(
206 Fts5Hash *pHash,
207 i64 iRowid, /* Rowid for this entry */
208 int iCol, /* Column token appears in (-ve -> delete) */
209 int iPos, /* Position of token within column */
210 char bByte, /* First byte of token */
211 const char *pToken, int nToken /* Token to add or remove to or from index */
212 ){
213 unsigned int iHash;
214 Fts5HashEntry *p;
215 u8 *pPtr;
216 int nIncr = 0; /* Amount to increment (*pHash->pnByte) by */
217
218 /* Attempt to locate an existing hash entry */
219 iHash = fts5HashKey2(pHash->nSlot, (u8)bByte, (const u8*)pToken, nToken);
220 for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){
221 if( p->zKey[0]==bByte
222 && memcmp(&p->zKey[1], pToken, nToken)==0
223 && p->zKey[nToken+1]==0
224 ){
225 break;
226 }
227 }
228
229 /* If an existing hash entry cannot be found, create a new one. */
230 if( p==0 ){
231 int nByte = FTS5_HASHENTRYSIZE + (nToken+1) + 1 + 64;
232 if( nByte<128 ) nByte = 128;
233
234 if( (pHash->nEntry*2)>=pHash->nSlot ){
235 int rc = fts5HashResize(pHash);
236 if( rc!=SQLITE_OK ) return rc;
237 iHash = fts5HashKey2(pHash->nSlot, (u8)bByte, (const u8*)pToken, nToken);
238 }
239
240 p = (Fts5HashEntry*)sqlite3_malloc(nByte);
241 if( !p ) return SQLITE_NOMEM;
242 memset(p, 0, FTS5_HASHENTRYSIZE);
243 p->nAlloc = nByte;
244 p->zKey[0] = bByte;
245 memcpy(&p->zKey[1], pToken, nToken);
246 assert( iHash==fts5HashKey(pHash->nSlot, (u8*)p->zKey, nToken+1) );
247 p->zKey[nToken+1] = '\0';
248 p->nData = nToken+1 + 1 + FTS5_HASHENTRYSIZE;
249 p->nData += sqlite3Fts5PutVarint(&((u8*)p)[p->nData], iRowid);
250 p->iSzPoslist = p->nData;
251 p->nData += 1;
252 p->iRowid = iRowid;
253 p->pHashNext = pHash->aSlot[iHash];
254 pHash->aSlot[iHash] = p;
255 pHash->nEntry++;
256 nIncr += p->nData;
257 }
258
259 /* Check there is enough space to append a new entry. Worst case scenario
260 ** is:
261 **
262 ** + 9 bytes for a new rowid,
263 ** + 4 byte reserved for the "poslist size" varint.
264 ** + 1 byte for a "new column" byte,
265 ** + 3 bytes for a new column number (16-bit max) as a varint,
266 ** + 5 bytes for the new position offset (32-bit max).
267 */
268 if( (p->nAlloc - p->nData) < (9 + 4 + 1 + 3 + 5) ){
269 int nNew = p->nAlloc * 2;
270 Fts5HashEntry *pNew;
271 Fts5HashEntry **pp;
272 pNew = (Fts5HashEntry*)sqlite3_realloc(p, nNew);
273 if( pNew==0 ) return SQLITE_NOMEM;
274 pNew->nAlloc = nNew;
275 for(pp=&pHash->aSlot[iHash]; *pp!=p; pp=&(*pp)->pHashNext);
276 *pp = pNew;
277 p = pNew;
278 }
279 pPtr = (u8*)p;
280 nIncr -= p->nData;
281
282 /* If this is a new rowid, append the 4-byte size field for the previous
283 ** entry, and the new rowid for this entry. */
284 if( iRowid!=p->iRowid ){
285 fts5HashAddPoslistSize(p);
286 p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iRowid - p->iRowid);
287 p->iSzPoslist = p->nData;
288 p->nData += 1;
289 p->iCol = 0;
290 p->iPos = 0;
291 p->iRowid = iRowid;
292 }
293
294 if( iCol>=0 ){
295 /* Append a new column value, if necessary */
296 assert( iCol>=p->iCol );
297 if( iCol!=p->iCol ){
298 pPtr[p->nData++] = 0x01;
299 p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iCol);
300 p->iCol = iCol;
301 p->iPos = 0;
302 }
303
304 /* Append the new position offset */
305 p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iPos - p->iPos + 2);
306 p->iPos = iPos;
307 }else{
308 /* This is a delete. Set the delete flag. */
309 p->bDel = 1;
310 }
311 nIncr += p->nData;
312
313 *pHash->pnByte += nIncr;
314 return SQLITE_OK;
315 }
316
317
318 /*
319 ** Arguments pLeft and pRight point to linked-lists of hash-entry objects,
320 ** each sorted in key order. This function merges the two lists into a
321 ** single list and returns a pointer to its first element.
322 */
323 static Fts5HashEntry *fts5HashEntryMerge(
324 Fts5HashEntry *pLeft,
325 Fts5HashEntry *pRight
326 ){
327 Fts5HashEntry *p1 = pLeft;
328 Fts5HashEntry *p2 = pRight;
329 Fts5HashEntry *pRet = 0;
330 Fts5HashEntry **ppOut = &pRet;
331
332 while( p1 || p2 ){
333 if( p1==0 ){
334 *ppOut = p2;
335 p2 = 0;
336 }else if( p2==0 ){
337 *ppOut = p1;
338 p1 = 0;
339 }else{
340 int i = 0;
341 while( p1->zKey[i]==p2->zKey[i] ) i++;
342
343 if( ((u8)p1->zKey[i])>((u8)p2->zKey[i]) ){
344 /* p2 is smaller */
345 *ppOut = p2;
346 ppOut = &p2->pScanNext;
347 p2 = p2->pScanNext;
348 }else{
349 /* p1 is smaller */
350 *ppOut = p1;
351 ppOut = &p1->pScanNext;
352 p1 = p1->pScanNext;
353 }
354 *ppOut = 0;
355 }
356 }
357
358 return pRet;
359 }
360
361 /*
362 ** Extract all tokens from hash table iHash and link them into a list
363 ** in sorted order. The hash table is cleared before returning. It is
364 ** the responsibility of the caller to free the elements of the returned
365 ** list.
366 */
367 static int fts5HashEntrySort(
368 Fts5Hash *pHash,
369 const char *pTerm, int nTerm, /* Query prefix, if any */
370 Fts5HashEntry **ppSorted
371 ){
372 const int nMergeSlot = 32;
373 Fts5HashEntry **ap;
374 Fts5HashEntry *pList;
375 int iSlot;
376 int i;
377
378 *ppSorted = 0;
379 ap = sqlite3_malloc(sizeof(Fts5HashEntry*) * nMergeSlot);
380 if( !ap ) return SQLITE_NOMEM;
381 memset(ap, 0, sizeof(Fts5HashEntry*) * nMergeSlot);
382
383 for(iSlot=0; iSlot<pHash->nSlot; iSlot++){
384 Fts5HashEntry *pIter;
385 for(pIter=pHash->aSlot[iSlot]; pIter; pIter=pIter->pHashNext){
386 if( pTerm==0 || 0==memcmp(pIter->zKey, pTerm, nTerm) ){
387 Fts5HashEntry *pEntry = pIter;
388 pEntry->pScanNext = 0;
389 for(i=0; ap[i]; i++){
390 pEntry = fts5HashEntryMerge(pEntry, ap[i]);
391 ap[i] = 0;
392 }
393 ap[i] = pEntry;
394 }
395 }
396 }
397
398 pList = 0;
399 for(i=0; i<nMergeSlot; i++){
400 pList = fts5HashEntryMerge(pList, ap[i]);
401 }
402
403 pHash->nEntry = 0;
404 sqlite3_free(ap);
405 *ppSorted = pList;
406 return SQLITE_OK;
407 }
408
409 /*
410 ** Query the hash table for a doclist associated with term pTerm/nTerm.
411 */
412 int sqlite3Fts5HashQuery(
413 Fts5Hash *pHash, /* Hash table to query */
414 const char *pTerm, int nTerm, /* Query term */
415 const u8 **ppDoclist, /* OUT: Pointer to doclist for pTerm */
416 int *pnDoclist /* OUT: Size of doclist in bytes */
417 ){
418 unsigned int iHash = fts5HashKey(pHash->nSlot, (const u8*)pTerm, nTerm);
419 Fts5HashEntry *p;
420
421 for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){
422 if( memcmp(p->zKey, pTerm, nTerm)==0 && p->zKey[nTerm]==0 ) break;
423 }
424
425 if( p ){
426 fts5HashAddPoslistSize(p);
427 *ppDoclist = (const u8*)&p->zKey[nTerm+1];
428 *pnDoclist = p->nData - (FTS5_HASHENTRYSIZE + nTerm + 1);
429 }else{
430 *ppDoclist = 0;
431 *pnDoclist = 0;
432 }
433
434 return SQLITE_OK;
435 }
436
437 int sqlite3Fts5HashScanInit(
438 Fts5Hash *p, /* Hash table to query */
439 const char *pTerm, int nTerm /* Query prefix */
440 ){
441 return fts5HashEntrySort(p, pTerm, nTerm, &p->pScan);
442 }
443
444 void sqlite3Fts5HashScanNext(Fts5Hash *p){
445 assert( !sqlite3Fts5HashScanEof(p) );
446 p->pScan = p->pScan->pScanNext;
447 }
448
449 int sqlite3Fts5HashScanEof(Fts5Hash *p){
450 return (p->pScan==0);
451 }
452
453 void sqlite3Fts5HashScanEntry(
454 Fts5Hash *pHash,
455 const char **pzTerm, /* OUT: term (nul-terminated) */
456 const u8 **ppDoclist, /* OUT: pointer to doclist */
457 int *pnDoclist /* OUT: size of doclist in bytes */
458 ){
459 Fts5HashEntry *p;
460 if( (p = pHash->pScan) ){
461 int nTerm = (int)strlen(p->zKey);
462 fts5HashAddPoslistSize(p);
463 *pzTerm = p->zKey;
464 *ppDoclist = (const u8*)&p->zKey[nTerm+1];
465 *pnDoclist = p->nData - (FTS5_HASHENTRYSIZE + nTerm + 1);
466 }else{
467 *pzTerm = 0;
468 *ppDoclist = 0;
469 *pnDoclist = 0;
470 }
471 }
472
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698