OLD | NEW |
| (Empty) |
1 /* | |
2 ** 2001 September 22 | |
3 ** | |
4 ** The author disclaims copyright to this source code. In place of | |
5 ** a legal notice, here is a blessing: | |
6 ** | |
7 ** May you do good and not evil. | |
8 ** May you find forgiveness for yourself and forgive others. | |
9 ** May you share freely, never taking more than you give. | |
10 ** | |
11 ************************************************************************* | |
12 ** This is the implementation of generic hash-tables used in SQLite. | |
13 ** We've modified it slightly to serve as a standalone hash table | |
14 ** implementation for the full-text indexing module. | |
15 */ | |
16 #include <assert.h> | |
17 #include <stdlib.h> | |
18 #include <string.h> | |
19 | |
20 #include "ft_hash.h" | |
21 | |
22 void *malloc_and_zero(int n){ | |
23 void *p = malloc(n); | |
24 if( p ){ | |
25 memset(p, 0, n); | |
26 } | |
27 return p; | |
28 } | |
29 | |
30 /* Turn bulk memory into a hash table object by initializing the | |
31 ** fields of the Hash structure. | |
32 ** | |
33 ** "pNew" is a pointer to the hash table that is to be initialized. | |
34 ** keyClass is one of the constants HASH_INT, HASH_POINTER, | |
35 ** HASH_BINARY, or HASH_STRING. The value of keyClass | |
36 ** determines what kind of key the hash table will use. "copyKey" is | |
37 ** true if the hash table should make its own private copy of keys and | |
38 ** false if it should just use the supplied pointer. CopyKey only makes | |
39 ** sense for HASH_STRING and HASH_BINARY and is ignored | |
40 ** for other key classes. | |
41 */ | |
42 void HashInit(Hash *pNew, int keyClass, int copyKey){ | |
43 assert( pNew!=0 ); | |
44 assert( keyClass>=HASH_STRING && keyClass<=HASH_BINARY ); | |
45 pNew->keyClass = keyClass; | |
46 #if 0 | |
47 if( keyClass==HASH_POINTER || keyClass==HASH_INT ) copyKey = 0; | |
48 #endif | |
49 pNew->copyKey = copyKey; | |
50 pNew->first = 0; | |
51 pNew->count = 0; | |
52 pNew->htsize = 0; | |
53 pNew->ht = 0; | |
54 pNew->xMalloc = malloc_and_zero; | |
55 pNew->xFree = free; | |
56 } | |
57 | |
58 /* Remove all entries from a hash table. Reclaim all memory. | |
59 ** Call this routine to delete a hash table or to reset a hash table | |
60 ** to the empty state. | |
61 */ | |
62 void HashClear(Hash *pH){ | |
63 HashElem *elem; /* For looping over all elements of the table */ | |
64 | |
65 assert( pH!=0 ); | |
66 elem = pH->first; | |
67 pH->first = 0; | |
68 if( pH->ht ) pH->xFree(pH->ht); | |
69 pH->ht = 0; | |
70 pH->htsize = 0; | |
71 while( elem ){ | |
72 HashElem *next_elem = elem->next; | |
73 if( pH->copyKey && elem->pKey ){ | |
74 pH->xFree(elem->pKey); | |
75 } | |
76 pH->xFree(elem); | |
77 elem = next_elem; | |
78 } | |
79 pH->count = 0; | |
80 } | |
81 | |
82 #if 0 /* NOT USED */ | |
83 /* | |
84 ** Hash and comparison functions when the mode is HASH_INT | |
85 */ | |
86 static int intHash(const void *pKey, int nKey){ | |
87 return nKey ^ (nKey<<8) ^ (nKey>>8); | |
88 } | |
89 static int intCompare(const void *pKey1, int n1, const void *pKey2, int n2){ | |
90 return n2 - n1; | |
91 } | |
92 #endif | |
93 | |
94 #if 0 /* NOT USED */ | |
95 /* | |
96 ** Hash and comparison functions when the mode is HASH_POINTER | |
97 */ | |
98 static int ptrHash(const void *pKey, int nKey){ | |
99 uptr x = Addr(pKey); | |
100 return x ^ (x<<8) ^ (x>>8); | |
101 } | |
102 static int ptrCompare(const void *pKey1, int n1, const void *pKey2, int n2){ | |
103 if( pKey1==pKey2 ) return 0; | |
104 if( pKey1<pKey2 ) return -1; | |
105 return 1; | |
106 } | |
107 #endif | |
108 | |
109 /* | |
110 ** Hash and comparison functions when the mode is HASH_STRING | |
111 */ | |
112 static int strHash(const void *pKey, int nKey){ | |
113 const char *z = (const char *)pKey; | |
114 int h = 0; | |
115 if( nKey<=0 ) nKey = (int) strlen(z); | |
116 while( nKey > 0 ){ | |
117 h = (h<<3) ^ h ^ *z++; | |
118 nKey--; | |
119 } | |
120 return h & 0x7fffffff; | |
121 } | |
122 static int strCompare(const void *pKey1, int n1, const void *pKey2, int n2){ | |
123 if( n1!=n2 ) return 1; | |
124 return strncmp((const char*)pKey1,(const char*)pKey2,n1); | |
125 } | |
126 | |
127 /* | |
128 ** Hash and comparison functions when the mode is HASH_BINARY | |
129 */ | |
130 static int binHash(const void *pKey, int nKey){ | |
131 int h = 0; | |
132 const char *z = (const char *)pKey; | |
133 while( nKey-- > 0 ){ | |
134 h = (h<<3) ^ h ^ *(z++); | |
135 } | |
136 return h & 0x7fffffff; | |
137 } | |
138 static int binCompare(const void *pKey1, int n1, const void *pKey2, int n2){ | |
139 if( n1!=n2 ) return 1; | |
140 return memcmp(pKey1,pKey2,n1); | |
141 } | |
142 | |
143 /* | |
144 ** Return a pointer to the appropriate hash function given the key class. | |
145 ** | |
146 ** The C syntax in this function definition may be unfamilar to some | |
147 ** programmers, so we provide the following additional explanation: | |
148 ** | |
149 ** The name of the function is "hashFunction". The function takes a | |
150 ** single parameter "keyClass". The return value of hashFunction() | |
151 ** is a pointer to another function. Specifically, the return value | |
152 ** of hashFunction() is a pointer to a function that takes two parameters | |
153 ** with types "const void*" and "int" and returns an "int". | |
154 */ | |
155 static int (*hashFunction(int keyClass))(const void*,int){ | |
156 #if 0 /* HASH_INT and HASH_POINTER are never used */ | |
157 switch( keyClass ){ | |
158 case HASH_INT: return &intHash; | |
159 case HASH_POINTER: return &ptrHash; | |
160 case HASH_STRING: return &strHash; | |
161 case HASH_BINARY: return &binHash;; | |
162 default: break; | |
163 } | |
164 return 0; | |
165 #else | |
166 if( keyClass==HASH_STRING ){ | |
167 return &strHash; | |
168 }else{ | |
169 assert( keyClass==HASH_BINARY ); | |
170 return &binHash; | |
171 } | |
172 #endif | |
173 } | |
174 | |
175 /* | |
176 ** Return a pointer to the appropriate hash function given the key class. | |
177 ** | |
178 ** For help in interpreted the obscure C code in the function definition, | |
179 ** see the header comment on the previous function. | |
180 */ | |
181 static int (*compareFunction(int keyClass))(const void*,int,const void*,int){ | |
182 #if 0 /* HASH_INT and HASH_POINTER are never used */ | |
183 switch( keyClass ){ | |
184 case HASH_INT: return &intCompare; | |
185 case HASH_POINTER: return &ptrCompare; | |
186 case HASH_STRING: return &strCompare; | |
187 case HASH_BINARY: return &binCompare; | |
188 default: break; | |
189 } | |
190 return 0; | |
191 #else | |
192 if( keyClass==HASH_STRING ){ | |
193 return &strCompare; | |
194 }else{ | |
195 assert( keyClass==HASH_BINARY ); | |
196 return &binCompare; | |
197 } | |
198 #endif | |
199 } | |
200 | |
201 /* Link an element into the hash table | |
202 */ | |
203 static void insertElement( | |
204 Hash *pH, /* The complete hash table */ | |
205 struct _ht *pEntry, /* The entry into which pNew is inserted */ | |
206 HashElem *pNew /* The element to be inserted */ | |
207 ){ | |
208 HashElem *pHead; /* First element already in pEntry */ | |
209 pHead = pEntry->chain; | |
210 if( pHead ){ | |
211 pNew->next = pHead; | |
212 pNew->prev = pHead->prev; | |
213 if( pHead->prev ){ pHead->prev->next = pNew; } | |
214 else { pH->first = pNew; } | |
215 pHead->prev = pNew; | |
216 }else{ | |
217 pNew->next = pH->first; | |
218 if( pH->first ){ pH->first->prev = pNew; } | |
219 pNew->prev = 0; | |
220 pH->first = pNew; | |
221 } | |
222 pEntry->count++; | |
223 pEntry->chain = pNew; | |
224 } | |
225 | |
226 | |
227 /* Resize the hash table so that it cantains "new_size" buckets. | |
228 ** "new_size" must be a power of 2. The hash table might fail | |
229 ** to resize if sqliteMalloc() fails. | |
230 */ | |
231 static void rehash(Hash *pH, int new_size){ | |
232 struct _ht *new_ht; /* The new hash table */ | |
233 HashElem *elem, *next_elem; /* For looping over existing elements */ | |
234 int (*xHash)(const void*,int); /* The hash function */ | |
235 | |
236 assert( (new_size & (new_size-1))==0 ); | |
237 new_ht = (struct _ht *)pH->xMalloc( new_size*sizeof(struct _ht) ); | |
238 if( new_ht==0 ) return; | |
239 if( pH->ht ) pH->xFree(pH->ht); | |
240 pH->ht = new_ht; | |
241 pH->htsize = new_size; | |
242 xHash = hashFunction(pH->keyClass); | |
243 for(elem=pH->first, pH->first=0; elem; elem = next_elem){ | |
244 int h = (*xHash)(elem->pKey, elem->nKey) & (new_size-1); | |
245 next_elem = elem->next; | |
246 insertElement(pH, &new_ht[h], elem); | |
247 } | |
248 } | |
249 | |
250 /* This function (for internal use only) locates an element in an | |
251 ** hash table that matches the given key. The hash for this key has | |
252 ** already been computed and is passed as the 4th parameter. | |
253 */ | |
254 static HashElem *findElementGivenHash( | |
255 const Hash *pH, /* The pH to be searched */ | |
256 const void *pKey, /* The key we are searching for */ | |
257 int nKey, | |
258 int h /* The hash for this key. */ | |
259 ){ | |
260 HashElem *elem; /* Used to loop thru the element list */ | |
261 int count; /* Number of elements left to test */ | |
262 int (*xCompare)(const void*,int,const void*,int); /* comparison function */ | |
263 | |
264 if( pH->ht ){ | |
265 struct _ht *pEntry = &pH->ht[h]; | |
266 elem = pEntry->chain; | |
267 count = pEntry->count; | |
268 xCompare = compareFunction(pH->keyClass); | |
269 while( count-- && elem ){ | |
270 if( (*xCompare)(elem->pKey,elem->nKey,pKey,nKey)==0 ){ | |
271 return elem; | |
272 } | |
273 elem = elem->next; | |
274 } | |
275 } | |
276 return 0; | |
277 } | |
278 | |
279 /* Remove a single entry from the hash table given a pointer to that | |
280 ** element and a hash on the element's key. | |
281 */ | |
282 static void removeElementGivenHash( | |
283 Hash *pH, /* The pH containing "elem" */ | |
284 HashElem* elem, /* The element to be removed from the pH */ | |
285 int h /* Hash value for the element */ | |
286 ){ | |
287 struct _ht *pEntry; | |
288 if( elem->prev ){ | |
289 elem->prev->next = elem->next; | |
290 }else{ | |
291 pH->first = elem->next; | |
292 } | |
293 if( elem->next ){ | |
294 elem->next->prev = elem->prev; | |
295 } | |
296 pEntry = &pH->ht[h]; | |
297 if( pEntry->chain==elem ){ | |
298 pEntry->chain = elem->next; | |
299 } | |
300 pEntry->count--; | |
301 if( pEntry->count<=0 ){ | |
302 pEntry->chain = 0; | |
303 } | |
304 if( pH->copyKey && elem->pKey ){ | |
305 pH->xFree(elem->pKey); | |
306 } | |
307 pH->xFree( elem ); | |
308 pH->count--; | |
309 if( pH->count<=0 ){ | |
310 assert( pH->first==0 ); | |
311 assert( pH->count==0 ); | |
312 HashClear(pH); | |
313 } | |
314 } | |
315 | |
316 /* Attempt to locate an element of the hash table pH with a key | |
317 ** that matches pKey,nKey. Return the data for this element if it is | |
318 ** found, or NULL if there is no match. | |
319 */ | |
320 void *HashFind(const Hash *pH, const void *pKey, int nKey){ | |
321 int h; /* A hash on key */ | |
322 HashElem *elem; /* The element that matches key */ | |
323 int (*xHash)(const void*,int); /* The hash function */ | |
324 | |
325 if( pH==0 || pH->ht==0 ) return 0; | |
326 xHash = hashFunction(pH->keyClass); | |
327 assert( xHash!=0 ); | |
328 h = (*xHash)(pKey,nKey); | |
329 assert( (pH->htsize & (pH->htsize-1))==0 ); | |
330 elem = findElementGivenHash(pH,pKey,nKey, h & (pH->htsize-1)); | |
331 return elem ? elem->data : 0; | |
332 } | |
333 | |
334 /* Insert an element into the hash table pH. The key is pKey,nKey | |
335 ** and the data is "data". | |
336 ** | |
337 ** If no element exists with a matching key, then a new | |
338 ** element is created. A copy of the key is made if the copyKey | |
339 ** flag is set. NULL is returned. | |
340 ** | |
341 ** If another element already exists with the same key, then the | |
342 ** new data replaces the old data and the old data is returned. | |
343 ** The key is not copied in this instance. If a malloc fails, then | |
344 ** the new data is returned and the hash table is unchanged. | |
345 ** | |
346 ** If the "data" parameter to this function is NULL, then the | |
347 ** element corresponding to "key" is removed from the hash table. | |
348 */ | |
349 void *HashInsert(Hash *pH, const void *pKey, int nKey, void *data){ | |
350 int hraw; /* Raw hash value of the key */ | |
351 int h; /* the hash of the key modulo hash table size */ | |
352 HashElem *elem; /* Used to loop thru the element list */ | |
353 HashElem *new_elem; /* New element added to the pH */ | |
354 int (*xHash)(const void*,int); /* The hash function */ | |
355 | |
356 assert( pH!=0 ); | |
357 xHash = hashFunction(pH->keyClass); | |
358 assert( xHash!=0 ); | |
359 hraw = (*xHash)(pKey, nKey); | |
360 assert( (pH->htsize & (pH->htsize-1))==0 ); | |
361 h = hraw & (pH->htsize-1); | |
362 elem = findElementGivenHash(pH,pKey,nKey,h); | |
363 if( elem ){ | |
364 void *old_data = elem->data; | |
365 if( data==0 ){ | |
366 removeElementGivenHash(pH,elem,h); | |
367 }else{ | |
368 elem->data = data; | |
369 } | |
370 return old_data; | |
371 } | |
372 if( data==0 ) return 0; | |
373 new_elem = (HashElem*)pH->xMalloc( sizeof(HashElem) ); | |
374 if( new_elem==0 ) return data; | |
375 if( pH->copyKey && pKey!=0 ){ | |
376 new_elem->pKey = pH->xMalloc( nKey ); | |
377 if( new_elem->pKey==0 ){ | |
378 pH->xFree(new_elem); | |
379 return data; | |
380 } | |
381 memcpy((void*)new_elem->pKey, pKey, nKey); | |
382 }else{ | |
383 new_elem->pKey = (void*)pKey; | |
384 } | |
385 new_elem->nKey = nKey; | |
386 pH->count++; | |
387 if( pH->htsize==0 ){ | |
388 rehash(pH,8); | |
389 if( pH->htsize==0 ){ | |
390 pH->count = 0; | |
391 pH->xFree(new_elem); | |
392 return data; | |
393 } | |
394 } | |
395 if( pH->count > pH->htsize ){ | |
396 rehash(pH,pH->htsize*2); | |
397 } | |
398 assert( pH->htsize>0 ); | |
399 assert( (pH->htsize & (pH->htsize-1))==0 ); | |
400 h = hraw & (pH->htsize-1); | |
401 insertElement(pH, &pH->ht[h], new_elem); | |
402 new_elem->data = data; | |
403 return 0; | |
404 } | |
OLD | NEW |