OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ** 2008 December 3 |
| 3 ** |
| 4 ** The author disclaims copyright to this source code. In place of |
| 5 ** a legal notice, here is a blessing: |
| 6 ** |
| 7 ** May you do good and not evil. |
| 8 ** May you find forgiveness for yourself and forgive others. |
| 9 ** May you share freely, never taking more than you give. |
| 10 ** |
| 11 ************************************************************************* |
| 12 ** |
| 13 ** This module implements an object we call a "RowSet". |
| 14 ** |
| 15 ** The RowSet object is a collection of rowids. Rowids |
| 16 ** are inserted into the RowSet in an arbitrary order. Inserts |
| 17 ** can be intermixed with tests to see if a given rowid has been |
| 18 ** previously inserted into the RowSet. |
| 19 ** |
| 20 ** After all inserts are finished, it is possible to extract the |
| 21 ** elements of the RowSet in sorted order. Once this extraction |
| 22 ** process has started, no new elements may be inserted. |
| 23 ** |
| 24 ** Hence, the primitive operations for a RowSet are: |
| 25 ** |
| 26 ** CREATE |
| 27 ** INSERT |
| 28 ** TEST |
| 29 ** SMALLEST |
| 30 ** DESTROY |
| 31 ** |
| 32 ** The CREATE and DESTROY primitives are the constructor and destructor, |
| 33 ** obviously. The INSERT primitive adds a new element to the RowSet. |
| 34 ** TEST checks to see if an element is already in the RowSet. SMALLEST |
| 35 ** extracts the least value from the RowSet. |
| 36 ** |
| 37 ** The INSERT primitive might allocate additional memory. Memory is |
| 38 ** allocated in chunks so most INSERTs do no allocation. There is an |
| 39 ** upper bound on the size of allocated memory. No memory is freed |
| 40 ** until DESTROY. |
| 41 ** |
| 42 ** The TEST primitive includes a "batch" number. The TEST primitive |
| 43 ** will only see elements that were inserted before the last change |
| 44 ** in the batch number. In other words, if an INSERT occurs between |
| 45 ** two TESTs where the TESTs have the same batch nubmer, then the |
| 46 ** value added by the INSERT will not be visible to the second TEST. |
| 47 ** The initial batch number is zero, so if the very first TEST contains |
| 48 ** a non-zero batch number, it will see all prior INSERTs. |
| 49 ** |
| 50 ** No INSERTs may occurs after a SMALLEST. An assertion will fail if |
| 51 ** that is attempted. |
| 52 ** |
| 53 ** The cost of an INSERT is roughly constant. (Sometimes new memory |
| 54 ** has to be allocated on an INSERT.) The cost of a TEST with a new |
| 55 ** batch number is O(NlogN) where N is the number of elements in the RowSet. |
| 56 ** The cost of a TEST using the same batch number is O(logN). The cost |
| 57 ** of the first SMALLEST is O(NlogN). Second and subsequent SMALLEST |
| 58 ** primitives are constant time. The cost of DESTROY is O(N). |
| 59 ** |
| 60 ** TEST and SMALLEST may not be used by the same RowSet. This used to |
| 61 ** be possible, but the feature was not used, so it was removed in order |
| 62 ** to simplify the code. |
| 63 */ |
| 64 #include "sqliteInt.h" |
| 65 |
| 66 |
| 67 /* |
| 68 ** Target size for allocation chunks. |
| 69 */ |
| 70 #define ROWSET_ALLOCATION_SIZE 1024 |
| 71 |
| 72 /* |
| 73 ** The number of rowset entries per allocation chunk. |
| 74 */ |
| 75 #define ROWSET_ENTRY_PER_CHUNK \ |
| 76 ((ROWSET_ALLOCATION_SIZE-8)/sizeof(struct RowSetEntry)) |
| 77 |
| 78 /* |
| 79 ** Each entry in a RowSet is an instance of the following object. |
| 80 ** |
| 81 ** This same object is reused to store a linked list of trees of RowSetEntry |
| 82 ** objects. In that alternative use, pRight points to the next entry |
| 83 ** in the list, pLeft points to the tree, and v is unused. The |
| 84 ** RowSet.pForest value points to the head of this forest list. |
| 85 */ |
| 86 struct RowSetEntry { |
| 87 i64 v; /* ROWID value for this entry */ |
| 88 struct RowSetEntry *pRight; /* Right subtree (larger entries) or list */ |
| 89 struct RowSetEntry *pLeft; /* Left subtree (smaller entries) */ |
| 90 }; |
| 91 |
| 92 /* |
| 93 ** RowSetEntry objects are allocated in large chunks (instances of the |
| 94 ** following structure) to reduce memory allocation overhead. The |
| 95 ** chunks are kept on a linked list so that they can be deallocated |
| 96 ** when the RowSet is destroyed. |
| 97 */ |
| 98 struct RowSetChunk { |
| 99 struct RowSetChunk *pNextChunk; /* Next chunk on list of them all */ |
| 100 struct RowSetEntry aEntry[ROWSET_ENTRY_PER_CHUNK]; /* Allocated entries */ |
| 101 }; |
| 102 |
| 103 /* |
| 104 ** A RowSet in an instance of the following structure. |
| 105 ** |
| 106 ** A typedef of this structure if found in sqliteInt.h. |
| 107 */ |
| 108 struct RowSet { |
| 109 struct RowSetChunk *pChunk; /* List of all chunk allocations */ |
| 110 sqlite3 *db; /* The database connection */ |
| 111 struct RowSetEntry *pEntry; /* List of entries using pRight */ |
| 112 struct RowSetEntry *pLast; /* Last entry on the pEntry list */ |
| 113 struct RowSetEntry *pFresh; /* Source of new entry objects */ |
| 114 struct RowSetEntry *pForest; /* List of binary trees of entries */ |
| 115 u16 nFresh; /* Number of objects on pFresh */ |
| 116 u16 rsFlags; /* Various flags */ |
| 117 int iBatch; /* Current insert batch */ |
| 118 }; |
| 119 |
| 120 /* |
| 121 ** Allowed values for RowSet.rsFlags |
| 122 */ |
| 123 #define ROWSET_SORTED 0x01 /* True if RowSet.pEntry is sorted */ |
| 124 #define ROWSET_NEXT 0x02 /* True if sqlite3RowSetNext() has been called */ |
| 125 |
| 126 /* |
| 127 ** Turn bulk memory into a RowSet object. N bytes of memory |
| 128 ** are available at pSpace. The db pointer is used as a memory context |
| 129 ** for any subsequent allocations that need to occur. |
| 130 ** Return a pointer to the new RowSet object. |
| 131 ** |
| 132 ** It must be the case that N is sufficient to make a Rowset. If not |
| 133 ** an assertion fault occurs. |
| 134 ** |
| 135 ** If N is larger than the minimum, use the surplus as an initial |
| 136 ** allocation of entries available to be filled. |
| 137 */ |
| 138 RowSet *sqlite3RowSetInit(sqlite3 *db, void *pSpace, unsigned int N){ |
| 139 RowSet *p; |
| 140 assert( N >= ROUND8(sizeof(*p)) ); |
| 141 p = pSpace; |
| 142 p->pChunk = 0; |
| 143 p->db = db; |
| 144 p->pEntry = 0; |
| 145 p->pLast = 0; |
| 146 p->pForest = 0; |
| 147 p->pFresh = (struct RowSetEntry*)(ROUND8(sizeof(*p)) + (char*)p); |
| 148 p->nFresh = (u16)((N - ROUND8(sizeof(*p)))/sizeof(struct RowSetEntry)); |
| 149 p->rsFlags = ROWSET_SORTED; |
| 150 p->iBatch = 0; |
| 151 return p; |
| 152 } |
| 153 |
| 154 /* |
| 155 ** Deallocate all chunks from a RowSet. This frees all memory that |
| 156 ** the RowSet has allocated over its lifetime. This routine is |
| 157 ** the destructor for the RowSet. |
| 158 */ |
| 159 void sqlite3RowSetClear(RowSet *p){ |
| 160 struct RowSetChunk *pChunk, *pNextChunk; |
| 161 for(pChunk=p->pChunk; pChunk; pChunk = pNextChunk){ |
| 162 pNextChunk = pChunk->pNextChunk; |
| 163 sqlite3DbFree(p->db, pChunk); |
| 164 } |
| 165 p->pChunk = 0; |
| 166 p->nFresh = 0; |
| 167 p->pEntry = 0; |
| 168 p->pLast = 0; |
| 169 p->pForest = 0; |
| 170 p->rsFlags = ROWSET_SORTED; |
| 171 } |
| 172 |
| 173 /* |
| 174 ** Allocate a new RowSetEntry object that is associated with the |
| 175 ** given RowSet. Return a pointer to the new and completely uninitialized |
| 176 ** objected. |
| 177 ** |
| 178 ** In an OOM situation, the RowSet.db->mallocFailed flag is set and this |
| 179 ** routine returns NULL. |
| 180 */ |
| 181 static struct RowSetEntry *rowSetEntryAlloc(RowSet *p){ |
| 182 assert( p!=0 ); |
| 183 if( p->nFresh==0 ){ /*OPTIMIZATION-IF-FALSE*/ |
| 184 /* We could allocate a fresh RowSetEntry each time one is needed, but it |
| 185 ** is more efficient to pull a preallocated entry from the pool */ |
| 186 struct RowSetChunk *pNew; |
| 187 pNew = sqlite3DbMallocRawNN(p->db, sizeof(*pNew)); |
| 188 if( pNew==0 ){ |
| 189 return 0; |
| 190 } |
| 191 pNew->pNextChunk = p->pChunk; |
| 192 p->pChunk = pNew; |
| 193 p->pFresh = pNew->aEntry; |
| 194 p->nFresh = ROWSET_ENTRY_PER_CHUNK; |
| 195 } |
| 196 p->nFresh--; |
| 197 return p->pFresh++; |
| 198 } |
| 199 |
| 200 /* |
| 201 ** Insert a new value into a RowSet. |
| 202 ** |
| 203 ** The mallocFailed flag of the database connection is set if a |
| 204 ** memory allocation fails. |
| 205 */ |
| 206 void sqlite3RowSetInsert(RowSet *p, i64 rowid){ |
| 207 struct RowSetEntry *pEntry; /* The new entry */ |
| 208 struct RowSetEntry *pLast; /* The last prior entry */ |
| 209 |
| 210 /* This routine is never called after sqlite3RowSetNext() */ |
| 211 assert( p!=0 && (p->rsFlags & ROWSET_NEXT)==0 ); |
| 212 |
| 213 pEntry = rowSetEntryAlloc(p); |
| 214 if( pEntry==0 ) return; |
| 215 pEntry->v = rowid; |
| 216 pEntry->pRight = 0; |
| 217 pLast = p->pLast; |
| 218 if( pLast ){ |
| 219 if( rowid<=pLast->v ){ /*OPTIMIZATION-IF-FALSE*/ |
| 220 /* Avoid unnecessary sorts by preserving the ROWSET_SORTED flags |
| 221 ** where possible */ |
| 222 p->rsFlags &= ~ROWSET_SORTED; |
| 223 } |
| 224 pLast->pRight = pEntry; |
| 225 }else{ |
| 226 p->pEntry = pEntry; |
| 227 } |
| 228 p->pLast = pEntry; |
| 229 } |
| 230 |
| 231 /* |
| 232 ** Merge two lists of RowSetEntry objects. Remove duplicates. |
| 233 ** |
| 234 ** The input lists are connected via pRight pointers and are |
| 235 ** assumed to each already be in sorted order. |
| 236 */ |
| 237 static struct RowSetEntry *rowSetEntryMerge( |
| 238 struct RowSetEntry *pA, /* First sorted list to be merged */ |
| 239 struct RowSetEntry *pB /* Second sorted list to be merged */ |
| 240 ){ |
| 241 struct RowSetEntry head; |
| 242 struct RowSetEntry *pTail; |
| 243 |
| 244 pTail = &head; |
| 245 assert( pA!=0 && pB!=0 ); |
| 246 for(;;){ |
| 247 assert( pA->pRight==0 || pA->v<=pA->pRight->v ); |
| 248 assert( pB->pRight==0 || pB->v<=pB->pRight->v ); |
| 249 if( pA->v<=pB->v ){ |
| 250 if( pA->v<pB->v ) pTail = pTail->pRight = pA; |
| 251 pA = pA->pRight; |
| 252 if( pA==0 ){ |
| 253 pTail->pRight = pB; |
| 254 break; |
| 255 } |
| 256 }else{ |
| 257 pTail = pTail->pRight = pB; |
| 258 pB = pB->pRight; |
| 259 if( pB==0 ){ |
| 260 pTail->pRight = pA; |
| 261 break; |
| 262 } |
| 263 } |
| 264 } |
| 265 return head.pRight; |
| 266 } |
| 267 |
| 268 /* |
| 269 ** Sort all elements on the list of RowSetEntry objects into order of |
| 270 ** increasing v. |
| 271 */ |
| 272 static struct RowSetEntry *rowSetEntrySort(struct RowSetEntry *pIn){ |
| 273 unsigned int i; |
| 274 struct RowSetEntry *pNext, *aBucket[40]; |
| 275 |
| 276 memset(aBucket, 0, sizeof(aBucket)); |
| 277 while( pIn ){ |
| 278 pNext = pIn->pRight; |
| 279 pIn->pRight = 0; |
| 280 for(i=0; aBucket[i]; i++){ |
| 281 pIn = rowSetEntryMerge(aBucket[i], pIn); |
| 282 aBucket[i] = 0; |
| 283 } |
| 284 aBucket[i] = pIn; |
| 285 pIn = pNext; |
| 286 } |
| 287 pIn = aBucket[0]; |
| 288 for(i=1; i<sizeof(aBucket)/sizeof(aBucket[0]); i++){ |
| 289 if( aBucket[i]==0 ) continue; |
| 290 pIn = pIn ? rowSetEntryMerge(pIn, aBucket[i]) : aBucket[i]; |
| 291 } |
| 292 return pIn; |
| 293 } |
| 294 |
| 295 |
| 296 /* |
| 297 ** The input, pIn, is a binary tree (or subtree) of RowSetEntry objects. |
| 298 ** Convert this tree into a linked list connected by the pRight pointers |
| 299 ** and return pointers to the first and last elements of the new list. |
| 300 */ |
| 301 static void rowSetTreeToList( |
| 302 struct RowSetEntry *pIn, /* Root of the input tree */ |
| 303 struct RowSetEntry **ppFirst, /* Write head of the output list here */ |
| 304 struct RowSetEntry **ppLast /* Write tail of the output list here */ |
| 305 ){ |
| 306 assert( pIn!=0 ); |
| 307 if( pIn->pLeft ){ |
| 308 struct RowSetEntry *p; |
| 309 rowSetTreeToList(pIn->pLeft, ppFirst, &p); |
| 310 p->pRight = pIn; |
| 311 }else{ |
| 312 *ppFirst = pIn; |
| 313 } |
| 314 if( pIn->pRight ){ |
| 315 rowSetTreeToList(pIn->pRight, &pIn->pRight, ppLast); |
| 316 }else{ |
| 317 *ppLast = pIn; |
| 318 } |
| 319 assert( (*ppLast)->pRight==0 ); |
| 320 } |
| 321 |
| 322 |
| 323 /* |
| 324 ** Convert a sorted list of elements (connected by pRight) into a binary |
| 325 ** tree with depth of iDepth. A depth of 1 means the tree contains a single |
| 326 ** node taken from the head of *ppList. A depth of 2 means a tree with |
| 327 ** three nodes. And so forth. |
| 328 ** |
| 329 ** Use as many entries from the input list as required and update the |
| 330 ** *ppList to point to the unused elements of the list. If the input |
| 331 ** list contains too few elements, then construct an incomplete tree |
| 332 ** and leave *ppList set to NULL. |
| 333 ** |
| 334 ** Return a pointer to the root of the constructed binary tree. |
| 335 */ |
| 336 static struct RowSetEntry *rowSetNDeepTree( |
| 337 struct RowSetEntry **ppList, |
| 338 int iDepth |
| 339 ){ |
| 340 struct RowSetEntry *p; /* Root of the new tree */ |
| 341 struct RowSetEntry *pLeft; /* Left subtree */ |
| 342 if( *ppList==0 ){ /*OPTIMIZATION-IF-TRUE*/ |
| 343 /* Prevent unnecessary deep recursion when we run out of entries */ |
| 344 return 0; |
| 345 } |
| 346 if( iDepth>1 ){ /*OPTIMIZATION-IF-TRUE*/ |
| 347 /* This branch causes a *balanced* tree to be generated. A valid tree |
| 348 ** is still generated without this branch, but the tree is wildly |
| 349 ** unbalanced and inefficient. */ |
| 350 pLeft = rowSetNDeepTree(ppList, iDepth-1); |
| 351 p = *ppList; |
| 352 if( p==0 ){ /*OPTIMIZATION-IF-FALSE*/ |
| 353 /* It is safe to always return here, but the resulting tree |
| 354 ** would be unbalanced */ |
| 355 return pLeft; |
| 356 } |
| 357 p->pLeft = pLeft; |
| 358 *ppList = p->pRight; |
| 359 p->pRight = rowSetNDeepTree(ppList, iDepth-1); |
| 360 }else{ |
| 361 p = *ppList; |
| 362 *ppList = p->pRight; |
| 363 p->pLeft = p->pRight = 0; |
| 364 } |
| 365 return p; |
| 366 } |
| 367 |
| 368 /* |
| 369 ** Convert a sorted list of elements into a binary tree. Make the tree |
| 370 ** as deep as it needs to be in order to contain the entire list. |
| 371 */ |
| 372 static struct RowSetEntry *rowSetListToTree(struct RowSetEntry *pList){ |
| 373 int iDepth; /* Depth of the tree so far */ |
| 374 struct RowSetEntry *p; /* Current tree root */ |
| 375 struct RowSetEntry *pLeft; /* Left subtree */ |
| 376 |
| 377 assert( pList!=0 ); |
| 378 p = pList; |
| 379 pList = p->pRight; |
| 380 p->pLeft = p->pRight = 0; |
| 381 for(iDepth=1; pList; iDepth++){ |
| 382 pLeft = p; |
| 383 p = pList; |
| 384 pList = p->pRight; |
| 385 p->pLeft = pLeft; |
| 386 p->pRight = rowSetNDeepTree(&pList, iDepth); |
| 387 } |
| 388 return p; |
| 389 } |
| 390 |
| 391 /* |
| 392 ** Extract the smallest element from the RowSet. |
| 393 ** Write the element into *pRowid. Return 1 on success. Return |
| 394 ** 0 if the RowSet is already empty. |
| 395 ** |
| 396 ** After this routine has been called, the sqlite3RowSetInsert() |
| 397 ** routine may not be called again. |
| 398 ** |
| 399 ** This routine may not be called after sqlite3RowSetTest() has |
| 400 ** been used. Older versions of RowSet allowed that, but as the |
| 401 ** capability was not used by the code generator, it was removed |
| 402 ** for code economy. |
| 403 */ |
| 404 int sqlite3RowSetNext(RowSet *p, i64 *pRowid){ |
| 405 assert( p!=0 ); |
| 406 assert( p->pForest==0 ); /* Cannot be used with sqlite3RowSetText() */ |
| 407 |
| 408 /* Merge the forest into a single sorted list on first call */ |
| 409 if( (p->rsFlags & ROWSET_NEXT)==0 ){ /*OPTIMIZATION-IF-FALSE*/ |
| 410 if( (p->rsFlags & ROWSET_SORTED)==0 ){ /*OPTIMIZATION-IF-FALSE*/ |
| 411 p->pEntry = rowSetEntrySort(p->pEntry); |
| 412 } |
| 413 p->rsFlags |= ROWSET_SORTED|ROWSET_NEXT; |
| 414 } |
| 415 |
| 416 /* Return the next entry on the list */ |
| 417 if( p->pEntry ){ |
| 418 *pRowid = p->pEntry->v; |
| 419 p->pEntry = p->pEntry->pRight; |
| 420 if( p->pEntry==0 ){ /*OPTIMIZATION-IF-TRUE*/ |
| 421 /* Free memory immediately, rather than waiting on sqlite3_finalize() */ |
| 422 sqlite3RowSetClear(p); |
| 423 } |
| 424 return 1; |
| 425 }else{ |
| 426 return 0; |
| 427 } |
| 428 } |
| 429 |
| 430 /* |
| 431 ** Check to see if element iRowid was inserted into the rowset as |
| 432 ** part of any insert batch prior to iBatch. Return 1 or 0. |
| 433 ** |
| 434 ** If this is the first test of a new batch and if there exist entries |
| 435 ** on pRowSet->pEntry, then sort those entries into the forest at |
| 436 ** pRowSet->pForest so that they can be tested. |
| 437 */ |
| 438 int sqlite3RowSetTest(RowSet *pRowSet, int iBatch, sqlite3_int64 iRowid){ |
| 439 struct RowSetEntry *p, *pTree; |
| 440 |
| 441 /* This routine is never called after sqlite3RowSetNext() */ |
| 442 assert( pRowSet!=0 && (pRowSet->rsFlags & ROWSET_NEXT)==0 ); |
| 443 |
| 444 /* Sort entries into the forest on the first test of a new batch. |
| 445 ** To save unnecessary work, only do this when the batch number changes. |
| 446 */ |
| 447 if( iBatch!=pRowSet->iBatch ){ /*OPTIMIZATION-IF-FALSE*/ |
| 448 p = pRowSet->pEntry; |
| 449 if( p ){ |
| 450 struct RowSetEntry **ppPrevTree = &pRowSet->pForest; |
| 451 if( (pRowSet->rsFlags & ROWSET_SORTED)==0 ){ /*OPTIMIZATION-IF-FALSE*/ |
| 452 /* Only sort the current set of entiries if they need it */ |
| 453 p = rowSetEntrySort(p); |
| 454 } |
| 455 for(pTree = pRowSet->pForest; pTree; pTree=pTree->pRight){ |
| 456 ppPrevTree = &pTree->pRight; |
| 457 if( pTree->pLeft==0 ){ |
| 458 pTree->pLeft = rowSetListToTree(p); |
| 459 break; |
| 460 }else{ |
| 461 struct RowSetEntry *pAux, *pTail; |
| 462 rowSetTreeToList(pTree->pLeft, &pAux, &pTail); |
| 463 pTree->pLeft = 0; |
| 464 p = rowSetEntryMerge(pAux, p); |
| 465 } |
| 466 } |
| 467 if( pTree==0 ){ |
| 468 *ppPrevTree = pTree = rowSetEntryAlloc(pRowSet); |
| 469 if( pTree ){ |
| 470 pTree->v = 0; |
| 471 pTree->pRight = 0; |
| 472 pTree->pLeft = rowSetListToTree(p); |
| 473 } |
| 474 } |
| 475 pRowSet->pEntry = 0; |
| 476 pRowSet->pLast = 0; |
| 477 pRowSet->rsFlags |= ROWSET_SORTED; |
| 478 } |
| 479 pRowSet->iBatch = iBatch; |
| 480 } |
| 481 |
| 482 /* Test to see if the iRowid value appears anywhere in the forest. |
| 483 ** Return 1 if it does and 0 if not. |
| 484 */ |
| 485 for(pTree = pRowSet->pForest; pTree; pTree=pTree->pRight){ |
| 486 p = pTree->pLeft; |
| 487 while( p ){ |
| 488 if( p->v<iRowid ){ |
| 489 p = p->pRight; |
| 490 }else if( p->v>iRowid ){ |
| 491 p = p->pLeft; |
| 492 }else{ |
| 493 return 1; |
| 494 } |
| 495 } |
| 496 } |
| 497 return 0; |
| 498 } |
OLD | NEW |