Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(86)

Side by Side Diff: third_party/sqlite/src/src/btree.c

Issue 2751253002: [sql] Import SQLite 3.17.0. (Closed)
Patch Set: also clang on Linux i386 Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « third_party/sqlite/src/src/btree.h ('k') | third_party/sqlite/src/src/btreeInt.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 ** 2004 April 6 2 ** 2004 April 6
3 ** 3 **
4 ** The author disclaims copyright to this source code. In place of 4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing: 5 ** a legal notice, here is a blessing:
6 ** 6 **
7 ** May you do good and not evil. 7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others. 8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give. 9 ** May you share freely, never taking more than you give.
10 ** 10 **
(...skipping 332 matching lines...) Expand 10 before | Expand all | Expand 10 after
343 break; 343 break;
344 } 344 }
345 } 345 }
346 346
347 /* If the above search did not find a BtLock struct associating Btree p 347 /* If the above search did not find a BtLock struct associating Btree p
348 ** with table iTable, allocate one and link it into the list. 348 ** with table iTable, allocate one and link it into the list.
349 */ 349 */
350 if( !pLock ){ 350 if( !pLock ){
351 pLock = (BtLock *)sqlite3MallocZero(sizeof(BtLock)); 351 pLock = (BtLock *)sqlite3MallocZero(sizeof(BtLock));
352 if( !pLock ){ 352 if( !pLock ){
353 return SQLITE_NOMEM; 353 return SQLITE_NOMEM_BKPT;
354 } 354 }
355 pLock->iTable = iTable; 355 pLock->iTable = iTable;
356 pLock->pBtree = p; 356 pLock->pBtree = p;
357 pLock->pNext = pBt->pLock; 357 pLock->pNext = pBt->pLock;
358 pBt->pLock = pLock; 358 pBt->pLock = pLock;
359 } 359 }
360 360
361 /* Set the BtLock.eLock variable to the maximum of the current lock 361 /* Set the BtLock.eLock variable to the maximum of the current lock
362 ** and the requested lock. This means if a write-lock was already held 362 ** and the requested lock. This means if a write-lock was already held
363 ** and a read-lock requested, we don't incorrectly downgrade the lock. 363 ** and a read-lock requested, we don't incorrectly downgrade the lock.
(...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after
443 443
444 /* 444 /*
445 ***** This routine is used inside of assert() only **** 445 ***** This routine is used inside of assert() only ****
446 ** 446 **
447 ** Verify that the cursor holds the mutex on its BtShared 447 ** Verify that the cursor holds the mutex on its BtShared
448 */ 448 */
449 #ifdef SQLITE_DEBUG 449 #ifdef SQLITE_DEBUG
450 static int cursorHoldsMutex(BtCursor *p){ 450 static int cursorHoldsMutex(BtCursor *p){
451 return sqlite3_mutex_held(p->pBt->mutex); 451 return sqlite3_mutex_held(p->pBt->mutex);
452 } 452 }
453
454 /* Verify that the cursor and the BtShared agree about what is the current
455 ** database connetion. This is important in shared-cache mode. If the database
456 ** connection pointers get out-of-sync, it is possible for routines like
457 ** btreeInitPage() to reference an stale connection pointer that references a
458 ** a connection that has already closed. This routine is used inside assert()
459 ** statements only and for the purpose of double-checking that the btree code
460 ** does keep the database connection pointers up-to-date.
461 */
462 static int cursorOwnsBtShared(BtCursor *p){
463 assert( cursorHoldsMutex(p) );
464 return (p->pBtree->db==p->pBt->db);
465 }
453 #endif 466 #endif
454 467
455 /* 468 /*
456 ** Invalidate the overflow cache of the cursor passed as the first argument. 469 ** Invalidate the overflow cache of the cursor passed as the first argument.
457 ** on the shared btree structure pBt. 470 ** on the shared btree structure pBt.
458 */ 471 */
459 #define invalidateOverflowCache(pCur) (pCur->curFlags &= ~BTCF_ValidOvfl) 472 #define invalidateOverflowCache(pCur) (pCur->curFlags &= ~BTCF_ValidOvfl)
460 473
461 /* 474 /*
462 ** Invalidate the overflow page-list cache for all cursors opened 475 ** Invalidate the overflow page-list cache for all cursors opened
(...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after
542 ** optimization 2 above is omitted if the corresponding bit is already 555 ** optimization 2 above is omitted if the corresponding bit is already
543 ** set in BtShared.pHasContent. The contents of the bitvec are cleared 556 ** set in BtShared.pHasContent. The contents of the bitvec are cleared
544 ** at the end of every transaction. 557 ** at the end of every transaction.
545 */ 558 */
546 static int btreeSetHasContent(BtShared *pBt, Pgno pgno){ 559 static int btreeSetHasContent(BtShared *pBt, Pgno pgno){
547 int rc = SQLITE_OK; 560 int rc = SQLITE_OK;
548 if( !pBt->pHasContent ){ 561 if( !pBt->pHasContent ){
549 assert( pgno<=pBt->nPage ); 562 assert( pgno<=pBt->nPage );
550 pBt->pHasContent = sqlite3BitvecCreate(pBt->nPage); 563 pBt->pHasContent = sqlite3BitvecCreate(pBt->nPage);
551 if( !pBt->pHasContent ){ 564 if( !pBt->pHasContent ){
552 rc = SQLITE_NOMEM; 565 rc = SQLITE_NOMEM_BKPT;
553 } 566 }
554 } 567 }
555 if( rc==SQLITE_OK && pgno<=sqlite3BitvecSize(pBt->pHasContent) ){ 568 if( rc==SQLITE_OK && pgno<=sqlite3BitvecSize(pBt->pHasContent) ){
556 rc = sqlite3BitvecSet(pBt->pHasContent, pgno); 569 rc = sqlite3BitvecSet(pBt->pHasContent, pgno);
557 } 570 }
558 return rc; 571 return rc;
559 } 572 }
560 573
561 /* 574 /*
562 ** Query the BtShared.pHasContent vector. 575 ** Query the BtShared.pHasContent vector.
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after
598 ** pCur->pKey. SQLITE_OK is returned if successful or an SQLite error 611 ** pCur->pKey. SQLITE_OK is returned if successful or an SQLite error
599 ** code otherwise. 612 ** code otherwise.
600 ** 613 **
601 ** If the cursor is open on an intkey table, then the integer key 614 ** If the cursor is open on an intkey table, then the integer key
602 ** (the rowid) is stored in pCur->nKey and pCur->pKey is left set to 615 ** (the rowid) is stored in pCur->nKey and pCur->pKey is left set to
603 ** NULL. If the cursor is open on a non-intkey table, then pCur->pKey is 616 ** NULL. If the cursor is open on a non-intkey table, then pCur->pKey is
604 ** set to point to a malloced buffer pCur->nKey bytes in size containing 617 ** set to point to a malloced buffer pCur->nKey bytes in size containing
605 ** the key. 618 ** the key.
606 */ 619 */
607 static int saveCursorKey(BtCursor *pCur){ 620 static int saveCursorKey(BtCursor *pCur){
608 int rc; 621 int rc = SQLITE_OK;
609 assert( CURSOR_VALID==pCur->eState ); 622 assert( CURSOR_VALID==pCur->eState );
610 assert( 0==pCur->pKey ); 623 assert( 0==pCur->pKey );
611 assert( cursorHoldsMutex(pCur) ); 624 assert( cursorHoldsMutex(pCur) );
612 625
613 rc = sqlite3BtreeKeySize(pCur, &pCur->nKey); 626 if( pCur->curIntKey ){
614 assert( rc==SQLITE_OK ); /* KeySize() cannot fail */ 627 /* Only the rowid is required for a table btree */
615 628 pCur->nKey = sqlite3BtreeIntegerKey(pCur);
616 /* If this is an intKey table, then the above call to BtreeKeySize() 629 }else{
617 ** stores the integer key in pCur->nKey. In this case this value is 630 /* For an index btree, save the complete key content */
618 ** all that is required. Otherwise, if pCur is not open on an intKey 631 void *pKey;
619 ** table, then malloc space for and store the pCur->nKey bytes of key 632 pCur->nKey = sqlite3BtreePayloadSize(pCur);
620 ** data. */ 633 pKey = sqlite3Malloc( pCur->nKey );
621 if( 0==pCur->curIntKey ){
622 void *pKey = sqlite3Malloc( pCur->nKey );
623 if( pKey ){ 634 if( pKey ){
624 rc = sqlite3BtreeKey(pCur, 0, (int)pCur->nKey, pKey); 635 rc = sqlite3BtreePayload(pCur, 0, (int)pCur->nKey, pKey);
625 if( rc==SQLITE_OK ){ 636 if( rc==SQLITE_OK ){
626 pCur->pKey = pKey; 637 pCur->pKey = pKey;
627 }else{ 638 }else{
628 sqlite3_free(pKey); 639 sqlite3_free(pKey);
629 } 640 }
630 }else{ 641 }else{
631 rc = SQLITE_NOMEM; 642 rc = SQLITE_NOMEM_BKPT;
632 } 643 }
633 } 644 }
634 assert( !pCur->curIntKey || !pCur->pKey ); 645 assert( !pCur->curIntKey || !pCur->pKey );
635 return rc; 646 return rc;
636 } 647 }
637 648
638 /* 649 /*
639 ** Save the current cursor position in the variables BtCursor.nKey 650 ** Save the current cursor position in the variables BtCursor.nKey
640 ** and BtCursor.pKey. The cursor's state is set to CURSOR_REQUIRESEEK. 651 ** and BtCursor.pKey. The cursor's state is set to CURSOR_REQUIRESEEK.
641 ** 652 **
(...skipping 103 matching lines...) Expand 10 before | Expand all | Expand 10 after
745 */ 756 */
746 static int btreeMoveto( 757 static int btreeMoveto(
747 BtCursor *pCur, /* Cursor open on the btree to be searched */ 758 BtCursor *pCur, /* Cursor open on the btree to be searched */
748 const void *pKey, /* Packed key if the btree is an index */ 759 const void *pKey, /* Packed key if the btree is an index */
749 i64 nKey, /* Integer key for tables. Size of pKey for indices */ 760 i64 nKey, /* Integer key for tables. Size of pKey for indices */
750 int bias, /* Bias search to the high end */ 761 int bias, /* Bias search to the high end */
751 int *pRes /* Write search results here */ 762 int *pRes /* Write search results here */
752 ){ 763 ){
753 int rc; /* Status code */ 764 int rc; /* Status code */
754 UnpackedRecord *pIdxKey; /* Unpacked index key */ 765 UnpackedRecord *pIdxKey; /* Unpacked index key */
755 char aSpace[200]; /* Temp space for pIdxKey - to avoid a malloc */
756 char *pFree = 0;
757 766
758 if( pKey ){ 767 if( pKey ){
759 assert( nKey==(i64)(int)nKey ); 768 assert( nKey==(i64)(int)nKey );
760 pIdxKey = sqlite3VdbeAllocUnpackedRecord( 769 pIdxKey = sqlite3VdbeAllocUnpackedRecord(pCur->pKeyInfo);
761 pCur->pKeyInfo, aSpace, sizeof(aSpace), &pFree 770 if( pIdxKey==0 ) return SQLITE_NOMEM_BKPT;
762 );
763 if( pIdxKey==0 ) return SQLITE_NOMEM;
764 sqlite3VdbeRecordUnpack(pCur->pKeyInfo, (int)nKey, pKey, pIdxKey); 771 sqlite3VdbeRecordUnpack(pCur->pKeyInfo, (int)nKey, pKey, pIdxKey);
765 if( pIdxKey->nField==0 ){ 772 if( pIdxKey->nField==0 ){
766 sqlite3DbFree(pCur->pKeyInfo->db, pFree); 773 rc = SQLITE_CORRUPT_BKPT;
767 return SQLITE_CORRUPT_BKPT; 774 goto moveto_done;
768 } 775 }
769 }else{ 776 }else{
770 pIdxKey = 0; 777 pIdxKey = 0;
771 } 778 }
772 rc = sqlite3BtreeMovetoUnpacked(pCur, pIdxKey, nKey, bias, pRes); 779 rc = sqlite3BtreeMovetoUnpacked(pCur, pIdxKey, nKey, bias, pRes);
773 if( pFree ){ 780 moveto_done:
774 sqlite3DbFree(pCur->pKeyInfo->db, pFree); 781 if( pIdxKey ){
782 sqlite3DbFree(pCur->pKeyInfo->db, pIdxKey);
775 } 783 }
776 return rc; 784 return rc;
777 } 785 }
778 786
779 /* 787 /*
780 ** Restore the cursor to the position it was in (or as close to as possible) 788 ** Restore the cursor to the position it was in (or as close to as possible)
781 ** when saveCursorPosition() was called. Note that this call deletes the 789 ** when saveCursorPosition() was called. Note that this call deletes the
782 ** saved position info stored by saveCursorPosition(), so there can be 790 ** saved position info stored by saveCursorPosition(), so there can be
783 ** at most one effective restoreCursorPosition() call after each 791 ** at most one effective restoreCursorPosition() call after each
784 ** saveCursorPosition(). 792 ** saveCursorPosition().
785 */ 793 */
786 static int btreeRestoreCursorPosition(BtCursor *pCur){ 794 static int btreeRestoreCursorPosition(BtCursor *pCur){
787 int rc; 795 int rc;
788 int skipNext; 796 int skipNext;
789 assert( cursorHoldsMutex(pCur) ); 797 assert( cursorOwnsBtShared(pCur) );
790 assert( pCur->eState>=CURSOR_REQUIRESEEK ); 798 assert( pCur->eState>=CURSOR_REQUIRESEEK );
791 if( pCur->eState==CURSOR_FAULT ){ 799 if( pCur->eState==CURSOR_FAULT ){
792 return pCur->skipNext; 800 return pCur->skipNext;
793 } 801 }
794 pCur->eState = CURSOR_INVALID; 802 pCur->eState = CURSOR_INVALID;
795 rc = btreeMoveto(pCur, pCur->pKey, pCur->nKey, 0, &skipNext); 803 rc = btreeMoveto(pCur, pCur->pKey, pCur->nKey, 0, &skipNext);
796 if( rc==SQLITE_OK ){ 804 if( rc==SQLITE_OK ){
797 sqlite3_free(pCur->pKey); 805 sqlite3_free(pCur->pKey);
798 pCur->pKey = 0; 806 pCur->pKey = 0;
799 assert( pCur->eState==CURSOR_VALID || pCur->eState==CURSOR_INVALID ); 807 assert( pCur->eState==CURSOR_VALID || pCur->eState==CURSOR_INVALID );
(...skipping 268 matching lines...) Expand 10 before | Expand all | Expand 10 after
1068 ** all MemPage types and that references the cell by index rather than 1076 ** all MemPage types and that references the cell by index rather than
1069 ** by pointer. 1077 ** by pointer.
1070 */ 1078 */
1071 static void btreeParseCellPtrNoPayload( 1079 static void btreeParseCellPtrNoPayload(
1072 MemPage *pPage, /* Page containing the cell */ 1080 MemPage *pPage, /* Page containing the cell */
1073 u8 *pCell, /* Pointer to the cell text. */ 1081 u8 *pCell, /* Pointer to the cell text. */
1074 CellInfo *pInfo /* Fill in this structure */ 1082 CellInfo *pInfo /* Fill in this structure */
1075 ){ 1083 ){
1076 assert( sqlite3_mutex_held(pPage->pBt->mutex) ); 1084 assert( sqlite3_mutex_held(pPage->pBt->mutex) );
1077 assert( pPage->leaf==0 ); 1085 assert( pPage->leaf==0 );
1078 assert( pPage->noPayload );
1079 assert( pPage->childPtrSize==4 ); 1086 assert( pPage->childPtrSize==4 );
1080 #ifndef SQLITE_DEBUG 1087 #ifndef SQLITE_DEBUG
1081 UNUSED_PARAMETER(pPage); 1088 UNUSED_PARAMETER(pPage);
1082 #endif 1089 #endif
1083 pInfo->nSize = 4 + getVarint(&pCell[4], (u64*)&pInfo->nKey); 1090 pInfo->nSize = 4 + getVarint(&pCell[4], (u64*)&pInfo->nKey);
1084 pInfo->nPayload = 0; 1091 pInfo->nPayload = 0;
1085 pInfo->nLocal = 0; 1092 pInfo->nLocal = 0;
1086 pInfo->pPayload = 0; 1093 pInfo->pPayload = 0;
1087 return; 1094 return;
1088 } 1095 }
1089 static void btreeParseCellPtr( 1096 static void btreeParseCellPtr(
1090 MemPage *pPage, /* Page containing the cell */ 1097 MemPage *pPage, /* Page containing the cell */
1091 u8 *pCell, /* Pointer to the cell text. */ 1098 u8 *pCell, /* Pointer to the cell text. */
1092 CellInfo *pInfo /* Fill in this structure */ 1099 CellInfo *pInfo /* Fill in this structure */
1093 ){ 1100 ){
1094 u8 *pIter; /* For scanning through pCell */ 1101 u8 *pIter; /* For scanning through pCell */
1095 u32 nPayload; /* Number of bytes of cell payload */ 1102 u32 nPayload; /* Number of bytes of cell payload */
1096 u64 iKey; /* Extracted Key value */ 1103 u64 iKey; /* Extracted Key value */
1097 1104
1098 assert( sqlite3_mutex_held(pPage->pBt->mutex) ); 1105 assert( sqlite3_mutex_held(pPage->pBt->mutex) );
1099 assert( pPage->leaf==0 || pPage->leaf==1 ); 1106 assert( pPage->leaf==0 || pPage->leaf==1 );
1100 assert( pPage->intKeyLeaf || pPage->noPayload );
1101 assert( pPage->noPayload==0 );
1102 assert( pPage->intKeyLeaf ); 1107 assert( pPage->intKeyLeaf );
1103 assert( pPage->childPtrSize==0 ); 1108 assert( pPage->childPtrSize==0 );
1104 pIter = pCell; 1109 pIter = pCell;
1105 1110
1106 /* The next block of code is equivalent to: 1111 /* The next block of code is equivalent to:
1107 ** 1112 **
1108 ** pIter += getVarint32(pIter, nPayload); 1113 ** pIter += getVarint32(pIter, nPayload);
1109 ** 1114 **
1110 ** The code is inlined to avoid a function call. 1115 ** The code is inlined to avoid a function call.
1111 */ 1116 */
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after
1160 MemPage *pPage, /* Page containing the cell */ 1165 MemPage *pPage, /* Page containing the cell */
1161 u8 *pCell, /* Pointer to the cell text. */ 1166 u8 *pCell, /* Pointer to the cell text. */
1162 CellInfo *pInfo /* Fill in this structure */ 1167 CellInfo *pInfo /* Fill in this structure */
1163 ){ 1168 ){
1164 u8 *pIter; /* For scanning through pCell */ 1169 u8 *pIter; /* For scanning through pCell */
1165 u32 nPayload; /* Number of bytes of cell payload */ 1170 u32 nPayload; /* Number of bytes of cell payload */
1166 1171
1167 assert( sqlite3_mutex_held(pPage->pBt->mutex) ); 1172 assert( sqlite3_mutex_held(pPage->pBt->mutex) );
1168 assert( pPage->leaf==0 || pPage->leaf==1 ); 1173 assert( pPage->leaf==0 || pPage->leaf==1 );
1169 assert( pPage->intKeyLeaf==0 ); 1174 assert( pPage->intKeyLeaf==0 );
1170 assert( pPage->noPayload==0 );
1171 pIter = pCell + pPage->childPtrSize; 1175 pIter = pCell + pPage->childPtrSize;
1172 nPayload = *pIter; 1176 nPayload = *pIter;
1173 if( nPayload>=0x80 ){ 1177 if( nPayload>=0x80 ){
1174 u8 *pEnd = &pIter[8]; 1178 u8 *pEnd = &pIter[8];
1175 nPayload &= 0x7f; 1179 nPayload &= 0x7f;
1176 do{ 1180 do{
1177 nPayload = (nPayload<<7) | (*++pIter & 0x7f); 1181 nPayload = (nPayload<<7) | (*++pIter & 0x7f);
1178 }while( *(pIter)>=0x80 && pIter<pEnd ); 1182 }while( *(pIter)>=0x80 && pIter<pEnd );
1179 } 1183 }
1180 pIter++; 1184 pIter++;
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after
1221 1225
1222 #ifdef SQLITE_DEBUG 1226 #ifdef SQLITE_DEBUG
1223 /* The value returned by this function should always be the same as 1227 /* The value returned by this function should always be the same as
1224 ** the (CellInfo.nSize) value found by doing a full parse of the 1228 ** the (CellInfo.nSize) value found by doing a full parse of the
1225 ** cell. If SQLITE_DEBUG is defined, an assert() at the bottom of 1229 ** cell. If SQLITE_DEBUG is defined, an assert() at the bottom of
1226 ** this function verifies that this invariant is not violated. */ 1230 ** this function verifies that this invariant is not violated. */
1227 CellInfo debuginfo; 1231 CellInfo debuginfo;
1228 pPage->xParseCell(pPage, pCell, &debuginfo); 1232 pPage->xParseCell(pPage, pCell, &debuginfo);
1229 #endif 1233 #endif
1230 1234
1231 assert( pPage->noPayload==0 );
1232 nSize = *pIter; 1235 nSize = *pIter;
1233 if( nSize>=0x80 ){ 1236 if( nSize>=0x80 ){
1234 pEnd = &pIter[8]; 1237 pEnd = &pIter[8];
1235 nSize &= 0x7f; 1238 nSize &= 0x7f;
1236 do{ 1239 do{
1237 nSize = (nSize<<7) | (*++pIter & 0x7f); 1240 nSize = (nSize<<7) | (*++pIter & 0x7f);
1238 }while( *(pIter)>=0x80 && pIter<pEnd ); 1241 }while( *(pIter)>=0x80 && pIter<pEnd );
1239 } 1242 }
1240 pIter++; 1243 pIter++;
1241 if( pPage->intKey ){ 1244 if( pPage->intKey ){
(...skipping 350 matching lines...) Expand 10 before | Expand all | Expand 10 after
1592 } 1595 }
1593 1596
1594 /* The list of freeblocks must be in ascending order. Find the 1597 /* The list of freeblocks must be in ascending order. Find the
1595 ** spot on the list where iStart should be inserted. 1598 ** spot on the list where iStart should be inserted.
1596 */ 1599 */
1597 hdr = pPage->hdrOffset; 1600 hdr = pPage->hdrOffset;
1598 iPtr = hdr + 1; 1601 iPtr = hdr + 1;
1599 if( data[iPtr+1]==0 && data[iPtr]==0 ){ 1602 if( data[iPtr+1]==0 && data[iPtr]==0 ){
1600 iFreeBlk = 0; /* Shortcut for the case when the freelist is empty */ 1603 iFreeBlk = 0; /* Shortcut for the case when the freelist is empty */
1601 }else{ 1604 }else{
1602 while( (iFreeBlk = get2byte(&data[iPtr]))>0 && iFreeBlk<iStart ){ 1605 while( (iFreeBlk = get2byte(&data[iPtr]))<iStart ){
1603 if( iFreeBlk<iPtr+4 ) return SQLITE_CORRUPT_BKPT; 1606 if( iFreeBlk<iPtr+4 ){
1607 if( iFreeBlk==0 ) break;
1608 return SQLITE_CORRUPT_BKPT;
1609 }
1604 iPtr = iFreeBlk; 1610 iPtr = iFreeBlk;
1605 } 1611 }
1606 if( iFreeBlk>iLast ) return SQLITE_CORRUPT_BKPT; 1612 if( iFreeBlk>iLast ) return SQLITE_CORRUPT_BKPT;
1607 assert( iFreeBlk>iPtr || iFreeBlk==0 ); 1613 assert( iFreeBlk>iPtr || iFreeBlk==0 );
1608 1614
1609 /* At this point: 1615 /* At this point:
1610 ** iFreeBlk: First freeblock after iStart, or zero if none 1616 ** iFreeBlk: First freeblock after iStart, or zero if none
1611 ** iPtr: The address of a pointer to iFreeBlk 1617 ** iPtr: The address of a pointer to iFreeBlk
1612 ** 1618 **
1613 ** Check to see if iFreeBlk should be coalesced onto the end of iStart. 1619 ** Check to see if iFreeBlk should be coalesced onto the end of iStart.
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after
1670 BtShared *pBt; /* A copy of pPage->pBt */ 1676 BtShared *pBt; /* A copy of pPage->pBt */
1671 1677
1672 assert( pPage->hdrOffset==(pPage->pgno==1 ? 100 : 0) ); 1678 assert( pPage->hdrOffset==(pPage->pgno==1 ? 100 : 0) );
1673 assert( sqlite3_mutex_held(pPage->pBt->mutex) ); 1679 assert( sqlite3_mutex_held(pPage->pBt->mutex) );
1674 pPage->leaf = (u8)(flagByte>>3); assert( PTF_LEAF == 1<<3 ); 1680 pPage->leaf = (u8)(flagByte>>3); assert( PTF_LEAF == 1<<3 );
1675 flagByte &= ~PTF_LEAF; 1681 flagByte &= ~PTF_LEAF;
1676 pPage->childPtrSize = 4-4*pPage->leaf; 1682 pPage->childPtrSize = 4-4*pPage->leaf;
1677 pPage->xCellSize = cellSizePtr; 1683 pPage->xCellSize = cellSizePtr;
1678 pBt = pPage->pBt; 1684 pBt = pPage->pBt;
1679 if( flagByte==(PTF_LEAFDATA | PTF_INTKEY) ){ 1685 if( flagByte==(PTF_LEAFDATA | PTF_INTKEY) ){
1680 /* EVIDENCE-OF: R-03640-13415 A value of 5 means the page is an interior 1686 /* EVIDENCE-OF: R-07291-35328 A value of 5 (0x05) means the page is an
1681 ** table b-tree page. */ 1687 ** interior table b-tree page. */
1682 assert( (PTF_LEAFDATA|PTF_INTKEY)==5 ); 1688 assert( (PTF_LEAFDATA|PTF_INTKEY)==5 );
1683 /* EVIDENCE-OF: R-20501-61796 A value of 13 means the page is a leaf 1689 /* EVIDENCE-OF: R-26900-09176 A value of 13 (0x0d) means the page is a
1684 ** table b-tree page. */ 1690 ** leaf table b-tree page. */
1685 assert( (PTF_LEAFDATA|PTF_INTKEY|PTF_LEAF)==13 ); 1691 assert( (PTF_LEAFDATA|PTF_INTKEY|PTF_LEAF)==13 );
1686 pPage->intKey = 1; 1692 pPage->intKey = 1;
1687 if( pPage->leaf ){ 1693 if( pPage->leaf ){
1688 pPage->intKeyLeaf = 1; 1694 pPage->intKeyLeaf = 1;
1689 pPage->noPayload = 0;
1690 pPage->xParseCell = btreeParseCellPtr; 1695 pPage->xParseCell = btreeParseCellPtr;
1691 }else{ 1696 }else{
1692 pPage->intKeyLeaf = 0; 1697 pPage->intKeyLeaf = 0;
1693 pPage->noPayload = 1;
1694 pPage->xCellSize = cellSizePtrNoPayload; 1698 pPage->xCellSize = cellSizePtrNoPayload;
1695 pPage->xParseCell = btreeParseCellPtrNoPayload; 1699 pPage->xParseCell = btreeParseCellPtrNoPayload;
1696 } 1700 }
1697 pPage->maxLocal = pBt->maxLeaf; 1701 pPage->maxLocal = pBt->maxLeaf;
1698 pPage->minLocal = pBt->minLeaf; 1702 pPage->minLocal = pBt->minLeaf;
1699 }else if( flagByte==PTF_ZERODATA ){ 1703 }else if( flagByte==PTF_ZERODATA ){
1700 /* EVIDENCE-OF: R-27225-53936 A value of 2 means the page is an interior 1704 /* EVIDENCE-OF: R-43316-37308 A value of 2 (0x02) means the page is an
1701 ** index b-tree page. */ 1705 ** interior index b-tree page. */
1702 assert( (PTF_ZERODATA)==2 ); 1706 assert( (PTF_ZERODATA)==2 );
1703 /* EVIDENCE-OF: R-16571-11615 A value of 10 means the page is a leaf 1707 /* EVIDENCE-OF: R-59615-42828 A value of 10 (0x0a) means the page is a
1704 ** index b-tree page. */ 1708 ** leaf index b-tree page. */
1705 assert( (PTF_ZERODATA|PTF_LEAF)==10 ); 1709 assert( (PTF_ZERODATA|PTF_LEAF)==10 );
1706 pPage->intKey = 0; 1710 pPage->intKey = 0;
1707 pPage->intKeyLeaf = 0; 1711 pPage->intKeyLeaf = 0;
1708 pPage->noPayload = 0;
1709 pPage->xParseCell = btreeParseCellPtrIndex; 1712 pPage->xParseCell = btreeParseCellPtrIndex;
1710 pPage->maxLocal = pBt->maxLocal; 1713 pPage->maxLocal = pBt->maxLocal;
1711 pPage->minLocal = pBt->minLocal; 1714 pPage->minLocal = pBt->minLocal;
1712 }else{ 1715 }else{
1713 /* EVIDENCE-OF: R-47608-56469 Any other value for the b-tree page type is 1716 /* EVIDENCE-OF: R-47608-56469 Any other value for the b-tree page type is
1714 ** an error. */ 1717 ** an error. */
1715 return SQLITE_CORRUPT_BKPT; 1718 return SQLITE_CORRUPT_BKPT;
1716 } 1719 }
1717 pPage->max1bytePayload = pBt->max1bytePayload; 1720 pPage->max1bytePayload = pBt->max1bytePayload;
1718 return SQLITE_OK; 1721 return SQLITE_OK;
(...skipping 11 matching lines...) Expand all
1730 static int btreeInitPage(MemPage *pPage){ 1733 static int btreeInitPage(MemPage *pPage){
1731 1734
1732 assert( pPage->pBt!=0 ); 1735 assert( pPage->pBt!=0 );
1733 assert( pPage->pBt->db!=0 ); 1736 assert( pPage->pBt->db!=0 );
1734 assert( sqlite3_mutex_held(pPage->pBt->mutex) ); 1737 assert( sqlite3_mutex_held(pPage->pBt->mutex) );
1735 assert( pPage->pgno==sqlite3PagerPagenumber(pPage->pDbPage) ); 1738 assert( pPage->pgno==sqlite3PagerPagenumber(pPage->pDbPage) );
1736 assert( pPage == sqlite3PagerGetExtra(pPage->pDbPage) ); 1739 assert( pPage == sqlite3PagerGetExtra(pPage->pDbPage) );
1737 assert( pPage->aData == sqlite3PagerGetData(pPage->pDbPage) ); 1740 assert( pPage->aData == sqlite3PagerGetData(pPage->pDbPage) );
1738 1741
1739 if( !pPage->isInit ){ 1742 if( !pPage->isInit ){
1740 u16 pc; /* Address of a freeblock within pPage->aData[] */ 1743 int pc; /* Address of a freeblock within pPage->aData[] */
1741 u8 hdr; /* Offset to beginning of page header */ 1744 u8 hdr; /* Offset to beginning of page header */
1742 u8 *data; /* Equal to pPage->aData */ 1745 u8 *data; /* Equal to pPage->aData */
1743 BtShared *pBt; /* The main btree structure */ 1746 BtShared *pBt; /* The main btree structure */
1744 int usableSize; /* Amount of usable space on each page */ 1747 int usableSize; /* Amount of usable space on each page */
1745 u16 cellOffset; /* Offset from start of page to first cell pointer */ 1748 u16 cellOffset; /* Offset from start of page to first cell pointer */
1746 int nFree; /* Number of unused bytes on the page */ 1749 int nFree; /* Number of unused bytes on the page */
1747 int top; /* First byte of the cell content area */ 1750 int top; /* First byte of the cell content area */
1748 int iCellFirst; /* First allowable cell or freeblock offset */ 1751 int iCellFirst; /* First allowable cell or freeblock offset */
1749 int iCellLast; /* Last possible cell or freeblock offset */ 1752 int iCellLast; /* Last possible cell or freeblock offset */
1750 1753
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after
1810 } 1813 }
1811 if( !pPage->leaf ) iCellLast++; 1814 if( !pPage->leaf ) iCellLast++;
1812 } 1815 }
1813 1816
1814 /* Compute the total free space on the page 1817 /* Compute the total free space on the page
1815 ** EVIDENCE-OF: R-23588-34450 The two-byte integer at offset 1 gives the 1818 ** EVIDENCE-OF: R-23588-34450 The two-byte integer at offset 1 gives the
1816 ** start of the first freeblock on the page, or is zero if there are no 1819 ** start of the first freeblock on the page, or is zero if there are no
1817 ** freeblocks. */ 1820 ** freeblocks. */
1818 pc = get2byte(&data[hdr+1]); 1821 pc = get2byte(&data[hdr+1]);
1819 nFree = data[hdr+7] + top; /* Init nFree to non-freeblock free space */ 1822 nFree = data[hdr+7] + top; /* Init nFree to non-freeblock free space */
1820 while( pc>0 ){ 1823 if( pc>0 ){
1821 u16 next, size; 1824 u32 next, size;
1822 if( pc<iCellFirst || pc>iCellLast ){ 1825 if( pc<iCellFirst ){
1823 /* EVIDENCE-OF: R-55530-52930 In a well-formed b-tree page, there will 1826 /* EVIDENCE-OF: R-55530-52930 In a well-formed b-tree page, there will
1824 ** always be at least one cell before the first freeblock. 1827 ** always be at least one cell before the first freeblock.
1825 **
1826 ** Or, the freeblock is off the end of the page
1827 */ 1828 */
1828 return SQLITE_CORRUPT_BKPT; 1829 return SQLITE_CORRUPT_BKPT;
1829 } 1830 }
1830 next = get2byte(&data[pc]); 1831 while( 1 ){
1831 size = get2byte(&data[pc+2]); 1832 if( pc>iCellLast ){
1832 if( (next>0 && next<=pc+size+3) || pc+size>usableSize ){ 1833 return SQLITE_CORRUPT_BKPT; /* Freeblock off the end of the page */
1833 /* Free blocks must be in ascending order. And the last byte of 1834 }
1834 ** the free-block must lie on the database page. */ 1835 next = get2byte(&data[pc]);
1835 return SQLITE_CORRUPT_BKPT; 1836 size = get2byte(&data[pc+2]);
1837 nFree = nFree + size;
1838 if( next<=pc+size+3 ) break;
1839 pc = next;
1836 } 1840 }
1837 nFree = nFree + size; 1841 if( next>0 ){
1838 pc = next; 1842 return SQLITE_CORRUPT_BKPT; /* Freeblock not in ascending order */
1843 }
1844 if( pc+size>(unsigned int)usableSize ){
1845 return SQLITE_CORRUPT_BKPT; /* Last freeblock extends past page end */
1846 }
1839 } 1847 }
1840 1848
1841 /* At this point, nFree contains the sum of the offset to the start 1849 /* At this point, nFree contains the sum of the offset to the start
1842 ** of the cell-content area plus the number of free bytes within 1850 ** of the cell-content area plus the number of free bytes within
1843 ** the cell-content area. If this is greater than the usable-size 1851 ** the cell-content area. If this is greater than the usable-size
1844 ** of the page, then the page must be corrupted. This check also 1852 ** of the page, then the page must be corrupted. This check also
1845 ** serves to verify that the offset to the start of the cell-content 1853 ** serves to verify that the offset to the start of the cell-content
1846 ** area, according to the page header, lies within the page. 1854 ** area, according to the page header, lies within the page.
1847 */ 1855 */
1848 if( nFree>usableSize ){ 1856 if( nFree>usableSize ){
(...skipping 324 matching lines...) Expand 10 before | Expand all | Expand 10 after
2173 assert( (flags & BTREE_SINGLE)==0 || isTempDb ); 2181 assert( (flags & BTREE_SINGLE)==0 || isTempDb );
2174 2182
2175 if( isMemdb ){ 2183 if( isMemdb ){
2176 flags |= BTREE_MEMORY; 2184 flags |= BTREE_MEMORY;
2177 } 2185 }
2178 if( (vfsFlags & SQLITE_OPEN_MAIN_DB)!=0 && (isMemdb || isTempDb) ){ 2186 if( (vfsFlags & SQLITE_OPEN_MAIN_DB)!=0 && (isMemdb || isTempDb) ){
2179 vfsFlags = (vfsFlags & ~SQLITE_OPEN_MAIN_DB) | SQLITE_OPEN_TEMP_DB; 2187 vfsFlags = (vfsFlags & ~SQLITE_OPEN_MAIN_DB) | SQLITE_OPEN_TEMP_DB;
2180 } 2188 }
2181 p = sqlite3MallocZero(sizeof(Btree)); 2189 p = sqlite3MallocZero(sizeof(Btree));
2182 if( !p ){ 2190 if( !p ){
2183 return SQLITE_NOMEM; 2191 return SQLITE_NOMEM_BKPT;
2184 } 2192 }
2185 p->inTrans = TRANS_NONE; 2193 p->inTrans = TRANS_NONE;
2186 p->db = db; 2194 p->db = db;
2187 #ifndef SQLITE_OMIT_SHARED_CACHE 2195 #ifndef SQLITE_OMIT_SHARED_CACHE
2188 p->lock.pBtree = p; 2196 p->lock.pBtree = p;
2189 p->lock.iTable = 1; 2197 p->lock.iTable = 1;
2190 #endif 2198 #endif
2191 2199
2192 #if !defined(SQLITE_OMIT_SHARED_CACHE) && !defined(SQLITE_OMIT_DISKIO) 2200 #if !defined(SQLITE_OMIT_SHARED_CACHE) && !defined(SQLITE_OMIT_DISKIO)
2193 /* 2201 /*
2194 ** If this Btree is a candidate for shared cache, try to find an 2202 ** If this Btree is a candidate for shared cache, try to find an
2195 ** existing BtShared object that we can share with 2203 ** existing BtShared object that we can share with
2196 */ 2204 */
2197 if( isTempDb==0 && (isMemdb==0 || (vfsFlags&SQLITE_OPEN_URI)!=0) ){ 2205 if( isTempDb==0 && (isMemdb==0 || (vfsFlags&SQLITE_OPEN_URI)!=0) ){
2198 if( vfsFlags & SQLITE_OPEN_SHAREDCACHE ){ 2206 if( vfsFlags & SQLITE_OPEN_SHAREDCACHE ){
2199 int nFilename = sqlite3Strlen30(zFilename)+1; 2207 int nFilename = sqlite3Strlen30(zFilename)+1;
2200 int nFullPathname = pVfs->mxPathname+1; 2208 int nFullPathname = pVfs->mxPathname+1;
2201 char *zFullPathname = sqlite3Malloc(MAX(nFullPathname,nFilename)); 2209 char *zFullPathname = sqlite3Malloc(MAX(nFullPathname,nFilename));
2202 MUTEX_LOGIC( sqlite3_mutex *mutexShared; ) 2210 MUTEX_LOGIC( sqlite3_mutex *mutexShared; )
2203 2211
2204 p->sharable = 1; 2212 p->sharable = 1;
2205 if( !zFullPathname ){ 2213 if( !zFullPathname ){
2206 sqlite3_free(p); 2214 sqlite3_free(p);
2207 return SQLITE_NOMEM; 2215 return SQLITE_NOMEM_BKPT;
2208 } 2216 }
2209 if( isMemdb ){ 2217 if( isMemdb ){
2210 memcpy(zFullPathname, zFilename, nFilename); 2218 memcpy(zFullPathname, zFilename, nFilename);
2211 }else{ 2219 }else{
2212 rc = sqlite3OsFullPathname(pVfs, zFilename, 2220 rc = sqlite3OsFullPathname(pVfs, zFilename,
2213 nFullPathname, zFullPathname); 2221 nFullPathname, zFullPathname);
2214 if( rc ){ 2222 if( rc ){
2215 sqlite3_free(zFullPathname); 2223 sqlite3_free(zFullPathname);
2216 sqlite3_free(p); 2224 sqlite3_free(p);
2217 return rc; 2225 return rc;
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after
2265 ** when compiling on a different architecture. 2273 ** when compiling on a different architecture.
2266 */ 2274 */
2267 assert( sizeof(i64)==8 ); 2275 assert( sizeof(i64)==8 );
2268 assert( sizeof(u64)==8 ); 2276 assert( sizeof(u64)==8 );
2269 assert( sizeof(u32)==4 ); 2277 assert( sizeof(u32)==4 );
2270 assert( sizeof(u16)==2 ); 2278 assert( sizeof(u16)==2 );
2271 assert( sizeof(Pgno)==4 ); 2279 assert( sizeof(Pgno)==4 );
2272 2280
2273 pBt = sqlite3MallocZero( sizeof(*pBt) ); 2281 pBt = sqlite3MallocZero( sizeof(*pBt) );
2274 if( pBt==0 ){ 2282 if( pBt==0 ){
2275 rc = SQLITE_NOMEM; 2283 rc = SQLITE_NOMEM_BKPT;
2276 goto btree_open_out; 2284 goto btree_open_out;
2277 } 2285 }
2278 rc = sqlite3PagerOpen(pVfs, &pBt->pPager, zFilename, 2286 rc = sqlite3PagerOpen(pVfs, &pBt->pPager, zFilename,
2279 EXTRA_SIZE, flags, vfsFlags, pageReinit); 2287 sizeof(MemPage), flags, vfsFlags, pageReinit);
2280 if( rc==SQLITE_OK ){ 2288 if( rc==SQLITE_OK ){
2281 sqlite3PagerSetMmapLimit(pBt->pPager, db->szMmap); 2289 sqlite3PagerSetMmapLimit(pBt->pPager, db->szMmap);
2282 rc = sqlite3PagerReadFileheader(pBt->pPager,sizeof(zDbHeader),zDbHeader); 2290 rc = sqlite3PagerReadFileheader(pBt->pPager,sizeof(zDbHeader),zDbHeader);
2283 } 2291 }
2284 if( rc!=SQLITE_OK ){ 2292 if( rc!=SQLITE_OK ){
2285 goto btree_open_out; 2293 goto btree_open_out;
2286 } 2294 }
2287 pBt->openFlags = (u8)flags; 2295 pBt->openFlags = (u8)flags;
2288 pBt->db = db; 2296 pBt->db = db;
2289 sqlite3PagerSetBusyhandler(pBt->pPager, btreeInvokeBusyHandler, pBt); 2297 sqlite3PagerSetBusyhandler(pBt->pPager, btreeInvokeBusyHandler, pBt);
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
2327 #endif 2335 #endif
2328 } 2336 }
2329 rc = sqlite3PagerSetPagesize(pBt->pPager, &pBt->pageSize, nReserve); 2337 rc = sqlite3PagerSetPagesize(pBt->pPager, &pBt->pageSize, nReserve);
2330 if( rc ) goto btree_open_out; 2338 if( rc ) goto btree_open_out;
2331 pBt->usableSize = pBt->pageSize - nReserve; 2339 pBt->usableSize = pBt->pageSize - nReserve;
2332 assert( (pBt->pageSize & 7)==0 ); /* 8-byte alignment of pageSize */ 2340 assert( (pBt->pageSize & 7)==0 ); /* 8-byte alignment of pageSize */
2333 2341
2334 #if !defined(SQLITE_OMIT_SHARED_CACHE) && !defined(SQLITE_OMIT_DISKIO) 2342 #if !defined(SQLITE_OMIT_SHARED_CACHE) && !defined(SQLITE_OMIT_DISKIO)
2335 /* Add the new BtShared object to the linked list sharable BtShareds. 2343 /* Add the new BtShared object to the linked list sharable BtShareds.
2336 */ 2344 */
2345 pBt->nRef = 1;
2337 if( p->sharable ){ 2346 if( p->sharable ){
2338 MUTEX_LOGIC( sqlite3_mutex *mutexShared; ) 2347 MUTEX_LOGIC( sqlite3_mutex *mutexShared; )
2339 pBt->nRef = 1;
2340 MUTEX_LOGIC( mutexShared = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER);) 2348 MUTEX_LOGIC( mutexShared = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER);)
2341 if( SQLITE_THREADSAFE && sqlite3GlobalConfig.bCoreMutex ){ 2349 if( SQLITE_THREADSAFE && sqlite3GlobalConfig.bCoreMutex ){
2342 pBt->mutex = sqlite3MutexAlloc(SQLITE_MUTEX_FAST); 2350 pBt->mutex = sqlite3MutexAlloc(SQLITE_MUTEX_FAST);
2343 if( pBt->mutex==0 ){ 2351 if( pBt->mutex==0 ){
2344 rc = SQLITE_NOMEM; 2352 rc = SQLITE_NOMEM_BKPT;
2345 db->mallocFailed = 0;
2346 goto btree_open_out; 2353 goto btree_open_out;
2347 } 2354 }
2348 } 2355 }
2349 sqlite3_mutex_enter(mutexShared); 2356 sqlite3_mutex_enter(mutexShared);
2350 pBt->pNext = GLOBAL(BtShared*,sqlite3SharedCacheList); 2357 pBt->pNext = GLOBAL(BtShared*,sqlite3SharedCacheList);
2351 GLOBAL(BtShared*,sqlite3SharedCacheList) = pBt; 2358 GLOBAL(BtShared*,sqlite3SharedCacheList) = pBt;
2352 sqlite3_mutex_leave(mutexShared); 2359 sqlite3_mutex_leave(mutexShared);
2353 } 2360 }
2354 #endif 2361 #endif
2355 } 2362 }
2356 2363
2357 #if !defined(SQLITE_OMIT_SHARED_CACHE) && !defined(SQLITE_OMIT_DISKIO) 2364 #if !defined(SQLITE_OMIT_SHARED_CACHE) && !defined(SQLITE_OMIT_DISKIO)
2358 /* If the new Btree uses a sharable pBtShared, then link the new 2365 /* If the new Btree uses a sharable pBtShared, then link the new
2359 ** Btree into the list of all sharable Btrees for the same connection. 2366 ** Btree into the list of all sharable Btrees for the same connection.
2360 ** The list is kept in ascending order by pBt address. 2367 ** The list is kept in ascending order by pBt address.
2361 */ 2368 */
2362 if( p->sharable ){ 2369 if( p->sharable ){
2363 int i; 2370 int i;
2364 Btree *pSib; 2371 Btree *pSib;
2365 for(i=0; i<db->nDb; i++){ 2372 for(i=0; i<db->nDb; i++){
2366 if( (pSib = db->aDb[i].pBt)!=0 && pSib->sharable ){ 2373 if( (pSib = db->aDb[i].pBt)!=0 && pSib->sharable ){
2367 while( pSib->pPrev ){ pSib = pSib->pPrev; } 2374 while( pSib->pPrev ){ pSib = pSib->pPrev; }
2368 if( p->pBt<pSib->pBt ){ 2375 if( (uptr)p->pBt<(uptr)pSib->pBt ){
2369 p->pNext = pSib; 2376 p->pNext = pSib;
2370 p->pPrev = 0; 2377 p->pPrev = 0;
2371 pSib->pPrev = p; 2378 pSib->pPrev = p;
2372 }else{ 2379 }else{
2373 while( pSib->pNext && pSib->pNext->pBt<p->pBt ){ 2380 while( pSib->pNext && (uptr)pSib->pNext->pBt<(uptr)p->pBt ){
2374 pSib = pSib->pNext; 2381 pSib = pSib->pNext;
2375 } 2382 }
2376 p->pNext = pSib->pNext; 2383 p->pNext = pSib->pNext;
2377 p->pPrev = pSib; 2384 p->pPrev = pSib;
2378 if( p->pNext ){ 2385 if( p->pNext ){
2379 p->pNext->pPrev = p; 2386 p->pNext->pPrev = p;
2380 } 2387 }
2381 pSib->pNext = p; 2388 pSib->pNext = p;
2382 } 2389 }
2383 break; 2390 break;
2384 } 2391 }
2385 } 2392 }
2386 } 2393 }
2387 #endif 2394 #endif
2388 *ppBtree = p; 2395 *ppBtree = p;
2389 2396
2390 btree_open_out: 2397 btree_open_out:
2391 if( rc!=SQLITE_OK ){ 2398 if( rc!=SQLITE_OK ){
2392 if( pBt && pBt->pPager ){ 2399 if( pBt && pBt->pPager ){
2393 sqlite3PagerClose(pBt->pPager); 2400 sqlite3PagerClose(pBt->pPager, 0);
2394 } 2401 }
2395 sqlite3_free(pBt); 2402 sqlite3_free(pBt);
2396 sqlite3_free(p); 2403 sqlite3_free(p);
2397 *ppBtree = 0; 2404 *ppBtree = 0;
2398 }else{ 2405 }else{
2406 sqlite3_file *pFile;
2407
2399 /* If the B-Tree was successfully opened, set the pager-cache size to the 2408 /* If the B-Tree was successfully opened, set the pager-cache size to the
2400 ** default value. Except, when opening on an existing shared pager-cache, 2409 ** default value. Except, when opening on an existing shared pager-cache,
2401 ** do not change the pager-cache size. 2410 ** do not change the pager-cache size.
2402 */ 2411 */
2403 if( sqlite3BtreeSchema(p, 0, 0)==0 ){ 2412 if( sqlite3BtreeSchema(p, 0, 0)==0 ){
2404 sqlite3PagerSetCachesize(p->pBt->pPager, SQLITE_DEFAULT_CACHE_SIZE); 2413 sqlite3PagerSetCachesize(p->pBt->pPager, SQLITE_DEFAULT_CACHE_SIZE);
2405 } 2414 }
2415
2416 pFile = sqlite3PagerFile(pBt->pPager);
2417 if( pFile->pMethods ){
2418 sqlite3OsFileControlHint(pFile, SQLITE_FCNTL_PDB, (void*)&pBt->db);
2419 }
2406 } 2420 }
2407 if( mutexOpen ){ 2421 if( mutexOpen ){
2408 assert( sqlite3_mutex_held(mutexOpen) ); 2422 assert( sqlite3_mutex_held(mutexOpen) );
2409 sqlite3_mutex_leave(mutexOpen); 2423 sqlite3_mutex_leave(mutexOpen);
2410 } 2424 }
2425 assert( rc!=SQLITE_OK || sqlite3BtreeConnectionCount(*ppBtree)>0 );
2411 return rc; 2426 return rc;
2412 } 2427 }
2413 2428
2414 /* 2429 /*
2415 ** Decrement the BtShared.nRef counter. When it reaches zero, 2430 ** Decrement the BtShared.nRef counter. When it reaches zero,
2416 ** remove the BtShared structure from the sharing list. Return 2431 ** remove the BtShared structure from the sharing list. Return
2417 ** true if the BtShared.nRef counter reaches zero and return 2432 ** true if the BtShared.nRef counter reaches zero and return
2418 ** false if it is still positive. 2433 ** false if it is still positive.
2419 */ 2434 */
2420 static int removeFromSharingList(BtShared *pBt){ 2435 static int removeFromSharingList(BtShared *pBt){
(...skipping 103 matching lines...) Expand 10 before | Expand all | Expand 10 after
2524 ** up the shared-btree. 2539 ** up the shared-btree.
2525 */ 2540 */
2526 assert( p->wantToLock==0 && p->locked==0 ); 2541 assert( p->wantToLock==0 && p->locked==0 );
2527 if( !p->sharable || removeFromSharingList(pBt) ){ 2542 if( !p->sharable || removeFromSharingList(pBt) ){
2528 /* The pBt is no longer on the sharing list, so we can access 2543 /* The pBt is no longer on the sharing list, so we can access
2529 ** it without having to hold the mutex. 2544 ** it without having to hold the mutex.
2530 ** 2545 **
2531 ** Clean out and delete the BtShared object. 2546 ** Clean out and delete the BtShared object.
2532 */ 2547 */
2533 assert( !pBt->pCursor ); 2548 assert( !pBt->pCursor );
2534 sqlite3PagerClose(pBt->pPager); 2549 sqlite3PagerClose(pBt->pPager, p->db);
2535 if( pBt->xFreeSchema && pBt->pSchema ){ 2550 if( pBt->xFreeSchema && pBt->pSchema ){
2536 pBt->xFreeSchema(pBt->pSchema); 2551 pBt->xFreeSchema(pBt->pSchema);
2537 } 2552 }
2538 sqlite3DbFree(0, pBt->pSchema); 2553 sqlite3DbFree(0, pBt->pSchema);
2539 freeTempSpace(pBt); 2554 freeTempSpace(pBt);
2540 sqlite3_free(pBt); 2555 sqlite3_free(pBt);
2541 } 2556 }
2542 2557
2543 #ifndef SQLITE_OMIT_SHARED_CACHE 2558 #ifndef SQLITE_OMIT_SHARED_CACHE
2544 assert( p->wantToLock==0 ); 2559 assert( p->wantToLock==0 );
(...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after
2618 BtShared *pBt = p->pBt; 2633 BtShared *pBt = p->pBt;
2619 assert( sqlite3_mutex_held(p->db->mutex) ); 2634 assert( sqlite3_mutex_held(p->db->mutex) );
2620 sqlite3BtreeEnter(p); 2635 sqlite3BtreeEnter(p);
2621 sqlite3PagerSetFlags(pBt->pPager, pgFlags); 2636 sqlite3PagerSetFlags(pBt->pPager, pgFlags);
2622 sqlite3BtreeLeave(p); 2637 sqlite3BtreeLeave(p);
2623 return SQLITE_OK; 2638 return SQLITE_OK;
2624 } 2639 }
2625 #endif 2640 #endif
2626 2641
2627 /* 2642 /*
2628 ** Return TRUE if the given btree is set to safety level 1. In other
2629 ** words, return TRUE if no sync() occurs on the disk files.
2630 */
2631 int sqlite3BtreeSyncDisabled(Btree *p){
2632 BtShared *pBt = p->pBt;
2633 int rc;
2634 assert( sqlite3_mutex_held(p->db->mutex) );
2635 sqlite3BtreeEnter(p);
2636 assert( pBt && pBt->pPager );
2637 rc = sqlite3PagerNosync(pBt->pPager);
2638 sqlite3BtreeLeave(p);
2639 return rc;
2640 }
2641
2642 /*
2643 ** Change the default pages size and the number of reserved bytes per page. 2643 ** Change the default pages size and the number of reserved bytes per page.
2644 ** Or, if the page size has already been fixed, return SQLITE_READONLY 2644 ** Or, if the page size has already been fixed, return SQLITE_READONLY
2645 ** without changing anything. 2645 ** without changing anything.
2646 ** 2646 **
2647 ** The page size must be a power of 2 between 512 and 65536. If the page 2647 ** The page size must be a power of 2 between 512 and 65536. If the page
2648 ** size supplied does not meet this constraint then the page size is not 2648 ** size supplied does not meet this constraint then the page size is not
2649 ** changed. 2649 ** changed.
2650 ** 2650 **
2651 ** Page sizes are constrained to be a power of two so that the region 2651 ** Page sizes are constrained to be a power of two so that the region
2652 ** of the database file used for locking (beginning at PENDING_BYTE, 2652 ** of the database file used for locking (beginning at PENDING_BYTE,
(...skipping 224 matching lines...) Expand 10 before | Expand all | Expand 10 after
2877 ** The caller detects this and calls this function again. This is 2877 ** The caller detects this and calls this function again. This is
2878 ** required as the version of page 1 currently in the page1 buffer 2878 ** required as the version of page 1 currently in the page1 buffer
2879 ** may not be the latest version - there may be a newer one in the log 2879 ** may not be the latest version - there may be a newer one in the log
2880 ** file. 2880 ** file.
2881 */ 2881 */
2882 if( page1[19]==2 && (pBt->btsFlags & BTS_NO_WAL)==0 ){ 2882 if( page1[19]==2 && (pBt->btsFlags & BTS_NO_WAL)==0 ){
2883 int isOpen = 0; 2883 int isOpen = 0;
2884 rc = sqlite3PagerOpenWal(pBt->pPager, &isOpen); 2884 rc = sqlite3PagerOpenWal(pBt->pPager, &isOpen);
2885 if( rc!=SQLITE_OK ){ 2885 if( rc!=SQLITE_OK ){
2886 goto page1_init_failed; 2886 goto page1_init_failed;
2887 }else if( isOpen==0 ){ 2887 }else{
2888 releasePage(pPage1); 2888 #if SQLITE_DEFAULT_SYNCHRONOUS!=SQLITE_DEFAULT_WAL_SYNCHRONOUS
2889 return SQLITE_OK; 2889 sqlite3 *db;
2890 Db *pDb;
2891 if( (db=pBt->db)!=0 && (pDb=db->aDb)!=0 ){
2892 while( pDb->pBt==0 || pDb->pBt->pBt!=pBt ){ pDb++; }
2893 if( pDb->bSyncSet==0
2894 && pDb->safety_level==SQLITE_DEFAULT_SYNCHRONOUS+1
2895 ){
2896 pDb->safety_level = SQLITE_DEFAULT_WAL_SYNCHRONOUS+1;
2897 sqlite3PagerSetFlags(pBt->pPager,
2898 pDb->safety_level | (db->flags & PAGER_FLAGS_MASK));
2899 }
2900 }
2901 #endif
2902 if( isOpen==0 ){
2903 releasePage(pPage1);
2904 return SQLITE_OK;
2905 }
2890 } 2906 }
2891 rc = SQLITE_NOTADB; 2907 rc = SQLITE_NOTADB;
2892 } 2908 }
2893 #endif 2909 #endif
2894 2910
2895 /* EVIDENCE-OF: R-15465-20813 The maximum and minimum embedded payload 2911 /* EVIDENCE-OF: R-15465-20813 The maximum and minimum embedded payload
2896 ** fractions and the leaf payload fraction values must be 64, 32, and 32. 2912 ** fractions and the leaf payload fraction values must be 64, 32, and 32.
2897 ** 2913 **
2898 ** The original design allowed these amounts to vary, but as of 2914 ** The original design allowed these amounts to vary, but as of
2899 ** version 3.6.0, we require them to be fixed. 2915 ** version 3.6.0, we require them to be fixed.
(...skipping 219 matching lines...) Expand 10 before | Expand all | Expand 10 after
3119 ** 3135 **
3120 ** Suppose there are two processes A and B. A has a read lock and B has 3136 ** Suppose there are two processes A and B. A has a read lock and B has
3121 ** a reserved lock. B tries to promote to exclusive but is blocked because 3137 ** a reserved lock. B tries to promote to exclusive but is blocked because
3122 ** of A's read lock. A tries to promote to reserved but is blocked by B. 3138 ** of A's read lock. A tries to promote to reserved but is blocked by B.
3123 ** One or the other of the two processes must give way or there can be 3139 ** One or the other of the two processes must give way or there can be
3124 ** no progress. By returning SQLITE_BUSY and not invoking the busy callback 3140 ** no progress. By returning SQLITE_BUSY and not invoking the busy callback
3125 ** when A already has a read lock, we encourage A to give up and let B 3141 ** when A already has a read lock, we encourage A to give up and let B
3126 ** proceed. 3142 ** proceed.
3127 */ 3143 */
3128 int sqlite3BtreeBeginTrans(Btree *p, int wrflag){ 3144 int sqlite3BtreeBeginTrans(Btree *p, int wrflag){
3129 sqlite3 *pBlock = 0;
3130 BtShared *pBt = p->pBt; 3145 BtShared *pBt = p->pBt;
3131 int rc = SQLITE_OK; 3146 int rc = SQLITE_OK;
3132 3147
3133 sqlite3BtreeEnter(p); 3148 sqlite3BtreeEnter(p);
3134 btreeIntegrity(p); 3149 btreeIntegrity(p);
3135 3150
3136 /* If the btree is already in a write-transaction, or it 3151 /* If the btree is already in a write-transaction, or it
3137 ** is already in a read-transaction and a read-transaction 3152 ** is already in a read-transaction and a read-transaction
3138 ** is requested, this is a no-op. 3153 ** is requested, this is a no-op.
3139 */ 3154 */
3140 if( p->inTrans==TRANS_WRITE || (p->inTrans==TRANS_READ && !wrflag) ){ 3155 if( p->inTrans==TRANS_WRITE || (p->inTrans==TRANS_READ && !wrflag) ){
3141 goto trans_begun; 3156 goto trans_begun;
3142 } 3157 }
3143 assert( pBt->inTransaction==TRANS_WRITE || IfNotOmitAV(pBt->bDoTruncate)==0 ); 3158 assert( pBt->inTransaction==TRANS_WRITE || IfNotOmitAV(pBt->bDoTruncate)==0 );
3144 3159
3145 /* Write transactions are not possible on a read-only database */ 3160 /* Write transactions are not possible on a read-only database */
3146 if( (pBt->btsFlags & BTS_READ_ONLY)!=0 && wrflag ){ 3161 if( (pBt->btsFlags & BTS_READ_ONLY)!=0 && wrflag ){
3147 rc = SQLITE_READONLY; 3162 rc = SQLITE_READONLY;
3148 goto trans_begun; 3163 goto trans_begun;
3149 } 3164 }
3150 3165
3151 #ifndef SQLITE_OMIT_SHARED_CACHE 3166 #ifndef SQLITE_OMIT_SHARED_CACHE
3152 /* If another database handle has already opened a write transaction 3167 {
3153 ** on this shared-btree structure and a second write transaction is 3168 sqlite3 *pBlock = 0;
3154 ** requested, return SQLITE_LOCKED. 3169 /* If another database handle has already opened a write transaction
3155 */ 3170 ** on this shared-btree structure and a second write transaction is
3156 if( (wrflag && pBt->inTransaction==TRANS_WRITE) 3171 ** requested, return SQLITE_LOCKED.
3157 || (pBt->btsFlags & BTS_PENDING)!=0 3172 */
3158 ){ 3173 if( (wrflag && pBt->inTransaction==TRANS_WRITE)
3159 pBlock = pBt->pWriter->db; 3174 || (pBt->btsFlags & BTS_PENDING)!=0
3160 }else if( wrflag>1 ){ 3175 ){
3161 BtLock *pIter; 3176 pBlock = pBt->pWriter->db;
3162 for(pIter=pBt->pLock; pIter; pIter=pIter->pNext){ 3177 }else if( wrflag>1 ){
3163 if( pIter->pBtree!=p ){ 3178 BtLock *pIter;
3164 pBlock = pIter->pBtree->db; 3179 for(pIter=pBt->pLock; pIter; pIter=pIter->pNext){
3165 break; 3180 if( pIter->pBtree!=p ){
3181 pBlock = pIter->pBtree->db;
3182 break;
3183 }
3166 } 3184 }
3167 } 3185 }
3168 } 3186 if( pBlock ){
3169 if( pBlock ){ 3187 sqlite3ConnectionBlocked(p->db, pBlock);
3170 sqlite3ConnectionBlocked(p->db, pBlock); 3188 rc = SQLITE_LOCKED_SHAREDCACHE;
3171 rc = SQLITE_LOCKED_SHAREDCACHE; 3189 goto trans_begun;
3172 goto trans_begun; 3190 }
3173 } 3191 }
3174 #endif 3192 #endif
3175 3193
3176 /* Any read-only or read-write transaction implies a read-lock on 3194 /* Any read-only or read-write transaction implies a read-lock on
3177 ** page 1. So if some other shared-cache client already has a write-lock 3195 ** page 1. So if some other shared-cache client already has a write-lock
3178 ** on page 1, the transaction cannot be opened. */ 3196 ** on page 1, the transaction cannot be opened. */
3179 rc = querySharedCacheTableLock(p, MASTER_ROOT, READ_LOCK); 3197 rc = querySharedCacheTableLock(p, MASTER_ROOT, READ_LOCK);
3180 if( SQLITE_OK!=rc ) goto trans_begun; 3198 if( SQLITE_OK!=rc ) goto trans_begun;
3181 3199
3182 pBt->btsFlags &= ~BTS_INITIALLY_EMPTY; 3200 pBt->btsFlags &= ~BTS_INITIALLY_EMPTY;
(...skipping 85 matching lines...) Expand 10 before | Expand all | Expand 10 after
3268 /* 3286 /*
3269 ** Set the pointer-map entries for all children of page pPage. Also, if 3287 ** Set the pointer-map entries for all children of page pPage. Also, if
3270 ** pPage contains cells that point to overflow pages, set the pointer 3288 ** pPage contains cells that point to overflow pages, set the pointer
3271 ** map entries for the overflow pages as well. 3289 ** map entries for the overflow pages as well.
3272 */ 3290 */
3273 static int setChildPtrmaps(MemPage *pPage){ 3291 static int setChildPtrmaps(MemPage *pPage){
3274 int i; /* Counter variable */ 3292 int i; /* Counter variable */
3275 int nCell; /* Number of cells in page pPage */ 3293 int nCell; /* Number of cells in page pPage */
3276 int rc; /* Return code */ 3294 int rc; /* Return code */
3277 BtShared *pBt = pPage->pBt; 3295 BtShared *pBt = pPage->pBt;
3278 u8 isInitOrig = pPage->isInit;
3279 Pgno pgno = pPage->pgno; 3296 Pgno pgno = pPage->pgno;
3280 3297
3281 assert( sqlite3_mutex_held(pPage->pBt->mutex) ); 3298 assert( sqlite3_mutex_held(pPage->pBt->mutex) );
3282 rc = btreeInitPage(pPage); 3299 rc = btreeInitPage(pPage);
3283 if( rc!=SQLITE_OK ){ 3300 if( rc!=SQLITE_OK ) return rc;
3284 goto set_child_ptrmaps_out;
3285 }
3286 nCell = pPage->nCell; 3301 nCell = pPage->nCell;
3287 3302
3288 for(i=0; i<nCell; i++){ 3303 for(i=0; i<nCell; i++){
3289 u8 *pCell = findCell(pPage, i); 3304 u8 *pCell = findCell(pPage, i);
3290 3305
3291 ptrmapPutOvflPtr(pPage, pCell, &rc); 3306 ptrmapPutOvflPtr(pPage, pCell, &rc);
3292 3307
3293 if( !pPage->leaf ){ 3308 if( !pPage->leaf ){
3294 Pgno childPgno = get4byte(pCell); 3309 Pgno childPgno = get4byte(pCell);
3295 ptrmapPut(pBt, childPgno, PTRMAP_BTREE, pgno, &rc); 3310 ptrmapPut(pBt, childPgno, PTRMAP_BTREE, pgno, &rc);
3296 } 3311 }
3297 } 3312 }
3298 3313
3299 if( !pPage->leaf ){ 3314 if( !pPage->leaf ){
3300 Pgno childPgno = get4byte(&pPage->aData[pPage->hdrOffset+8]); 3315 Pgno childPgno = get4byte(&pPage->aData[pPage->hdrOffset+8]);
3301 ptrmapPut(pBt, childPgno, PTRMAP_BTREE, pgno, &rc); 3316 ptrmapPut(pBt, childPgno, PTRMAP_BTREE, pgno, &rc);
3302 } 3317 }
3303 3318
3304 set_child_ptrmaps_out:
3305 pPage->isInit = isInitOrig;
3306 return rc; 3319 return rc;
3307 } 3320 }
3308 3321
3309 /* 3322 /*
3310 ** Somewhere on pPage is a pointer to page iFrom. Modify this pointer so 3323 ** Somewhere on pPage is a pointer to page iFrom. Modify this pointer so
3311 ** that it points to iTo. Parameter eType describes the type of pointer to 3324 ** that it points to iTo. Parameter eType describes the type of pointer to
3312 ** be modified, as follows: 3325 ** be modified, as follows:
3313 ** 3326 **
3314 ** PTRMAP_BTREE: pPage is a btree-page. The pointer points at a child 3327 ** PTRMAP_BTREE: pPage is a btree-page. The pointer points at a child
3315 ** page of pPage. 3328 ** page of pPage.
3316 ** 3329 **
3317 ** PTRMAP_OVERFLOW1: pPage is a btree-page. The pointer points at an overflow 3330 ** PTRMAP_OVERFLOW1: pPage is a btree-page. The pointer points at an overflow
3318 ** page pointed to by one of the cells on pPage. 3331 ** page pointed to by one of the cells on pPage.
3319 ** 3332 **
3320 ** PTRMAP_OVERFLOW2: pPage is an overflow-page. The pointer points at the next 3333 ** PTRMAP_OVERFLOW2: pPage is an overflow-page. The pointer points at the next
3321 ** overflow page in the list. 3334 ** overflow page in the list.
3322 */ 3335 */
3323 static int modifyPagePointer(MemPage *pPage, Pgno iFrom, Pgno iTo, u8 eType){ 3336 static int modifyPagePointer(MemPage *pPage, Pgno iFrom, Pgno iTo, u8 eType){
3324 assert( sqlite3_mutex_held(pPage->pBt->mutex) ); 3337 assert( sqlite3_mutex_held(pPage->pBt->mutex) );
3325 assert( sqlite3PagerIswriteable(pPage->pDbPage) ); 3338 assert( sqlite3PagerIswriteable(pPage->pDbPage) );
3326 if( eType==PTRMAP_OVERFLOW2 ){ 3339 if( eType==PTRMAP_OVERFLOW2 ){
3327 /* The pointer is always the first 4 bytes of the page in this case. */ 3340 /* The pointer is always the first 4 bytes of the page in this case. */
3328 if( get4byte(pPage->aData)!=iFrom ){ 3341 if( get4byte(pPage->aData)!=iFrom ){
3329 return SQLITE_CORRUPT_BKPT; 3342 return SQLITE_CORRUPT_BKPT;
3330 } 3343 }
3331 put4byte(pPage->aData, iTo); 3344 put4byte(pPage->aData, iTo);
3332 }else{ 3345 }else{
3333 u8 isInitOrig = pPage->isInit;
3334 int i; 3346 int i;
3335 int nCell; 3347 int nCell;
3336 int rc; 3348 int rc;
3337 3349
3338 rc = btreeInitPage(pPage); 3350 rc = btreeInitPage(pPage);
3339 if( rc ) return rc; 3351 if( rc ) return rc;
3340 nCell = pPage->nCell; 3352 nCell = pPage->nCell;
3341 3353
3342 for(i=0; i<nCell; i++){ 3354 for(i=0; i<nCell; i++){
3343 u8 *pCell = findCell(pPage, i); 3355 u8 *pCell = findCell(pPage, i);
3344 if( eType==PTRMAP_OVERFLOW1 ){ 3356 if( eType==PTRMAP_OVERFLOW1 ){
3345 CellInfo info; 3357 CellInfo info;
3346 pPage->xParseCell(pPage, pCell, &info); 3358 pPage->xParseCell(pPage, pCell, &info);
3347 if( info.nLocal<info.nPayload 3359 if( info.nLocal<info.nPayload ){
3348 && pCell+info.nSize-1<=pPage->aData+pPage->maskPage 3360 if( pCell+info.nSize > pPage->aData+pPage->pBt->usableSize ){
3349 && iFrom==get4byte(pCell+info.nSize-4) 3361 return SQLITE_CORRUPT_BKPT;
3350 ){ 3362 }
3351 put4byte(pCell+info.nSize-4, iTo); 3363 if( iFrom==get4byte(pCell+info.nSize-4) ){
3352 break; 3364 put4byte(pCell+info.nSize-4, iTo);
3365 break;
3366 }
3353 } 3367 }
3354 }else{ 3368 }else{
3355 if( get4byte(pCell)==iFrom ){ 3369 if( get4byte(pCell)==iFrom ){
3356 put4byte(pCell, iTo); 3370 put4byte(pCell, iTo);
3357 break; 3371 break;
3358 } 3372 }
3359 } 3373 }
3360 } 3374 }
3361 3375
3362 if( i==nCell ){ 3376 if( i==nCell ){
3363 if( eType!=PTRMAP_BTREE || 3377 if( eType!=PTRMAP_BTREE ||
3364 get4byte(&pPage->aData[pPage->hdrOffset+8])!=iFrom ){ 3378 get4byte(&pPage->aData[pPage->hdrOffset+8])!=iFrom ){
3365 return SQLITE_CORRUPT_BKPT; 3379 return SQLITE_CORRUPT_BKPT;
3366 } 3380 }
3367 put4byte(&pPage->aData[pPage->hdrOffset+8], iTo); 3381 put4byte(&pPage->aData[pPage->hdrOffset+8], iTo);
3368 } 3382 }
3369
3370 pPage->isInit = isInitOrig;
3371 } 3383 }
3372 return SQLITE_OK; 3384 return SQLITE_OK;
3373 } 3385 }
3374 3386
3375 3387
3376 /* 3388 /*
3377 ** Move the open database page pDbPage to location iFreePage in the 3389 ** Move the open database page pDbPage to location iFreePage in the
3378 ** database. The pDbPage reference remains valid. 3390 ** database. The pDbPage reference remains valid.
3379 ** 3391 **
3380 ** The isCommit flag indicates that there is no need to remember that 3392 ** The isCommit flag indicates that there is no need to remember that
(...skipping 638 matching lines...) Expand 10 before | Expand all | Expand 10 after
4019 ** from a normal transaction rollback, as no locks are released and the 4031 ** from a normal transaction rollback, as no locks are released and the
4020 ** transaction remains open. 4032 ** transaction remains open.
4021 */ 4033 */
4022 int sqlite3BtreeSavepoint(Btree *p, int op, int iSavepoint){ 4034 int sqlite3BtreeSavepoint(Btree *p, int op, int iSavepoint){
4023 int rc = SQLITE_OK; 4035 int rc = SQLITE_OK;
4024 if( p && p->inTrans==TRANS_WRITE ){ 4036 if( p && p->inTrans==TRANS_WRITE ){
4025 BtShared *pBt = p->pBt; 4037 BtShared *pBt = p->pBt;
4026 assert( op==SAVEPOINT_RELEASE || op==SAVEPOINT_ROLLBACK ); 4038 assert( op==SAVEPOINT_RELEASE || op==SAVEPOINT_ROLLBACK );
4027 assert( iSavepoint>=0 || (iSavepoint==-1 && op==SAVEPOINT_ROLLBACK) ); 4039 assert( iSavepoint>=0 || (iSavepoint==-1 && op==SAVEPOINT_ROLLBACK) );
4028 sqlite3BtreeEnter(p); 4040 sqlite3BtreeEnter(p);
4029 rc = sqlite3PagerSavepoint(pBt->pPager, op, iSavepoint); 4041 if( op==SAVEPOINT_ROLLBACK ){
4042 rc = saveAllCursors(pBt, 0, 0);
4043 }
4044 if( rc==SQLITE_OK ){
4045 rc = sqlite3PagerSavepoint(pBt->pPager, op, iSavepoint);
4046 }
4030 if( rc==SQLITE_OK ){ 4047 if( rc==SQLITE_OK ){
4031 if( iSavepoint<0 && (pBt->btsFlags & BTS_INITIALLY_EMPTY)!=0 ){ 4048 if( iSavepoint<0 && (pBt->btsFlags & BTS_INITIALLY_EMPTY)!=0 ){
4032 pBt->nPage = 0; 4049 pBt->nPage = 0;
4033 } 4050 }
4034 rc = newDatabase(pBt); 4051 rc = newDatabase(pBt);
4035 pBt->nPage = get4byte(28 + pBt->pPage1->aData); 4052 pBt->nPage = get4byte(28 + pBt->pPage1->aData);
4036 4053
4037 /* The database size was written into the offset 28 of the header 4054 /* The database size was written into the offset 28 of the header
4038 ** when the transaction started, so we know that the value at offset 4055 ** when the transaction started, so we know that the value at offset
4039 ** 28 is nonzero. */ 4056 ** 28 is nonzero. */
4040 assert( pBt->nPage>0 ); 4057 assert( pBt->nPage>0 );
4041 } 4058 }
4042 sqlite3BtreeLeave(p); 4059 sqlite3BtreeLeave(p);
4043 } 4060 }
4044 return rc; 4061 return rc;
4045 } 4062 }
4046 4063
4047 /* 4064 /*
4048 ** Create a new cursor for the BTree whose root is on the page 4065 ** Create a new cursor for the BTree whose root is on the page
4049 ** iTable. If a read-only cursor is requested, it is assumed that 4066 ** iTable. If a read-only cursor is requested, it is assumed that
4050 ** the caller already has at least a read-only transaction open 4067 ** the caller already has at least a read-only transaction open
4051 ** on the database already. If a write-cursor is requested, then 4068 ** on the database already. If a write-cursor is requested, then
4052 ** the caller is assumed to have an open write transaction. 4069 ** the caller is assumed to have an open write transaction.
4053 ** 4070 **
4054 ** If wrFlag==0, then the cursor can only be used for reading. 4071 ** If the BTREE_WRCSR bit of wrFlag is clear, then the cursor can only
4055 ** If wrFlag==1, then the cursor can be used for reading or for 4072 ** be used for reading. If the BTREE_WRCSR bit is set, then the cursor
4056 ** writing if other conditions for writing are also met. These 4073 ** can be used for reading or for writing if other conditions for writing
4057 ** are the conditions that must be met in order for writing to 4074 ** are also met. These are the conditions that must be met in order
4058 ** be allowed: 4075 ** for writing to be allowed:
4059 ** 4076 **
4060 ** 1: The cursor must have been opened with wrFlag==1 4077 ** 1: The cursor must have been opened with wrFlag containing BTREE_WRCSR
4061 ** 4078 **
4062 ** 2: Other database connections that share the same pager cache 4079 ** 2: Other database connections that share the same pager cache
4063 ** but which are not in the READ_UNCOMMITTED state may not have 4080 ** but which are not in the READ_UNCOMMITTED state may not have
4064 ** cursors open with wrFlag==0 on the same table. Otherwise 4081 ** cursors open with wrFlag==0 on the same table. Otherwise
4065 ** the changes made by this write cursor would be visible to 4082 ** the changes made by this write cursor would be visible to
4066 ** the read cursors in the other database connection. 4083 ** the read cursors in the other database connection.
4067 ** 4084 **
4068 ** 3: The database must be writable (not on read-only media) 4085 ** 3: The database must be writable (not on read-only media)
4069 ** 4086 **
4070 ** 4: There must be an active transaction. 4087 ** 4: There must be an active transaction.
4071 ** 4088 **
4089 ** The BTREE_FORDELETE bit of wrFlag may optionally be set if BTREE_WRCSR
4090 ** is set. If FORDELETE is set, that is a hint to the implementation that
4091 ** this cursor will only be used to seek to and delete entries of an index
4092 ** as part of a larger DELETE statement. The FORDELETE hint is not used by
4093 ** this implementation. But in a hypothetical alternative storage engine
4094 ** in which index entries are automatically deleted when corresponding table
4095 ** rows are deleted, the FORDELETE flag is a hint that all SEEK and DELETE
4096 ** operations on this cursor can be no-ops and all READ operations can
4097 ** return a null row (2-bytes: 0x01 0x00).
4098 **
4072 ** No checking is done to make sure that page iTable really is the 4099 ** No checking is done to make sure that page iTable really is the
4073 ** root page of a b-tree. If it is not, then the cursor acquired 4100 ** root page of a b-tree. If it is not, then the cursor acquired
4074 ** will not work correctly. 4101 ** will not work correctly.
4075 ** 4102 **
4076 ** It is assumed that the sqlite3BtreeCursorZero() has been called 4103 ** It is assumed that the sqlite3BtreeCursorZero() has been called
4077 ** on pCur to initialize the memory space prior to invoking this routine. 4104 ** on pCur to initialize the memory space prior to invoking this routine.
4078 */ 4105 */
4079 static int btreeCursor( 4106 static int btreeCursor(
4080 Btree *p, /* The btree */ 4107 Btree *p, /* The btree */
4081 int iTable, /* Root page of table to open */ 4108 int iTable, /* Root page of table to open */
(...skipping 18 matching lines...) Expand all
4100 assert( wrFlag==0 || !hasReadConflicts(p, iTable) ); 4127 assert( wrFlag==0 || !hasReadConflicts(p, iTable) );
4101 4128
4102 /* Assert that the caller has opened the required transaction. */ 4129 /* Assert that the caller has opened the required transaction. */
4103 assert( p->inTrans>TRANS_NONE ); 4130 assert( p->inTrans>TRANS_NONE );
4104 assert( wrFlag==0 || p->inTrans==TRANS_WRITE ); 4131 assert( wrFlag==0 || p->inTrans==TRANS_WRITE );
4105 assert( pBt->pPage1 && pBt->pPage1->aData ); 4132 assert( pBt->pPage1 && pBt->pPage1->aData );
4106 assert( wrFlag==0 || (pBt->btsFlags & BTS_READ_ONLY)==0 ); 4133 assert( wrFlag==0 || (pBt->btsFlags & BTS_READ_ONLY)==0 );
4107 4134
4108 if( wrFlag ){ 4135 if( wrFlag ){
4109 allocateTempSpace(pBt); 4136 allocateTempSpace(pBt);
4110 if( pBt->pTmpSpace==0 ) return SQLITE_NOMEM; 4137 if( pBt->pTmpSpace==0 ) return SQLITE_NOMEM_BKPT;
4111 } 4138 }
4112 if( iTable==1 && btreePagecount(pBt)==0 ){ 4139 if( iTable==1 && btreePagecount(pBt)==0 ){
4113 assert( wrFlag==0 ); 4140 assert( wrFlag==0 );
4114 iTable = 0; 4141 iTable = 0;
4115 } 4142 }
4116 4143
4117 /* Now that no other errors can occur, finish filling in the BtCursor 4144 /* Now that no other errors can occur, finish filling in the BtCursor
4118 ** variables and link the cursor into the BtShared list. */ 4145 ** variables and link the cursor into the BtShared list. */
4119 pCur->pgnoRoot = (Pgno)iTable; 4146 pCur->pgnoRoot = (Pgno)iTable;
4120 pCur->iPage = -1; 4147 pCur->iPage = -1;
(...skipping 124 matching lines...) Expand 10 before | Expand all | Expand 10 after
4245 #ifndef NDEBUG /* The next routine used only within assert() statements */ 4272 #ifndef NDEBUG /* The next routine used only within assert() statements */
4246 /* 4273 /*
4247 ** Return true if the given BtCursor is valid. A valid cursor is one 4274 ** Return true if the given BtCursor is valid. A valid cursor is one
4248 ** that is currently pointing to a row in a (non-empty) table. 4275 ** that is currently pointing to a row in a (non-empty) table.
4249 ** This is a verification routine is used only within assert() statements. 4276 ** This is a verification routine is used only within assert() statements.
4250 */ 4277 */
4251 int sqlite3BtreeCursorIsValid(BtCursor *pCur){ 4278 int sqlite3BtreeCursorIsValid(BtCursor *pCur){
4252 return pCur && pCur->eState==CURSOR_VALID; 4279 return pCur && pCur->eState==CURSOR_VALID;
4253 } 4280 }
4254 #endif /* NDEBUG */ 4281 #endif /* NDEBUG */
4255 4282 int sqlite3BtreeCursorIsValidNN(BtCursor *pCur){
4256 /* 4283 assert( pCur!=0 );
4257 ** Set *pSize to the size of the buffer needed to hold the value of 4284 return pCur->eState==CURSOR_VALID;
4258 ** the key for the current entry. If the cursor is not pointing
4259 ** to a valid entry, *pSize is set to 0.
4260 **
4261 ** For a table with the INTKEY flag set, this routine returns the key
4262 ** itself, not the number of bytes in the key.
4263 **
4264 ** The caller must position the cursor prior to invoking this routine.
4265 **
4266 ** This routine cannot fail. It always returns SQLITE_OK.
4267 */
4268 int sqlite3BtreeKeySize(BtCursor *pCur, i64 *pSize){
4269 assert( cursorHoldsMutex(pCur) );
4270 assert( pCur->eState==CURSOR_VALID );
4271 getCellInfo(pCur);
4272 *pSize = pCur->info.nKey;
4273 return SQLITE_OK;
4274 } 4285 }
4275 4286
4276 /* 4287 /*
4277 ** Set *pSize to the number of bytes of data in the entry the 4288 ** Return the value of the integer key or "rowid" for a table btree.
4278 ** cursor currently points to. 4289 ** This routine is only valid for a cursor that is pointing into a
4290 ** ordinary table btree. If the cursor points to an index btree or
4291 ** is invalid, the result of this routine is undefined.
4292 */
4293 i64 sqlite3BtreeIntegerKey(BtCursor *pCur){
4294 assert( cursorHoldsMutex(pCur) );
4295 assert( pCur->eState==CURSOR_VALID );
4296 assert( pCur->curIntKey );
4297 getCellInfo(pCur);
4298 return pCur->info.nKey;
4299 }
4300
4301 /*
4302 ** Return the number of bytes of payload for the entry that pCur is
4303 ** currently pointing to. For table btrees, this will be the amount
4304 ** of data. For index btrees, this will be the size of the key.
4279 ** 4305 **
4280 ** The caller must guarantee that the cursor is pointing to a non-NULL 4306 ** The caller must guarantee that the cursor is pointing to a non-NULL
4281 ** valid entry. In other words, the calling procedure must guarantee 4307 ** valid entry. In other words, the calling procedure must guarantee
4282 ** that the cursor has Cursor.eState==CURSOR_VALID. 4308 ** that the cursor has Cursor.eState==CURSOR_VALID.
4283 **
4284 ** Failure is not possible. This function always returns SQLITE_OK.
4285 ** It might just as well be a procedure (returning void) but we continue
4286 ** to return an integer result code for historical reasons.
4287 */ 4309 */
4288 int sqlite3BtreeDataSize(BtCursor *pCur, u32 *pSize){ 4310 u32 sqlite3BtreePayloadSize(BtCursor *pCur){
4289 assert( cursorHoldsMutex(pCur) ); 4311 assert( cursorHoldsMutex(pCur) );
4290 assert( pCur->eState==CURSOR_VALID ); 4312 assert( pCur->eState==CURSOR_VALID );
4291 assert( pCur->iPage>=0 );
4292 assert( pCur->iPage<BTCURSOR_MAX_DEPTH );
4293 assert( pCur->apPage[pCur->iPage]->intKeyLeaf==1 );
4294 getCellInfo(pCur); 4313 getCellInfo(pCur);
4295 *pSize = pCur->info.nPayload; 4314 return pCur->info.nPayload;
4296 return SQLITE_OK;
4297 } 4315 }
4298 4316
4299 /* 4317 /*
4300 ** Given the page number of an overflow page in the database (parameter 4318 ** Given the page number of an overflow page in the database (parameter
4301 ** ovfl), this function finds the page number of the next page in the 4319 ** ovfl), this function finds the page number of the next page in the
4302 ** linked list of overflow pages. If possible, it uses the auto-vacuum 4320 ** linked list of overflow pages. If possible, it uses the auto-vacuum
4303 ** pointer-map data instead of reading the content of page ovfl to do so. 4321 ** pointer-map data instead of reading the content of page ovfl to do so.
4304 ** 4322 **
4305 ** If an error occurs an SQLite error code is returned. Otherwise: 4323 ** If an error occurs an SQLite error code is returned. Otherwise:
4306 ** 4324 **
(...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after
4404 return SQLITE_OK; 4422 return SQLITE_OK;
4405 } 4423 }
4406 4424
4407 /* 4425 /*
4408 ** This function is used to read or overwrite payload information 4426 ** This function is used to read or overwrite payload information
4409 ** for the entry that the pCur cursor is pointing to. The eOp 4427 ** for the entry that the pCur cursor is pointing to. The eOp
4410 ** argument is interpreted as follows: 4428 ** argument is interpreted as follows:
4411 ** 4429 **
4412 ** 0: The operation is a read. Populate the overflow cache. 4430 ** 0: The operation is a read. Populate the overflow cache.
4413 ** 1: The operation is a write. Populate the overflow cache. 4431 ** 1: The operation is a write. Populate the overflow cache.
4414 ** 2: The operation is a read. Do not populate the overflow cache.
4415 ** 4432 **
4416 ** A total of "amt" bytes are read or written beginning at "offset". 4433 ** A total of "amt" bytes are read or written beginning at "offset".
4417 ** Data is read to or from the buffer pBuf. 4434 ** Data is read to or from the buffer pBuf.
4418 ** 4435 **
4419 ** The content being read or written might appear on the main page 4436 ** The content being read or written might appear on the main page
4420 ** or be scattered out on multiple overflow pages. 4437 ** or be scattered out on multiple overflow pages.
4421 ** 4438 **
4422 ** If the current cursor entry uses one or more overflow pages and the 4439 ** If the current cursor entry uses one or more overflow pages
4423 ** eOp argument is not 2, this function may allocate space for and lazily 4440 ** this function may allocate space for and lazily populate
4424 ** populates the overflow page-list cache array (BtCursor.aOverflow). 4441 ** the overflow page-list cache array (BtCursor.aOverflow).
4425 ** Subsequent calls use this cache to make seeking to the supplied offset 4442 ** Subsequent calls use this cache to make seeking to the supplied offset
4426 ** more efficient. 4443 ** more efficient.
4427 ** 4444 **
4428 ** Once an overflow page-list cache has been allocated, it may be 4445 ** Once an overflow page-list cache has been allocated, it must be
4429 ** invalidated if some other cursor writes to the same table, or if 4446 ** invalidated if some other cursor writes to the same table, or if
4430 ** the cursor is moved to a different row. Additionally, in auto-vacuum 4447 ** the cursor is moved to a different row. Additionally, in auto-vacuum
4431 ** mode, the following events may invalidate an overflow page-list cache. 4448 ** mode, the following events may invalidate an overflow page-list cache.
4432 ** 4449 **
4433 ** * An incremental vacuum, 4450 ** * An incremental vacuum,
4434 ** * A commit in auto_vacuum="full" mode, 4451 ** * A commit in auto_vacuum="full" mode,
4435 ** * Creating a table (may require moving an overflow page). 4452 ** * Creating a table (may require moving an overflow page).
4436 */ 4453 */
4437 static int accessPayload( 4454 static int accessPayload(
4438 BtCursor *pCur, /* Cursor pointing to entry to read from */ 4455 BtCursor *pCur, /* Cursor pointing to entry to read from */
4439 u32 offset, /* Begin reading this far into payload */ 4456 u32 offset, /* Begin reading this far into payload */
4440 u32 amt, /* Read this many bytes */ 4457 u32 amt, /* Read this many bytes */
4441 unsigned char *pBuf, /* Write the bytes into this buffer */ 4458 unsigned char *pBuf, /* Write the bytes into this buffer */
4442 int eOp /* zero to read. non-zero to write. */ 4459 int eOp /* zero to read. non-zero to write. */
4443 ){ 4460 ){
4444 unsigned char *aPayload; 4461 unsigned char *aPayload;
4445 int rc = SQLITE_OK; 4462 int rc = SQLITE_OK;
4446 int iIdx = 0; 4463 int iIdx = 0;
4447 MemPage *pPage = pCur->apPage[pCur->iPage]; /* Btree page of current entry */ 4464 MemPage *pPage = pCur->apPage[pCur->iPage]; /* Btree page of current entry */
4448 BtShared *pBt = pCur->pBt; /* Btree this cursor belongs to */ 4465 BtShared *pBt = pCur->pBt; /* Btree this cursor belongs to */
4449 #ifdef SQLITE_DIRECT_OVERFLOW_READ 4466 #ifdef SQLITE_DIRECT_OVERFLOW_READ
4450 unsigned char * const pBufStart = pBuf; 4467 unsigned char * const pBufStart = pBuf; /* Start of original out buffer */
4451 int bEnd; /* True if reading to end of data */
4452 #endif 4468 #endif
4453 4469
4454 assert( pPage ); 4470 assert( pPage );
4471 assert( eOp==0 || eOp==1 );
4455 assert( pCur->eState==CURSOR_VALID ); 4472 assert( pCur->eState==CURSOR_VALID );
4456 assert( pCur->aiIdx[pCur->iPage]<pPage->nCell ); 4473 assert( pCur->aiIdx[pCur->iPage]<pPage->nCell );
4457 assert( cursorHoldsMutex(pCur) ); 4474 assert( cursorHoldsMutex(pCur) );
4458 assert( eOp!=2 || offset==0 ); /* Always start from beginning for eOp==2 */
4459 4475
4460 getCellInfo(pCur); 4476 getCellInfo(pCur);
4461 aPayload = pCur->info.pPayload; 4477 aPayload = pCur->info.pPayload;
4462 #ifdef SQLITE_DIRECT_OVERFLOW_READ
4463 bEnd = offset+amt==pCur->info.nPayload;
4464 #endif
4465 assert( offset+amt <= pCur->info.nPayload ); 4478 assert( offset+amt <= pCur->info.nPayload );
4466 4479
4467 if( &aPayload[pCur->info.nLocal] > &pPage->aData[pBt->usableSize] ){ 4480 assert( aPayload > pPage->aData );
4468 /* Trying to read or write past the end of the data is an error */ 4481 if( (uptr)(aPayload - pPage->aData) > (pBt->usableSize - pCur->info.nLocal) ){
4482 /* Trying to read or write past the end of the data is an error. The
4483 ** conditional above is really:
4484 ** &aPayload[pCur->info.nLocal] > &pPage->aData[pBt->usableSize]
4485 ** but is recast into its current form to avoid integer overflow problems
4486 */
4469 return SQLITE_CORRUPT_BKPT; 4487 return SQLITE_CORRUPT_BKPT;
4470 } 4488 }
4471 4489
4472 /* Check if data must be read/written to/from the btree page itself. */ 4490 /* Check if data must be read/written to/from the btree page itself. */
4473 if( offset<pCur->info.nLocal ){ 4491 if( offset<pCur->info.nLocal ){
4474 int a = amt; 4492 int a = amt;
4475 if( a+offset>pCur->info.nLocal ){ 4493 if( a+offset>pCur->info.nLocal ){
4476 a = pCur->info.nLocal - offset; 4494 a = pCur->info.nLocal - offset;
4477 } 4495 }
4478 rc = copyPayload(&aPayload[offset], pBuf, a, (eOp & 0x01), pPage->pDbPage); 4496 rc = copyPayload(&aPayload[offset], pBuf, a, eOp, pPage->pDbPage);
4479 offset = 0; 4497 offset = 0;
4480 pBuf += a; 4498 pBuf += a;
4481 amt -= a; 4499 amt -= a;
4482 }else{ 4500 }else{
4483 offset -= pCur->info.nLocal; 4501 offset -= pCur->info.nLocal;
4484 } 4502 }
4485 4503
4486 4504
4487 if( rc==SQLITE_OK && amt>0 ){ 4505 if( rc==SQLITE_OK && amt>0 ){
4488 const u32 ovflSize = pBt->usableSize - 4; /* Bytes content per ovfl page */ 4506 const u32 ovflSize = pBt->usableSize - 4; /* Bytes content per ovfl page */
4489 Pgno nextPage; 4507 Pgno nextPage;
4490 4508
4491 nextPage = get4byte(&aPayload[pCur->info.nLocal]); 4509 nextPage = get4byte(&aPayload[pCur->info.nLocal]);
4492 4510
4493 /* If the BtCursor.aOverflow[] has not been allocated, allocate it now. 4511 /* If the BtCursor.aOverflow[] has not been allocated, allocate it now.
4494 ** Except, do not allocate aOverflow[] for eOp==2.
4495 ** 4512 **
4496 ** The aOverflow[] array is sized at one entry for each overflow page 4513 ** The aOverflow[] array is sized at one entry for each overflow page
4497 ** in the overflow chain. The page number of the first overflow page is 4514 ** in the overflow chain. The page number of the first overflow page is
4498 ** stored in aOverflow[0], etc. A value of 0 in the aOverflow[] array 4515 ** stored in aOverflow[0], etc. A value of 0 in the aOverflow[] array
4499 ** means "not yet known" (the cache is lazily populated). 4516 ** means "not yet known" (the cache is lazily populated).
4500 */ 4517 */
4501 if( eOp!=2 && (pCur->curFlags & BTCF_ValidOvfl)==0 ){ 4518 if( (pCur->curFlags & BTCF_ValidOvfl)==0 ){
4502 int nOvfl = (pCur->info.nPayload-pCur->info.nLocal+ovflSize-1)/ovflSize; 4519 int nOvfl = (pCur->info.nPayload-pCur->info.nLocal+ovflSize-1)/ovflSize;
4503 if( nOvfl>pCur->nOvflAlloc ){ 4520 if( nOvfl>pCur->nOvflAlloc ){
4504 Pgno *aNew = (Pgno*)sqlite3Realloc( 4521 Pgno *aNew = (Pgno*)sqlite3Realloc(
4505 pCur->aOverflow, nOvfl*2*sizeof(Pgno) 4522 pCur->aOverflow, nOvfl*2*sizeof(Pgno)
4506 ); 4523 );
4507 if( aNew==0 ){ 4524 if( aNew==0 ){
4508 rc = SQLITE_NOMEM; 4525 return SQLITE_NOMEM_BKPT;
4509 }else{ 4526 }else{
4510 pCur->nOvflAlloc = nOvfl*2; 4527 pCur->nOvflAlloc = nOvfl*2;
4511 pCur->aOverflow = aNew; 4528 pCur->aOverflow = aNew;
4512 } 4529 }
4513 } 4530 }
4514 if( rc==SQLITE_OK ){ 4531 memset(pCur->aOverflow, 0, nOvfl*sizeof(Pgno));
4515 memset(pCur->aOverflow, 0, nOvfl*sizeof(Pgno)); 4532 pCur->curFlags |= BTCF_ValidOvfl;
4516 pCur->curFlags |= BTCF_ValidOvfl; 4533 }else{
4534 /* If the overflow page-list cache has been allocated and the
4535 ** entry for the first required overflow page is valid, skip
4536 ** directly to it.
4537 */
4538 if( pCur->aOverflow[offset/ovflSize] ){
4539 iIdx = (offset/ovflSize);
4540 nextPage = pCur->aOverflow[iIdx];
4541 offset = (offset%ovflSize);
4517 } 4542 }
4518 } 4543 }
4519 4544
4520 /* If the overflow page-list cache has been allocated and the 4545 assert( rc==SQLITE_OK && amt>0 );
4521 ** entry for the first required overflow page is valid, skip 4546 while( nextPage ){
4522 ** directly to it.
4523 */
4524 if( (pCur->curFlags & BTCF_ValidOvfl)!=0
4525 && pCur->aOverflow[offset/ovflSize]
4526 ){
4527 iIdx = (offset/ovflSize);
4528 nextPage = pCur->aOverflow[iIdx];
4529 offset = (offset%ovflSize);
4530 }
4531
4532 for( ; rc==SQLITE_OK && amt>0 && nextPage; iIdx++){
4533
4534 /* If required, populate the overflow page-list cache. */ 4547 /* If required, populate the overflow page-list cache. */
4535 if( (pCur->curFlags & BTCF_ValidOvfl)!=0 ){ 4548 assert( pCur->aOverflow[iIdx]==0
4536 assert( pCur->aOverflow[iIdx]==0 4549 || pCur->aOverflow[iIdx]==nextPage
4537 || pCur->aOverflow[iIdx]==nextPage 4550 || CORRUPT_DB );
4538 || CORRUPT_DB ); 4551 pCur->aOverflow[iIdx] = nextPage;
4539 pCur->aOverflow[iIdx] = nextPage;
4540 }
4541 4552
4542 if( offset>=ovflSize ){ 4553 if( offset>=ovflSize ){
4543 /* The only reason to read this page is to obtain the page 4554 /* The only reason to read this page is to obtain the page
4544 ** number for the next page in the overflow chain. The page 4555 ** number for the next page in the overflow chain. The page
4545 ** data is not required. So first try to lookup the overflow 4556 ** data is not required. So first try to lookup the overflow
4546 ** page-list cache, if any, then fall back to the getOverflowPage() 4557 ** page-list cache, if any, then fall back to the getOverflowPage()
4547 ** function. 4558 ** function.
4548 **
4549 ** Note that the aOverflow[] array must be allocated because eOp!=2
4550 ** here. If eOp==2, then offset==0 and this branch is never taken.
4551 */ 4559 */
4552 assert( eOp!=2 );
4553 assert( pCur->curFlags & BTCF_ValidOvfl ); 4560 assert( pCur->curFlags & BTCF_ValidOvfl );
4554 assert( pCur->pBtree->db==pBt->db ); 4561 assert( pCur->pBtree->db==pBt->db );
4555 if( pCur->aOverflow[iIdx+1] ){ 4562 if( pCur->aOverflow[iIdx+1] ){
4556 nextPage = pCur->aOverflow[iIdx+1]; 4563 nextPage = pCur->aOverflow[iIdx+1];
4557 }else{ 4564 }else{
4558 rc = getOverflowPage(pBt, nextPage, 0, &nextPage); 4565 rc = getOverflowPage(pBt, nextPage, 0, &nextPage);
4559 } 4566 }
4560 offset -= ovflSize; 4567 offset -= ovflSize;
4561 }else{ 4568 }else{
4562 /* Need to read this page properly. It contains some of the 4569 /* Need to read this page properly. It contains some of the
4563 ** range of data that is being read (eOp==0) or written (eOp!=0). 4570 ** range of data that is being read (eOp==0) or written (eOp!=0).
4564 */ 4571 */
4565 #ifdef SQLITE_DIRECT_OVERFLOW_READ 4572 #ifdef SQLITE_DIRECT_OVERFLOW_READ
4566 sqlite3_file *fd; 4573 sqlite3_file *fd; /* File from which to do direct overflow read */
4567 #endif 4574 #endif
4568 int a = amt; 4575 int a = amt;
4569 if( a + offset > ovflSize ){ 4576 if( a + offset > ovflSize ){
4570 a = ovflSize - offset; 4577 a = ovflSize - offset;
4571 } 4578 }
4572 4579
4573 #ifdef SQLITE_DIRECT_OVERFLOW_READ 4580 #ifdef SQLITE_DIRECT_OVERFLOW_READ
4574 /* If all the following are true: 4581 /* If all the following are true:
4575 ** 4582 **
4576 ** 1) this is a read operation, and 4583 ** 1) this is a read operation, and
4577 ** 2) data is required from the start of this overflow page, and 4584 ** 2) data is required from the start of this overflow page, and
4578 ** 3) the database is file-backed, and 4585 ** 3) there is no open write-transaction, and
4579 ** 4) there is no open write-transaction, and 4586 ** 4) the database is file-backed, and
4580 ** 5) the database is not a WAL database, 4587 ** 5) the page is not in the WAL file
4581 ** 6) all data from the page is being read. 4588 ** 6) at least 4 bytes have already been read into the output buffer
4582 ** 7) at least 4 bytes have already been read into the output buffer
4583 ** 4589 **
4584 ** then data can be read directly from the database file into the 4590 ** then data can be read directly from the database file into the
4585 ** output buffer, bypassing the page-cache altogether. This speeds 4591 ** output buffer, bypassing the page-cache altogether. This speeds
4586 ** up loading large records that span many overflow pages. 4592 ** up loading large records that span many overflow pages.
4587 */ 4593 */
4588 if( (eOp&0x01)==0 /* (1) */ 4594 if( eOp==0 /* (1) */
4589 && offset==0 /* (2) */ 4595 && offset==0 /* (2) */
4590 && (bEnd || a==ovflSize) /* (6) */ 4596 && pBt->inTransaction==TRANS_READ /* (3) */
4591 && pBt->inTransaction==TRANS_READ /* (4) */ 4597 && (fd = sqlite3PagerFile(pBt->pPager))->pMethods /* (4) */
4592 && (fd = sqlite3PagerFile(pBt->pPager))->pMethods /* (3) */ 4598 && 0==sqlite3PagerUseWal(pBt->pPager, nextPage) /* (5) */
4593 && pBt->pPage1->aData[19]==0x01 /* (5) */ 4599 && &pBuf[-4]>=pBufStart /* (6) */
4594 && &pBuf[-4]>=pBufStart /* (7) */
4595 ){ 4600 ){
4596 u8 aSave[4]; 4601 u8 aSave[4];
4597 u8 *aWrite = &pBuf[-4]; 4602 u8 *aWrite = &pBuf[-4];
4598 assert( aWrite>=pBufStart ); /* hence (7) */ 4603 assert( aWrite>=pBufStart ); /* due to (6) */
4599 memcpy(aSave, aWrite, 4); 4604 memcpy(aSave, aWrite, 4);
4600 rc = sqlite3OsRead(fd, aWrite, a+4, (i64)pBt->pageSize*(nextPage-1)); 4605 rc = sqlite3OsRead(fd, aWrite, a+4, (i64)pBt->pageSize*(nextPage-1));
4601 nextPage = get4byte(aWrite); 4606 nextPage = get4byte(aWrite);
4602 memcpy(aWrite, aSave, 4); 4607 memcpy(aWrite, aSave, 4);
4603 }else 4608 }else
4604 #endif 4609 #endif
4605 4610
4606 { 4611 {
4607 DbPage *pDbPage; 4612 DbPage *pDbPage;
4608 rc = sqlite3PagerGet(pBt->pPager, nextPage, &pDbPage, 4613 rc = sqlite3PagerGet(pBt->pPager, nextPage, &pDbPage,
4609 ((eOp&0x01)==0 ? PAGER_GET_READONLY : 0) 4614 (eOp==0 ? PAGER_GET_READONLY : 0)
4610 ); 4615 );
4611 if( rc==SQLITE_OK ){ 4616 if( rc==SQLITE_OK ){
4612 aPayload = sqlite3PagerGetData(pDbPage); 4617 aPayload = sqlite3PagerGetData(pDbPage);
4613 nextPage = get4byte(aPayload); 4618 nextPage = get4byte(aPayload);
4614 rc = copyPayload(&aPayload[offset+4], pBuf, a, (eOp&0x01), pDbPage); 4619 rc = copyPayload(&aPayload[offset+4], pBuf, a, eOp, pDbPage);
4615 sqlite3PagerUnref(pDbPage); 4620 sqlite3PagerUnref(pDbPage);
4616 offset = 0; 4621 offset = 0;
4617 } 4622 }
4618 } 4623 }
4619 amt -= a; 4624 amt -= a;
4625 if( amt==0 ) return rc;
4620 pBuf += a; 4626 pBuf += a;
4621 } 4627 }
4628 if( rc ) break;
4629 iIdx++;
4622 } 4630 }
4623 } 4631 }
4624 4632
4625 if( rc==SQLITE_OK && amt>0 ){ 4633 if( rc==SQLITE_OK && amt>0 ){
4626 return SQLITE_CORRUPT_BKPT; 4634 return SQLITE_CORRUPT_BKPT; /* Overflow chain ends prematurely */
4627 } 4635 }
4628 return rc; 4636 return rc;
4629 } 4637 }
4630 4638
4631 /* 4639 /*
4632 ** Read part of the key associated with cursor pCur. Exactly 4640 ** Read part of the payload for the row at which that cursor pCur is currently
4633 ** "amt" bytes will be transferred into pBuf[]. The transfer 4641 ** pointing. "amt" bytes will be transferred into pBuf[]. The transfer
4634 ** begins at "offset". 4642 ** begins at "offset".
4635 ** 4643 **
4636 ** The caller must ensure that pCur is pointing to a valid row 4644 ** pCur can be pointing to either a table or an index b-tree.
4637 ** in the table. 4645 ** If pointing to a table btree, then the content section is read. If
4646 ** pCur is pointing to an index b-tree then the key section is read.
4647 **
4648 ** For sqlite3BtreePayload(), the caller must ensure that pCur is pointing
4649 ** to a valid row in the table. For sqlite3BtreePayloadChecked(), the
4650 ** cursor might be invalid or might need to be restored before being read.
4638 ** 4651 **
4639 ** Return SQLITE_OK on success or an error code if anything goes 4652 ** Return SQLITE_OK on success or an error code if anything goes
4640 ** wrong. An error is returned if "offset+amt" is larger than 4653 ** wrong. An error is returned if "offset+amt" is larger than
4641 ** the available payload. 4654 ** the available payload.
4642 */ 4655 */
4643 int sqlite3BtreeKey(BtCursor *pCur, u32 offset, u32 amt, void *pBuf){ 4656 int sqlite3BtreePayload(BtCursor *pCur, u32 offset, u32 amt, void *pBuf){
4644 assert( cursorHoldsMutex(pCur) ); 4657 assert( cursorHoldsMutex(pCur) );
4645 assert( pCur->eState==CURSOR_VALID ); 4658 assert( pCur->eState==CURSOR_VALID );
4646 assert( pCur->iPage>=0 && pCur->apPage[pCur->iPage] ); 4659 assert( pCur->iPage>=0 && pCur->apPage[pCur->iPage] );
4647 assert( pCur->aiIdx[pCur->iPage]<pCur->apPage[pCur->iPage]->nCell ); 4660 assert( pCur->aiIdx[pCur->iPage]<pCur->apPage[pCur->iPage]->nCell );
4648 return accessPayload(pCur, offset, amt, (unsigned char*)pBuf, 0); 4661 return accessPayload(pCur, offset, amt, (unsigned char*)pBuf, 0);
4649 } 4662 }
4650 4663
4651 /* 4664 /*
4652 ** Read part of the data associated with cursor pCur. Exactly 4665 ** This variant of sqlite3BtreePayload() works even if the cursor has not
4653 ** "amt" bytes will be transfered into pBuf[]. The transfer 4666 ** in the CURSOR_VALID state. It is only used by the sqlite3_blob_read()
4654 ** begins at "offset". 4667 ** interface.
4655 **
4656 ** Return SQLITE_OK on success or an error code if anything goes
4657 ** wrong. An error is returned if "offset+amt" is larger than
4658 ** the available payload.
4659 */ 4668 */
4660 int sqlite3BtreeData(BtCursor *pCur, u32 offset, u32 amt, void *pBuf){ 4669 #ifndef SQLITE_OMIT_INCRBLOB
4670 static SQLITE_NOINLINE int accessPayloadChecked(
4671 BtCursor *pCur,
4672 u32 offset,
4673 u32 amt,
4674 void *pBuf
4675 ){
4661 int rc; 4676 int rc;
4662
4663 #ifndef SQLITE_OMIT_INCRBLOB
4664 if ( pCur->eState==CURSOR_INVALID ){ 4677 if ( pCur->eState==CURSOR_INVALID ){
4665 return SQLITE_ABORT; 4678 return SQLITE_ABORT;
4666 } 4679 }
4667 #endif 4680 assert( cursorOwnsBtShared(pCur) );
4668 4681 rc = btreeRestoreCursorPosition(pCur);
4669 assert( cursorHoldsMutex(pCur) ); 4682 return rc ? rc : accessPayload(pCur, offset, amt, pBuf, 0);
4670 rc = restoreCursorPosition(pCur); 4683 }
4671 if( rc==SQLITE_OK ){ 4684 int sqlite3BtreePayloadChecked(BtCursor *pCur, u32 offset, u32 amt, void *pBuf){
4672 assert( pCur->eState==CURSOR_VALID ); 4685 if( pCur->eState==CURSOR_VALID ){
4673 assert( pCur->iPage>=0 && pCur->apPage[pCur->iPage] ); 4686 assert( cursorOwnsBtShared(pCur) );
4674 assert( pCur->aiIdx[pCur->iPage]<pCur->apPage[pCur->iPage]->nCell ); 4687 return accessPayload(pCur, offset, amt, pBuf, 0);
4675 rc = accessPayload(pCur, offset, amt, pBuf, 0); 4688 }else{
4689 return accessPayloadChecked(pCur, offset, amt, pBuf);
4676 } 4690 }
4677 return rc;
4678 } 4691 }
4692 #endif /* SQLITE_OMIT_INCRBLOB */
4679 4693
4680 /* 4694 /*
4681 ** Return a pointer to payload information from the entry that the 4695 ** Return a pointer to payload information from the entry that the
4682 ** pCur cursor is pointing to. The pointer is to the beginning of 4696 ** pCur cursor is pointing to. The pointer is to the beginning of
4683 ** the key if index btrees (pPage->intKey==0) and is the data for 4697 ** the key if index btrees (pPage->intKey==0) and is the data for
4684 ** table btrees (pPage->intKey==1). The number of bytes of available 4698 ** table btrees (pPage->intKey==1). The number of bytes of available
4685 ** key/data is written into *pAmt. If *pAmt==0, then the value 4699 ** key/data is written into *pAmt. If *pAmt==0, then the value
4686 ** returned will not be a valid pointer. 4700 ** returned will not be a valid pointer.
4687 ** 4701 **
4688 ** This routine is an optimization. It is common for the entire key 4702 ** This routine is an optimization. It is common for the entire key
4689 ** and data to fit on the local page and for there to be no overflow 4703 ** and data to fit on the local page and for there to be no overflow
4690 ** pages. When that is so, this routine can be used to access the 4704 ** pages. When that is so, this routine can be used to access the
4691 ** key and data without making a copy. If the key and/or data spills 4705 ** key and data without making a copy. If the key and/or data spills
4692 ** onto overflow pages, then accessPayload() must be used to reassemble 4706 ** onto overflow pages, then accessPayload() must be used to reassemble
4693 ** the key/data and copy it into a preallocated buffer. 4707 ** the key/data and copy it into a preallocated buffer.
4694 ** 4708 **
4695 ** The pointer returned by this routine looks directly into the cached 4709 ** The pointer returned by this routine looks directly into the cached
4696 ** page of the database. The data might change or move the next time 4710 ** page of the database. The data might change or move the next time
4697 ** any btree routine is called. 4711 ** any btree routine is called.
4698 */ 4712 */
4699 static const void *fetchPayload( 4713 static const void *fetchPayload(
4700 BtCursor *pCur, /* Cursor pointing to entry to read from */ 4714 BtCursor *pCur, /* Cursor pointing to entry to read from */
4701 u32 *pAmt /* Write the number of available bytes here */ 4715 u32 *pAmt /* Write the number of available bytes here */
4702 ){ 4716 ){
4703 u32 amt; 4717 u32 amt;
4704 assert( pCur!=0 && pCur->iPage>=0 && pCur->apPage[pCur->iPage]); 4718 assert( pCur!=0 && pCur->iPage>=0 && pCur->apPage[pCur->iPage]);
4705 assert( pCur->eState==CURSOR_VALID ); 4719 assert( pCur->eState==CURSOR_VALID );
4706 assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) ); 4720 assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) );
4707 assert( cursorHoldsMutex(pCur) ); 4721 assert( cursorOwnsBtShared(pCur) );
4708 assert( pCur->aiIdx[pCur->iPage]<pCur->apPage[pCur->iPage]->nCell ); 4722 assert( pCur->aiIdx[pCur->iPage]<pCur->apPage[pCur->iPage]->nCell );
4709 assert( pCur->info.nSize>0 ); 4723 assert( pCur->info.nSize>0 );
4710 assert( pCur->info.pPayload>pCur->apPage[pCur->iPage]->aData || CORRUPT_DB ); 4724 assert( pCur->info.pPayload>pCur->apPage[pCur->iPage]->aData || CORRUPT_DB );
4711 assert( pCur->info.pPayload<pCur->apPage[pCur->iPage]->aDataEnd ||CORRUPT_DB); 4725 assert( pCur->info.pPayload<pCur->apPage[pCur->iPage]->aDataEnd ||CORRUPT_DB);
4712 amt = (int)(pCur->apPage[pCur->iPage]->aDataEnd - pCur->info.pPayload); 4726 amt = (int)(pCur->apPage[pCur->iPage]->aDataEnd - pCur->info.pPayload);
4713 if( pCur->info.nLocal<amt ) amt = pCur->info.nLocal; 4727 if( pCur->info.nLocal<amt ) amt = pCur->info.nLocal;
4714 *pAmt = amt; 4728 *pAmt = amt;
4715 return (void*)pCur->info.pPayload; 4729 return (void*)pCur->info.pPayload;
4716 } 4730 }
4717 4731
4718 4732
4719 /* 4733 /*
4720 ** For the entry that cursor pCur is point to, return as 4734 ** For the entry that cursor pCur is point to, return as
4721 ** many bytes of the key or data as are available on the local 4735 ** many bytes of the key or data as are available on the local
4722 ** b-tree page. Write the number of available bytes into *pAmt. 4736 ** b-tree page. Write the number of available bytes into *pAmt.
4723 ** 4737 **
4724 ** The pointer returned is ephemeral. The key/data may move 4738 ** The pointer returned is ephemeral. The key/data may move
4725 ** or be destroyed on the next call to any Btree routine, 4739 ** or be destroyed on the next call to any Btree routine,
4726 ** including calls from other threads against the same cache. 4740 ** including calls from other threads against the same cache.
4727 ** Hence, a mutex on the BtShared should be held prior to calling 4741 ** Hence, a mutex on the BtShared should be held prior to calling
4728 ** this routine. 4742 ** this routine.
4729 ** 4743 **
4730 ** These routines is used to get quick access to key and data 4744 ** These routines is used to get quick access to key and data
4731 ** in the common case where no overflow pages are used. 4745 ** in the common case where no overflow pages are used.
4732 */ 4746 */
4733 const void *sqlite3BtreeKeyFetch(BtCursor *pCur, u32 *pAmt){ 4747 const void *sqlite3BtreePayloadFetch(BtCursor *pCur, u32 *pAmt){
4734 return fetchPayload(pCur, pAmt);
4735 }
4736 const void *sqlite3BtreeDataFetch(BtCursor *pCur, u32 *pAmt){
4737 return fetchPayload(pCur, pAmt); 4748 return fetchPayload(pCur, pAmt);
4738 } 4749 }
4739 4750
4740 4751
4741 /* 4752 /*
4742 ** Move the cursor down to a new child page. The newPgno argument is the 4753 ** Move the cursor down to a new child page. The newPgno argument is the
4743 ** page number of the child page to move to. 4754 ** page number of the child page to move to.
4744 ** 4755 **
4745 ** This function returns SQLITE_CORRUPT if the page-header flags field of 4756 ** This function returns SQLITE_CORRUPT if the page-header flags field of
4746 ** the new child page does not match the flags field of the parent (i.e. 4757 ** the new child page does not match the flags field of the parent (i.e.
4747 ** if an intkey page appears to be the parent of a non-intkey page, or 4758 ** if an intkey page appears to be the parent of a non-intkey page, or
4748 ** vice-versa). 4759 ** vice-versa).
4749 */ 4760 */
4750 static int moveToChild(BtCursor *pCur, u32 newPgno){ 4761 static int moveToChild(BtCursor *pCur, u32 newPgno){
4751 BtShared *pBt = pCur->pBt; 4762 BtShared *pBt = pCur->pBt;
4752 4763
4753 assert( cursorHoldsMutex(pCur) ); 4764 assert( cursorOwnsBtShared(pCur) );
4754 assert( pCur->eState==CURSOR_VALID ); 4765 assert( pCur->eState==CURSOR_VALID );
4755 assert( pCur->iPage<BTCURSOR_MAX_DEPTH ); 4766 assert( pCur->iPage<BTCURSOR_MAX_DEPTH );
4756 assert( pCur->iPage>=0 ); 4767 assert( pCur->iPage>=0 );
4757 if( pCur->iPage>=(BTCURSOR_MAX_DEPTH-1) ){ 4768 if( pCur->iPage>=(BTCURSOR_MAX_DEPTH-1) ){
4758 return SQLITE_CORRUPT_BKPT; 4769 return SQLITE_CORRUPT_BKPT;
4759 } 4770 }
4760 pCur->info.nSize = 0; 4771 pCur->info.nSize = 0;
4761 pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl); 4772 pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl);
4762 pCur->iPage++; 4773 pCur->iPage++;
4763 pCur->aiIdx[pCur->iPage] = 0; 4774 pCur->aiIdx[pCur->iPage] = 0;
(...skipping 25 matching lines...) Expand all
4789 4800
4790 /* 4801 /*
4791 ** Move the cursor up to the parent page. 4802 ** Move the cursor up to the parent page.
4792 ** 4803 **
4793 ** pCur->idx is set to the cell index that contains the pointer 4804 ** pCur->idx is set to the cell index that contains the pointer
4794 ** to the page we are coming from. If we are coming from the 4805 ** to the page we are coming from. If we are coming from the
4795 ** right-most child page then pCur->idx is set to one more than 4806 ** right-most child page then pCur->idx is set to one more than
4796 ** the largest cell index. 4807 ** the largest cell index.
4797 */ 4808 */
4798 static void moveToParent(BtCursor *pCur){ 4809 static void moveToParent(BtCursor *pCur){
4799 assert( cursorHoldsMutex(pCur) ); 4810 assert( cursorOwnsBtShared(pCur) );
4800 assert( pCur->eState==CURSOR_VALID ); 4811 assert( pCur->eState==CURSOR_VALID );
4801 assert( pCur->iPage>0 ); 4812 assert( pCur->iPage>0 );
4802 assert( pCur->apPage[pCur->iPage] ); 4813 assert( pCur->apPage[pCur->iPage] );
4803 assertParentIndex( 4814 assertParentIndex(
4804 pCur->apPage[pCur->iPage-1], 4815 pCur->apPage[pCur->iPage-1],
4805 pCur->aiIdx[pCur->iPage-1], 4816 pCur->aiIdx[pCur->iPage-1],
4806 pCur->apPage[pCur->iPage]->pgno 4817 pCur->apPage[pCur->iPage]->pgno
4807 ); 4818 );
4808 testcase( pCur->aiIdx[pCur->iPage-1] > pCur->apPage[pCur->iPage-1]->nCell ); 4819 testcase( pCur->aiIdx[pCur->iPage-1] > pCur->apPage[pCur->iPage-1]->nCell );
4809 pCur->info.nSize = 0; 4820 pCur->info.nSize = 0;
(...skipping 19 matching lines...) Expand all
4829 ** kind of b-tree page (i.e. if when opening the cursor the caller did not 4840 ** kind of b-tree page (i.e. if when opening the cursor the caller did not
4830 ** specify a KeyInfo structure the flags byte is set to 0x05 or 0x0D, 4841 ** specify a KeyInfo structure the flags byte is set to 0x05 or 0x0D,
4831 ** indicating a table b-tree, or if the caller did specify a KeyInfo 4842 ** indicating a table b-tree, or if the caller did specify a KeyInfo
4832 ** structure the flags byte is set to 0x02 or 0x0A, indicating an index 4843 ** structure the flags byte is set to 0x02 or 0x0A, indicating an index
4833 ** b-tree). 4844 ** b-tree).
4834 */ 4845 */
4835 static int moveToRoot(BtCursor *pCur){ 4846 static int moveToRoot(BtCursor *pCur){
4836 MemPage *pRoot; 4847 MemPage *pRoot;
4837 int rc = SQLITE_OK; 4848 int rc = SQLITE_OK;
4838 4849
4839 assert( cursorHoldsMutex(pCur) ); 4850 assert( cursorOwnsBtShared(pCur) );
4840 assert( CURSOR_INVALID < CURSOR_REQUIRESEEK ); 4851 assert( CURSOR_INVALID < CURSOR_REQUIRESEEK );
4841 assert( CURSOR_VALID < CURSOR_REQUIRESEEK ); 4852 assert( CURSOR_VALID < CURSOR_REQUIRESEEK );
4842 assert( CURSOR_FAULT > CURSOR_REQUIRESEEK ); 4853 assert( CURSOR_FAULT > CURSOR_REQUIRESEEK );
4843 if( pCur->eState>=CURSOR_REQUIRESEEK ){ 4854 if( pCur->eState>=CURSOR_REQUIRESEEK ){
4844 if( pCur->eState==CURSOR_FAULT ){ 4855 if( pCur->eState==CURSOR_FAULT ){
4845 assert( pCur->skipNext!=SQLITE_OK ); 4856 assert( pCur->skipNext!=SQLITE_OK );
4846 return pCur->skipNext; 4857 return pCur->skipNext;
4847 } 4858 }
4848 sqlite3BtreeClearCursor(pCur); 4859 sqlite3BtreeClearCursor(pCur);
4849 } 4860 }
4850 4861
4851 if( pCur->iPage>=0 ){ 4862 if( pCur->iPage>=0 ){
4852 while( pCur->iPage ){ 4863 if( pCur->iPage ){
4853 assert( pCur->apPage[pCur->iPage]!=0 ); 4864 do{
4854 releasePageNotNull(pCur->apPage[pCur->iPage--]); 4865 assert( pCur->apPage[pCur->iPage]!=0 );
4866 releasePageNotNull(pCur->apPage[pCur->iPage--]);
4867 }while( pCur->iPage);
4868 goto skip_init;
4855 } 4869 }
4856 }else if( pCur->pgnoRoot==0 ){ 4870 }else if( pCur->pgnoRoot==0 ){
4857 pCur->eState = CURSOR_INVALID; 4871 pCur->eState = CURSOR_INVALID;
4858 return SQLITE_OK; 4872 return SQLITE_OK;
4859 }else{ 4873 }else{
4860 assert( pCur->iPage==(-1) ); 4874 assert( pCur->iPage==(-1) );
4861 rc = getAndInitPage(pCur->pBtree->pBt, pCur->pgnoRoot, &pCur->apPage[0], 4875 rc = getAndInitPage(pCur->pBtree->pBt, pCur->pgnoRoot, &pCur->apPage[0],
4862 0, pCur->curPagerFlags); 4876 0, pCur->curPagerFlags);
4863 if( rc!=SQLITE_OK ){ 4877 if( rc!=SQLITE_OK ){
4864 pCur->eState = CURSOR_INVALID; 4878 pCur->eState = CURSOR_INVALID;
4865 return rc; 4879 return rc;
4866 } 4880 }
4867 pCur->iPage = 0; 4881 pCur->iPage = 0;
4868 pCur->curIntKey = pCur->apPage[0]->intKey; 4882 pCur->curIntKey = pCur->apPage[0]->intKey;
4869 } 4883 }
4870 pRoot = pCur->apPage[0]; 4884 pRoot = pCur->apPage[0];
4871 assert( pRoot->pgno==pCur->pgnoRoot ); 4885 assert( pRoot->pgno==pCur->pgnoRoot );
4872 4886
4873 /* If pCur->pKeyInfo is not NULL, then the caller that opened this cursor 4887 /* If pCur->pKeyInfo is not NULL, then the caller that opened this cursor
4874 ** expected to open it on an index b-tree. Otherwise, if pKeyInfo is 4888 ** expected to open it on an index b-tree. Otherwise, if pKeyInfo is
4875 ** NULL, the caller expects a table b-tree. If this is not the case, 4889 ** NULL, the caller expects a table b-tree. If this is not the case,
4876 ** return an SQLITE_CORRUPT error. 4890 ** return an SQLITE_CORRUPT error.
4877 ** 4891 **
4878 ** Earlier versions of SQLite assumed that this test could not fail 4892 ** Earlier versions of SQLite assumed that this test could not fail
4879 ** if the root page was already loaded when this function was called (i.e. 4893 ** if the root page was already loaded when this function was called (i.e.
4880 ** if pCur->iPage>=0). But this is not so if the database is corrupted 4894 ** if pCur->iPage>=0). But this is not so if the database is corrupted
4881 ** in such a way that page pRoot is linked into a second b-tree table 4895 ** in such a way that page pRoot is linked into a second b-tree table
4882 ** (or the freelist). */ 4896 ** (or the freelist). */
4883 assert( pRoot->intKey==1 || pRoot->intKey==0 ); 4897 assert( pRoot->intKey==1 || pRoot->intKey==0 );
4884 if( pRoot->isInit==0 || (pCur->pKeyInfo==0)!=pRoot->intKey ){ 4898 if( pRoot->isInit==0 || (pCur->pKeyInfo==0)!=pRoot->intKey ){
4885 return SQLITE_CORRUPT_BKPT; 4899 return SQLITE_CORRUPT_BKPT;
4886 } 4900 }
4887 4901
4902 skip_init:
4888 pCur->aiIdx[0] = 0; 4903 pCur->aiIdx[0] = 0;
4889 pCur->info.nSize = 0; 4904 pCur->info.nSize = 0;
4890 pCur->curFlags &= ~(BTCF_AtLast|BTCF_ValidNKey|BTCF_ValidOvfl); 4905 pCur->curFlags &= ~(BTCF_AtLast|BTCF_ValidNKey|BTCF_ValidOvfl);
4891 4906
4907 pRoot = pCur->apPage[0];
4892 if( pRoot->nCell>0 ){ 4908 if( pRoot->nCell>0 ){
4893 pCur->eState = CURSOR_VALID; 4909 pCur->eState = CURSOR_VALID;
4894 }else if( !pRoot->leaf ){ 4910 }else if( !pRoot->leaf ){
4895 Pgno subpage; 4911 Pgno subpage;
4896 if( pRoot->pgno!=1 ) return SQLITE_CORRUPT_BKPT; 4912 if( pRoot->pgno!=1 ) return SQLITE_CORRUPT_BKPT;
4897 subpage = get4byte(&pRoot->aData[pRoot->hdrOffset+8]); 4913 subpage = get4byte(&pRoot->aData[pRoot->hdrOffset+8]);
4898 pCur->eState = CURSOR_VALID; 4914 pCur->eState = CURSOR_VALID;
4899 rc = moveToChild(pCur, subpage); 4915 rc = moveToChild(pCur, subpage);
4900 }else{ 4916 }else{
4901 pCur->eState = CURSOR_INVALID; 4917 pCur->eState = CURSOR_INVALID;
4902 } 4918 }
4903 return rc; 4919 return rc;
4904 } 4920 }
4905 4921
4906 /* 4922 /*
4907 ** Move the cursor down to the left-most leaf entry beneath the 4923 ** Move the cursor down to the left-most leaf entry beneath the
4908 ** entry to which it is currently pointing. 4924 ** entry to which it is currently pointing.
4909 ** 4925 **
4910 ** The left-most leaf is the one with the smallest key - the first 4926 ** The left-most leaf is the one with the smallest key - the first
4911 ** in ascending order. 4927 ** in ascending order.
4912 */ 4928 */
4913 static int moveToLeftmost(BtCursor *pCur){ 4929 static int moveToLeftmost(BtCursor *pCur){
4914 Pgno pgno; 4930 Pgno pgno;
4915 int rc = SQLITE_OK; 4931 int rc = SQLITE_OK;
4916 MemPage *pPage; 4932 MemPage *pPage;
4917 4933
4918 assert( cursorHoldsMutex(pCur) ); 4934 assert( cursorOwnsBtShared(pCur) );
4919 assert( pCur->eState==CURSOR_VALID ); 4935 assert( pCur->eState==CURSOR_VALID );
4920 while( rc==SQLITE_OK && !(pPage = pCur->apPage[pCur->iPage])->leaf ){ 4936 while( rc==SQLITE_OK && !(pPage = pCur->apPage[pCur->iPage])->leaf ){
4921 assert( pCur->aiIdx[pCur->iPage]<pPage->nCell ); 4937 assert( pCur->aiIdx[pCur->iPage]<pPage->nCell );
4922 pgno = get4byte(findCell(pPage, pCur->aiIdx[pCur->iPage])); 4938 pgno = get4byte(findCell(pPage, pCur->aiIdx[pCur->iPage]));
4923 rc = moveToChild(pCur, pgno); 4939 rc = moveToChild(pCur, pgno);
4924 } 4940 }
4925 return rc; 4941 return rc;
4926 } 4942 }
4927 4943
4928 /* 4944 /*
4929 ** Move the cursor down to the right-most leaf entry beneath the 4945 ** Move the cursor down to the right-most leaf entry beneath the
4930 ** page to which it is currently pointing. Notice the difference 4946 ** page to which it is currently pointing. Notice the difference
4931 ** between moveToLeftmost() and moveToRightmost(). moveToLeftmost() 4947 ** between moveToLeftmost() and moveToRightmost(). moveToLeftmost()
4932 ** finds the left-most entry beneath the *entry* whereas moveToRightmost() 4948 ** finds the left-most entry beneath the *entry* whereas moveToRightmost()
4933 ** finds the right-most entry beneath the *page*. 4949 ** finds the right-most entry beneath the *page*.
4934 ** 4950 **
4935 ** The right-most entry is the one with the largest key - the last 4951 ** The right-most entry is the one with the largest key - the last
4936 ** key in ascending order. 4952 ** key in ascending order.
4937 */ 4953 */
4938 static int moveToRightmost(BtCursor *pCur){ 4954 static int moveToRightmost(BtCursor *pCur){
4939 Pgno pgno; 4955 Pgno pgno;
4940 int rc = SQLITE_OK; 4956 int rc = SQLITE_OK;
4941 MemPage *pPage = 0; 4957 MemPage *pPage = 0;
4942 4958
4943 assert( cursorHoldsMutex(pCur) ); 4959 assert( cursorOwnsBtShared(pCur) );
4944 assert( pCur->eState==CURSOR_VALID ); 4960 assert( pCur->eState==CURSOR_VALID );
4945 while( !(pPage = pCur->apPage[pCur->iPage])->leaf ){ 4961 while( !(pPage = pCur->apPage[pCur->iPage])->leaf ){
4946 pgno = get4byte(&pPage->aData[pPage->hdrOffset+8]); 4962 pgno = get4byte(&pPage->aData[pPage->hdrOffset+8]);
4947 pCur->aiIdx[pCur->iPage] = pPage->nCell; 4963 pCur->aiIdx[pCur->iPage] = pPage->nCell;
4948 rc = moveToChild(pCur, pgno); 4964 rc = moveToChild(pCur, pgno);
4949 if( rc ) return rc; 4965 if( rc ) return rc;
4950 } 4966 }
4951 pCur->aiIdx[pCur->iPage] = pPage->nCell-1; 4967 pCur->aiIdx[pCur->iPage] = pPage->nCell-1;
4952 assert( pCur->info.nSize==0 ); 4968 assert( pCur->info.nSize==0 );
4953 assert( (pCur->curFlags & BTCF_ValidNKey)==0 ); 4969 assert( (pCur->curFlags & BTCF_ValidNKey)==0 );
4954 return SQLITE_OK; 4970 return SQLITE_OK;
4955 } 4971 }
4956 4972
4957 /* Move the cursor to the first entry in the table. Return SQLITE_OK 4973 /* Move the cursor to the first entry in the table. Return SQLITE_OK
4958 ** on success. Set *pRes to 0 if the cursor actually points to something 4974 ** on success. Set *pRes to 0 if the cursor actually points to something
4959 ** or set *pRes to 1 if the table is empty. 4975 ** or set *pRes to 1 if the table is empty.
4960 */ 4976 */
4961 int sqlite3BtreeFirst(BtCursor *pCur, int *pRes){ 4977 int sqlite3BtreeFirst(BtCursor *pCur, int *pRes){
4962 int rc; 4978 int rc;
4963 4979
4964 assert( cursorHoldsMutex(pCur) ); 4980 assert( cursorOwnsBtShared(pCur) );
4965 assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) ); 4981 assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) );
4966 rc = moveToRoot(pCur); 4982 rc = moveToRoot(pCur);
4967 if( rc==SQLITE_OK ){ 4983 if( rc==SQLITE_OK ){
4968 if( pCur->eState==CURSOR_INVALID ){ 4984 if( pCur->eState==CURSOR_INVALID ){
4969 assert( pCur->pgnoRoot==0 || pCur->apPage[pCur->iPage]->nCell==0 ); 4985 assert( pCur->pgnoRoot==0 || pCur->apPage[pCur->iPage]->nCell==0 );
4970 *pRes = 1; 4986 *pRes = 1;
4971 }else{ 4987 }else{
4972 assert( pCur->apPage[pCur->iPage]->nCell>0 ); 4988 assert( pCur->apPage[pCur->iPage]->nCell>0 );
4973 *pRes = 0; 4989 *pRes = 0;
4974 rc = moveToLeftmost(pCur); 4990 rc = moveToLeftmost(pCur);
4975 } 4991 }
4976 } 4992 }
4977 return rc; 4993 return rc;
4978 } 4994 }
4979 4995
4980 /* Move the cursor to the last entry in the table. Return SQLITE_OK 4996 /* Move the cursor to the last entry in the table. Return SQLITE_OK
4981 ** on success. Set *pRes to 0 if the cursor actually points to something 4997 ** on success. Set *pRes to 0 if the cursor actually points to something
4982 ** or set *pRes to 1 if the table is empty. 4998 ** or set *pRes to 1 if the table is empty.
4983 */ 4999 */
4984 int sqlite3BtreeLast(BtCursor *pCur, int *pRes){ 5000 int sqlite3BtreeLast(BtCursor *pCur, int *pRes){
4985 int rc; 5001 int rc;
4986 5002
4987 assert( cursorHoldsMutex(pCur) ); 5003 assert( cursorOwnsBtShared(pCur) );
4988 assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) ); 5004 assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) );
4989 5005
4990 /* If the cursor already points to the last entry, this is a no-op. */ 5006 /* If the cursor already points to the last entry, this is a no-op. */
4991 if( CURSOR_VALID==pCur->eState && (pCur->curFlags & BTCF_AtLast)!=0 ){ 5007 if( CURSOR_VALID==pCur->eState && (pCur->curFlags & BTCF_AtLast)!=0 ){
4992 #ifdef SQLITE_DEBUG 5008 #ifdef SQLITE_DEBUG
4993 /* This block serves to assert() that the cursor really does point 5009 /* This block serves to assert() that the cursor really does point
4994 ** to the last entry in the b-tree. */ 5010 ** to the last entry in the b-tree. */
4995 int ii; 5011 int ii;
4996 for(ii=0; ii<pCur->iPage; ii++){ 5012 for(ii=0; ii<pCur->iPage; ii++){
4997 assert( pCur->aiIdx[ii]==pCur->apPage[ii]->nCell ); 5013 assert( pCur->aiIdx[ii]==pCur->apPage[ii]->nCell );
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after
5055 int sqlite3BtreeMovetoUnpacked( 5071 int sqlite3BtreeMovetoUnpacked(
5056 BtCursor *pCur, /* The cursor to be moved */ 5072 BtCursor *pCur, /* The cursor to be moved */
5057 UnpackedRecord *pIdxKey, /* Unpacked index key */ 5073 UnpackedRecord *pIdxKey, /* Unpacked index key */
5058 i64 intKey, /* The table key */ 5074 i64 intKey, /* The table key */
5059 int biasRight, /* If true, bias the search to the high end */ 5075 int biasRight, /* If true, bias the search to the high end */
5060 int *pRes /* Write search results here */ 5076 int *pRes /* Write search results here */
5061 ){ 5077 ){
5062 int rc; 5078 int rc;
5063 RecordCompare xRecordCompare; 5079 RecordCompare xRecordCompare;
5064 5080
5065 assert( cursorHoldsMutex(pCur) ); 5081 assert( cursorOwnsBtShared(pCur) );
5066 assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) ); 5082 assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) );
5067 assert( pRes ); 5083 assert( pRes );
5068 assert( (pIdxKey==0)==(pCur->pKeyInfo==0) ); 5084 assert( (pIdxKey==0)==(pCur->pKeyInfo==0) );
5085 assert( pCur->eState!=CURSOR_VALID || (pIdxKey==0)==(pCur->curIntKey!=0) );
5069 5086
5070 /* If the cursor is already positioned at the point we are trying 5087 /* If the cursor is already positioned at the point we are trying
5071 ** to move to, then just return without doing any work */ 5088 ** to move to, then just return without doing any work */
5072 if( pCur->eState==CURSOR_VALID && (pCur->curFlags & BTCF_ValidNKey)!=0 5089 if( pIdxKey==0
5073 && pCur->curIntKey 5090 && pCur->eState==CURSOR_VALID && (pCur->curFlags & BTCF_ValidNKey)!=0
5074 ){ 5091 ){
5075 if( pCur->info.nKey==intKey ){ 5092 if( pCur->info.nKey==intKey ){
5076 *pRes = 0; 5093 *pRes = 0;
5077 return SQLITE_OK; 5094 return SQLITE_OK;
5078 } 5095 }
5079 if( (pCur->curFlags & BTCF_AtLast)!=0 && pCur->info.nKey<intKey ){ 5096 if( pCur->info.nKey<intKey ){
5080 *pRes = -1; 5097 if( (pCur->curFlags & BTCF_AtLast)!=0 ){
5081 return SQLITE_OK; 5098 *pRes = -1;
5099 return SQLITE_OK;
5100 }
5101 /* If the requested key is one more than the previous key, then
5102 ** try to get there using sqlite3BtreeNext() rather than a full
5103 ** binary search. This is an optimization only. The correct answer
5104 ** is still obtained without this ase, only a little more slowely */
5105 if( pCur->info.nKey+1==intKey && !pCur->skipNext ){
5106 *pRes = 0;
5107 rc = sqlite3BtreeNext(pCur, pRes);
5108 if( rc ) return rc;
5109 if( *pRes==0 ){
5110 getCellInfo(pCur);
5111 if( pCur->info.nKey==intKey ){
5112 return SQLITE_OK;
5113 }
5114 }
5115 }
5082 } 5116 }
5083 } 5117 }
5084 5118
5085 if( pIdxKey ){ 5119 if( pIdxKey ){
5086 xRecordCompare = sqlite3VdbeFindCompare(pIdxKey); 5120 xRecordCompare = sqlite3VdbeFindCompare(pIdxKey);
5087 pIdxKey->errCode = 0; 5121 pIdxKey->errCode = 0;
5088 assert( pIdxKey->default_rc==1 5122 assert( pIdxKey->default_rc==1
5089 || pIdxKey->default_rc==0 5123 || pIdxKey->default_rc==0
5090 || pIdxKey->default_rc==-1 5124 || pIdxKey->default_rc==-1
5091 ); 5125 );
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after
5137 } 5171 }
5138 getVarint(pCell, (u64*)&nCellKey); 5172 getVarint(pCell, (u64*)&nCellKey);
5139 if( nCellKey<intKey ){ 5173 if( nCellKey<intKey ){
5140 lwr = idx+1; 5174 lwr = idx+1;
5141 if( lwr>upr ){ c = -1; break; } 5175 if( lwr>upr ){ c = -1; break; }
5142 }else if( nCellKey>intKey ){ 5176 }else if( nCellKey>intKey ){
5143 upr = idx-1; 5177 upr = idx-1;
5144 if( lwr>upr ){ c = +1; break; } 5178 if( lwr>upr ){ c = +1; break; }
5145 }else{ 5179 }else{
5146 assert( nCellKey==intKey ); 5180 assert( nCellKey==intKey );
5147 pCur->curFlags |= BTCF_ValidNKey;
5148 pCur->info.nKey = nCellKey;
5149 pCur->aiIdx[pCur->iPage] = (u16)idx; 5181 pCur->aiIdx[pCur->iPage] = (u16)idx;
5150 if( !pPage->leaf ){ 5182 if( !pPage->leaf ){
5151 lwr = idx; 5183 lwr = idx;
5152 goto moveto_next_layer; 5184 goto moveto_next_layer;
5153 }else{ 5185 }else{
5186 pCur->curFlags |= BTCF_ValidNKey;
5187 pCur->info.nKey = nCellKey;
5188 pCur->info.nSize = 0;
5154 *pRes = 0; 5189 *pRes = 0;
5155 rc = SQLITE_OK; 5190 return SQLITE_OK;
5156 goto moveto_finish;
5157 } 5191 }
5158 } 5192 }
5159 assert( lwr+upr>=0 ); 5193 assert( lwr+upr>=0 );
5160 idx = (lwr+upr)>>1; /* idx = (lwr+upr)/2; */ 5194 idx = (lwr+upr)>>1; /* idx = (lwr+upr)/2; */
5161 } 5195 }
5162 }else{ 5196 }else{
5163 for(;;){ 5197 for(;;){
5164 int nCell; /* Size of the pCell cell in bytes */ 5198 int nCell; /* Size of the pCell cell in bytes */
5165 pCell = findCellPastPtr(pPage, idx); 5199 pCell = findCellPastPtr(pPage, idx);
5166 5200
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
5203 testcase( nCell<0 ); /* True if key size is 2^32 or more */ 5237 testcase( nCell<0 ); /* True if key size is 2^32 or more */
5204 testcase( nCell==0 ); /* Invalid key size: 0x80 0x80 0x00 */ 5238 testcase( nCell==0 ); /* Invalid key size: 0x80 0x80 0x00 */
5205 testcase( nCell==1 ); /* Invalid key size: 0x80 0x80 0x01 */ 5239 testcase( nCell==1 ); /* Invalid key size: 0x80 0x80 0x01 */
5206 testcase( nCell==2 ); /* Minimum legal index key size */ 5240 testcase( nCell==2 ); /* Minimum legal index key size */
5207 if( nCell<2 ){ 5241 if( nCell<2 ){
5208 rc = SQLITE_CORRUPT_BKPT; 5242 rc = SQLITE_CORRUPT_BKPT;
5209 goto moveto_finish; 5243 goto moveto_finish;
5210 } 5244 }
5211 pCellKey = sqlite3Malloc( nCell+18 ); 5245 pCellKey = sqlite3Malloc( nCell+18 );
5212 if( pCellKey==0 ){ 5246 if( pCellKey==0 ){
5213 rc = SQLITE_NOMEM; 5247 rc = SQLITE_NOMEM_BKPT;
5214 goto moveto_finish; 5248 goto moveto_finish;
5215 } 5249 }
5216 pCur->aiIdx[pCur->iPage] = (u16)idx; 5250 pCur->aiIdx[pCur->iPage] = (u16)idx;
5217 rc = accessPayload(pCur, 0, nCell, (unsigned char*)pCellKey, 2); 5251 rc = accessPayload(pCur, 0, nCell, (unsigned char*)pCellKey, 0);
5252 pCur->curFlags &= ~BTCF_ValidOvfl;
5218 if( rc ){ 5253 if( rc ){
5219 sqlite3_free(pCellKey); 5254 sqlite3_free(pCellKey);
5220 goto moveto_finish; 5255 goto moveto_finish;
5221 } 5256 }
5222 c = xRecordCompare(nCell, pCellKey, pIdxKey); 5257 c = xRecordCompare(nCell, pCellKey, pIdxKey);
5223 sqlite3_free(pCellKey); 5258 sqlite3_free(pCellKey);
5224 } 5259 }
5225 assert( 5260 assert(
5226 (pIdxKey->errCode!=SQLITE_CORRUPT || c==0) 5261 (pIdxKey->errCode!=SQLITE_CORRUPT || c==0)
5227 && (pIdxKey->errCode!=SQLITE_NOMEM || pCur->pBtree->db->mallocFailed) 5262 && (pIdxKey->errCode!=SQLITE_NOMEM || pCur->pBtree->db->mallocFailed)
(...skipping 29 matching lines...) Expand all
5257 chldPg = get4byte(&pPage->aData[pPage->hdrOffset+8]); 5292 chldPg = get4byte(&pPage->aData[pPage->hdrOffset+8]);
5258 }else{ 5293 }else{
5259 chldPg = get4byte(findCell(pPage, lwr)); 5294 chldPg = get4byte(findCell(pPage, lwr));
5260 } 5295 }
5261 pCur->aiIdx[pCur->iPage] = (u16)lwr; 5296 pCur->aiIdx[pCur->iPage] = (u16)lwr;
5262 rc = moveToChild(pCur, chldPg); 5297 rc = moveToChild(pCur, chldPg);
5263 if( rc ) break; 5298 if( rc ) break;
5264 } 5299 }
5265 moveto_finish: 5300 moveto_finish:
5266 pCur->info.nSize = 0; 5301 pCur->info.nSize = 0;
5267 pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl); 5302 assert( (pCur->curFlags & BTCF_ValidOvfl)==0 );
5268 return rc; 5303 return rc;
5269 } 5304 }
5270 5305
5271 5306
5272 /* 5307 /*
5273 ** Return TRUE if the cursor is not pointing at an entry of the table. 5308 ** Return TRUE if the cursor is not pointing at an entry of the table.
5274 ** 5309 **
5275 ** TRUE will be returned after a call to sqlite3BtreeNext() moves 5310 ** TRUE will be returned after a call to sqlite3BtreeNext() moves
5276 ** past the last entry in the table or sqlite3BtreePrev() moves past 5311 ** past the last entry in the table or sqlite3BtreePrev() moves past
5277 ** the first entry. TRUE is also returned if the table is empty. 5312 ** the first entry. TRUE is also returned if the table is empty.
(...skipping 25 matching lines...) Expand all
5303 ** Zero is the common case. The btree implementation is free to use the 5338 ** Zero is the common case. The btree implementation is free to use the
5304 ** initial *pRes value as a hint to improve performance, but the current 5339 ** initial *pRes value as a hint to improve performance, but the current
5305 ** SQLite btree implementation does not. (Note that the comdb2 btree 5340 ** SQLite btree implementation does not. (Note that the comdb2 btree
5306 ** implementation does use this hint, however.) 5341 ** implementation does use this hint, however.)
5307 */ 5342 */
5308 static SQLITE_NOINLINE int btreeNext(BtCursor *pCur, int *pRes){ 5343 static SQLITE_NOINLINE int btreeNext(BtCursor *pCur, int *pRes){
5309 int rc; 5344 int rc;
5310 int idx; 5345 int idx;
5311 MemPage *pPage; 5346 MemPage *pPage;
5312 5347
5313 assert( cursorHoldsMutex(pCur) ); 5348 assert( cursorOwnsBtShared(pCur) );
5314 assert( pCur->skipNext==0 || pCur->eState!=CURSOR_VALID ); 5349 assert( pCur->skipNext==0 || pCur->eState!=CURSOR_VALID );
5315 assert( *pRes==0 ); 5350 assert( *pRes==0 );
5316 if( pCur->eState!=CURSOR_VALID ){ 5351 if( pCur->eState!=CURSOR_VALID ){
5317 assert( (pCur->curFlags & BTCF_ValidOvfl)==0 ); 5352 assert( (pCur->curFlags & BTCF_ValidOvfl)==0 );
5318 rc = restoreCursorPosition(pCur); 5353 rc = restoreCursorPosition(pCur);
5319 if( rc!=SQLITE_OK ){ 5354 if( rc!=SQLITE_OK ){
5320 return rc; 5355 return rc;
5321 } 5356 }
5322 if( CURSOR_INVALID==pCur->eState ){ 5357 if( CURSOR_INVALID==pCur->eState ){
5323 *pRes = 1; 5358 *pRes = 1;
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after
5367 } 5402 }
5368 } 5403 }
5369 if( pPage->leaf ){ 5404 if( pPage->leaf ){
5370 return SQLITE_OK; 5405 return SQLITE_OK;
5371 }else{ 5406 }else{
5372 return moveToLeftmost(pCur); 5407 return moveToLeftmost(pCur);
5373 } 5408 }
5374 } 5409 }
5375 int sqlite3BtreeNext(BtCursor *pCur, int *pRes){ 5410 int sqlite3BtreeNext(BtCursor *pCur, int *pRes){
5376 MemPage *pPage; 5411 MemPage *pPage;
5377 assert( cursorHoldsMutex(pCur) ); 5412 assert( cursorOwnsBtShared(pCur) );
5378 assert( pRes!=0 ); 5413 assert( pRes!=0 );
5379 assert( *pRes==0 || *pRes==1 ); 5414 assert( *pRes==0 || *pRes==1 );
5380 assert( pCur->skipNext==0 || pCur->eState!=CURSOR_VALID ); 5415 assert( pCur->skipNext==0 || pCur->eState!=CURSOR_VALID );
5381 pCur->info.nSize = 0; 5416 pCur->info.nSize = 0;
5382 pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl); 5417 pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl);
5383 *pRes = 0; 5418 *pRes = 0;
5384 if( pCur->eState!=CURSOR_VALID ) return btreeNext(pCur, pRes); 5419 if( pCur->eState!=CURSOR_VALID ) return btreeNext(pCur, pRes);
5385 pPage = pCur->apPage[pCur->iPage]; 5420 pPage = pCur->apPage[pCur->iPage];
5386 if( (++pCur->aiIdx[pCur->iPage])>=pPage->nCell ){ 5421 if( (++pCur->aiIdx[pCur->iPage])>=pPage->nCell ){
5387 pCur->aiIdx[pCur->iPage]--; 5422 pCur->aiIdx[pCur->iPage]--;
(...skipping 24 matching lines...) Expand all
5412 ** a unique index. Otherwise the caller will have set *pRes to zero. 5447 ** a unique index. Otherwise the caller will have set *pRes to zero.
5413 ** Zero is the common case. The btree implementation is free to use the 5448 ** Zero is the common case. The btree implementation is free to use the
5414 ** initial *pRes value as a hint to improve performance, but the current 5449 ** initial *pRes value as a hint to improve performance, but the current
5415 ** SQLite btree implementation does not. (Note that the comdb2 btree 5450 ** SQLite btree implementation does not. (Note that the comdb2 btree
5416 ** implementation does use this hint, however.) 5451 ** implementation does use this hint, however.)
5417 */ 5452 */
5418 static SQLITE_NOINLINE int btreePrevious(BtCursor *pCur, int *pRes){ 5453 static SQLITE_NOINLINE int btreePrevious(BtCursor *pCur, int *pRes){
5419 int rc; 5454 int rc;
5420 MemPage *pPage; 5455 MemPage *pPage;
5421 5456
5422 assert( cursorHoldsMutex(pCur) ); 5457 assert( cursorOwnsBtShared(pCur) );
5423 assert( pRes!=0 ); 5458 assert( pRes!=0 );
5424 assert( *pRes==0 ); 5459 assert( *pRes==0 );
5425 assert( pCur->skipNext==0 || pCur->eState!=CURSOR_VALID ); 5460 assert( pCur->skipNext==0 || pCur->eState!=CURSOR_VALID );
5426 assert( (pCur->curFlags & (BTCF_AtLast|BTCF_ValidOvfl|BTCF_ValidNKey))==0 ); 5461 assert( (pCur->curFlags & (BTCF_AtLast|BTCF_ValidOvfl|BTCF_ValidNKey))==0 );
5427 assert( pCur->info.nSize==0 ); 5462 assert( pCur->info.nSize==0 );
5428 if( pCur->eState!=CURSOR_VALID ){ 5463 if( pCur->eState!=CURSOR_VALID ){
5429 rc = restoreCursorPosition(pCur); 5464 rc = restoreCursorPosition(pCur);
5430 if( rc!=SQLITE_OK ){ 5465 if( rc!=SQLITE_OK ){
5431 return rc; 5466 return rc;
5432 } 5467 }
(...skipping 22 matching lines...) Expand all
5455 }else{ 5490 }else{
5456 while( pCur->aiIdx[pCur->iPage]==0 ){ 5491 while( pCur->aiIdx[pCur->iPage]==0 ){
5457 if( pCur->iPage==0 ){ 5492 if( pCur->iPage==0 ){
5458 pCur->eState = CURSOR_INVALID; 5493 pCur->eState = CURSOR_INVALID;
5459 *pRes = 1; 5494 *pRes = 1;
5460 return SQLITE_OK; 5495 return SQLITE_OK;
5461 } 5496 }
5462 moveToParent(pCur); 5497 moveToParent(pCur);
5463 } 5498 }
5464 assert( pCur->info.nSize==0 ); 5499 assert( pCur->info.nSize==0 );
5465 assert( (pCur->curFlags & (BTCF_ValidNKey|BTCF_ValidOvfl))==0 ); 5500 assert( (pCur->curFlags & (BTCF_ValidOvfl))==0 );
5466 5501
5467 pCur->aiIdx[pCur->iPage]--; 5502 pCur->aiIdx[pCur->iPage]--;
5468 pPage = pCur->apPage[pCur->iPage]; 5503 pPage = pCur->apPage[pCur->iPage];
5469 if( pPage->intKey && !pPage->leaf ){ 5504 if( pPage->intKey && !pPage->leaf ){
5470 rc = sqlite3BtreePrevious(pCur, pRes); 5505 rc = sqlite3BtreePrevious(pCur, pRes);
5471 }else{ 5506 }else{
5472 rc = SQLITE_OK; 5507 rc = SQLITE_OK;
5473 } 5508 }
5474 } 5509 }
5475 return rc; 5510 return rc;
5476 } 5511 }
5477 int sqlite3BtreePrevious(BtCursor *pCur, int *pRes){ 5512 int sqlite3BtreePrevious(BtCursor *pCur, int *pRes){
5478 assert( cursorHoldsMutex(pCur) ); 5513 assert( cursorOwnsBtShared(pCur) );
5479 assert( pRes!=0 ); 5514 assert( pRes!=0 );
5480 assert( *pRes==0 || *pRes==1 ); 5515 assert( *pRes==0 || *pRes==1 );
5481 assert( pCur->skipNext==0 || pCur->eState!=CURSOR_VALID ); 5516 assert( pCur->skipNext==0 || pCur->eState!=CURSOR_VALID );
5482 *pRes = 0; 5517 *pRes = 0;
5483 pCur->curFlags &= ~(BTCF_AtLast|BTCF_ValidOvfl|BTCF_ValidNKey); 5518 pCur->curFlags &= ~(BTCF_AtLast|BTCF_ValidOvfl|BTCF_ValidNKey);
5484 pCur->info.nSize = 0; 5519 pCur->info.nSize = 0;
5485 if( pCur->eState!=CURSOR_VALID 5520 if( pCur->eState!=CURSOR_VALID
5486 || pCur->aiIdx[pCur->iPage]==0 5521 || pCur->aiIdx[pCur->iPage]==0
5487 || pCur->apPage[pCur->iPage]->leaf==0 5522 || pCur->apPage[pCur->iPage]->leaf==0
5488 ){ 5523 ){
(...skipping 482 matching lines...) Expand 10 before | Expand all | Expand 10 after
5971 } 6006 }
5972 6007
5973 /* 6008 /*
5974 ** Free any overflow pages associated with the given Cell. Write the 6009 ** Free any overflow pages associated with the given Cell. Write the
5975 ** local Cell size (the number of bytes on the original page, omitting 6010 ** local Cell size (the number of bytes on the original page, omitting
5976 ** overflow) into *pnSize. 6011 ** overflow) into *pnSize.
5977 */ 6012 */
5978 static int clearCell( 6013 static int clearCell(
5979 MemPage *pPage, /* The page that contains the Cell */ 6014 MemPage *pPage, /* The page that contains the Cell */
5980 unsigned char *pCell, /* First byte of the Cell */ 6015 unsigned char *pCell, /* First byte of the Cell */
5981 u16 *pnSize /* Write the size of the Cell here */ 6016 CellInfo *pInfo /* Size information about the cell */
5982 ){ 6017 ){
5983 BtShared *pBt = pPage->pBt; 6018 BtShared *pBt = pPage->pBt;
5984 CellInfo info;
5985 Pgno ovflPgno; 6019 Pgno ovflPgno;
5986 int rc; 6020 int rc;
5987 int nOvfl; 6021 int nOvfl;
5988 u32 ovflPageSize; 6022 u32 ovflPageSize;
5989 6023
5990 assert( sqlite3_mutex_held(pPage->pBt->mutex) ); 6024 assert( sqlite3_mutex_held(pPage->pBt->mutex) );
5991 pPage->xParseCell(pPage, pCell, &info); 6025 pPage->xParseCell(pPage, pCell, pInfo);
5992 *pnSize = info.nSize; 6026 if( pInfo->nLocal==pInfo->nPayload ){
5993 if( info.nLocal==info.nPayload ){
5994 return SQLITE_OK; /* No overflow pages. Return without doing anything */ 6027 return SQLITE_OK; /* No overflow pages. Return without doing anything */
5995 } 6028 }
5996 if( pCell+info.nSize-1 > pPage->aData+pPage->maskPage ){ 6029 if( pCell+pInfo->nSize-1 > pPage->aData+pPage->maskPage ){
5997 return SQLITE_CORRUPT_BKPT; /* Cell extends past end of page */ 6030 return SQLITE_CORRUPT_BKPT; /* Cell extends past end of page */
5998 } 6031 }
5999 ovflPgno = get4byte(pCell + info.nSize - 4); 6032 ovflPgno = get4byte(pCell + pInfo->nSize - 4);
6000 assert( pBt->usableSize > 4 ); 6033 assert( pBt->usableSize > 4 );
6001 ovflPageSize = pBt->usableSize - 4; 6034 ovflPageSize = pBt->usableSize - 4;
6002 nOvfl = (info.nPayload - info.nLocal + ovflPageSize - 1)/ovflPageSize; 6035 nOvfl = (pInfo->nPayload - pInfo->nLocal + ovflPageSize - 1)/ovflPageSize;
6003 assert( nOvfl>0 || 6036 assert( nOvfl>0 ||
6004 (CORRUPT_DB && (info.nPayload + ovflPageSize)<ovflPageSize) 6037 (CORRUPT_DB && (pInfo->nPayload + ovflPageSize)<ovflPageSize)
6005 ); 6038 );
6006 while( nOvfl-- ){ 6039 while( nOvfl-- ){
6007 Pgno iNext = 0; 6040 Pgno iNext = 0;
6008 MemPage *pOvfl = 0; 6041 MemPage *pOvfl = 0;
6009 if( ovflPgno<2 || ovflPgno>btreePagecount(pBt) ){ 6042 if( ovflPgno<2 || ovflPgno>btreePagecount(pBt) ){
6010 /* 0 is not a legal page number and page 1 cannot be an 6043 /* 0 is not a legal page number and page 1 cannot be an
6011 ** overflow page. Therefore if ovflPgno<2 or past the end of the 6044 ** overflow page. Therefore if ovflPgno<2 or past the end of the
6012 ** file the database must be corrupt. */ 6045 ** file the database must be corrupt. */
6013 return SQLITE_CORRUPT_BKPT; 6046 return SQLITE_CORRUPT_BKPT;
6014 } 6047 }
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
6052 ** for pCell[]. 6085 ** for pCell[].
6053 ** 6086 **
6054 ** Note that pCell does not necessary need to point to the pPage->aData 6087 ** Note that pCell does not necessary need to point to the pPage->aData
6055 ** area. pCell might point to some temporary storage. The cell will 6088 ** area. pCell might point to some temporary storage. The cell will
6056 ** be constructed in this temporary area then copied into pPage->aData 6089 ** be constructed in this temporary area then copied into pPage->aData
6057 ** later. 6090 ** later.
6058 */ 6091 */
6059 static int fillInCell( 6092 static int fillInCell(
6060 MemPage *pPage, /* The page that contains the cell */ 6093 MemPage *pPage, /* The page that contains the cell */
6061 unsigned char *pCell, /* Complete text of the cell */ 6094 unsigned char *pCell, /* Complete text of the cell */
6062 const void *pKey, i64 nKey, /* The key */ 6095 const BtreePayload *pX, /* Payload with which to construct the cell */
6063 const void *pData,int nData, /* The data */
6064 int nZero, /* Extra zero bytes to append to pData */
6065 int *pnSize /* Write cell size here */ 6096 int *pnSize /* Write cell size here */
6066 ){ 6097 ){
6067 int nPayload; 6098 int nPayload;
6068 const u8 *pSrc; 6099 const u8 *pSrc;
6069 int nSrc, n, rc; 6100 int nSrc, n, rc;
6070 int spaceLeft; 6101 int spaceLeft;
6071 MemPage *pOvfl = 0; 6102 MemPage *pOvfl = 0;
6072 MemPage *pToRelease = 0; 6103 MemPage *pToRelease = 0;
6073 unsigned char *pPrior; 6104 unsigned char *pPrior;
6074 unsigned char *pPayload; 6105 unsigned char *pPayload;
6075 BtShared *pBt = pPage->pBt; 6106 BtShared *pBt = pPage->pBt;
6076 Pgno pgnoOvfl = 0; 6107 Pgno pgnoOvfl = 0;
6077 int nHeader; 6108 int nHeader;
6078 6109
6079 assert( sqlite3_mutex_held(pPage->pBt->mutex) ); 6110 assert( sqlite3_mutex_held(pPage->pBt->mutex) );
6080 6111
6081 /* pPage is not necessarily writeable since pCell might be auxiliary 6112 /* pPage is not necessarily writeable since pCell might be auxiliary
6082 ** buffer space that is separate from the pPage buffer area */ 6113 ** buffer space that is separate from the pPage buffer area */
6083 assert( pCell<pPage->aData || pCell>=&pPage->aData[pBt->pageSize] 6114 assert( pCell<pPage->aData || pCell>=&pPage->aData[pBt->pageSize]
6084 || sqlite3PagerIswriteable(pPage->pDbPage) ); 6115 || sqlite3PagerIswriteable(pPage->pDbPage) );
6085 6116
6086 /* Fill in the header. */ 6117 /* Fill in the header. */
6087 nHeader = pPage->childPtrSize; 6118 nHeader = pPage->childPtrSize;
6088 nPayload = nData + nZero; 6119 if( pPage->intKey ){
6089 if( pPage->intKeyLeaf ){ 6120 nPayload = pX->nData + pX->nZero;
6121 pSrc = pX->pData;
6122 nSrc = pX->nData;
6123 assert( pPage->intKeyLeaf ); /* fillInCell() only called for leaves */
6090 nHeader += putVarint32(&pCell[nHeader], nPayload); 6124 nHeader += putVarint32(&pCell[nHeader], nPayload);
6125 nHeader += putVarint(&pCell[nHeader], *(u64*)&pX->nKey);
6091 }else{ 6126 }else{
6092 assert( nData==0 ); 6127 assert( pX->nKey<=0x7fffffff && pX->pKey!=0 );
6093 assert( nZero==0 ); 6128 nSrc = nPayload = (int)pX->nKey;
6129 pSrc = pX->pKey;
6130 nHeader += putVarint32(&pCell[nHeader], nPayload);
6094 } 6131 }
6095 nHeader += putVarint(&pCell[nHeader], *(u64*)&nKey);
6096 6132
6097 /* Fill in the payload size */ 6133 /* Fill in the payload */
6098 if( pPage->intKey ){
6099 pSrc = pData;
6100 nSrc = nData;
6101 nData = 0;
6102 }else{
6103 assert( nKey<=0x7fffffff && pKey!=0 );
6104 nPayload = (int)nKey;
6105 pSrc = pKey;
6106 nSrc = (int)nKey;
6107 }
6108 if( nPayload<=pPage->maxLocal ){ 6134 if( nPayload<=pPage->maxLocal ){
6109 n = nHeader + nPayload; 6135 n = nHeader + nPayload;
6110 testcase( n==3 ); 6136 testcase( n==3 );
6111 testcase( n==4 ); 6137 testcase( n==4 );
6112 if( n<4 ) n = 4; 6138 if( n<4 ) n = 4;
6113 *pnSize = n; 6139 *pnSize = n;
6114 spaceLeft = nPayload; 6140 spaceLeft = nPayload;
6115 pPrior = pCell; 6141 pPrior = pCell;
6116 }else{ 6142 }else{
6117 int mn = pPage->minLocal; 6143 int mn = pPage->minLocal;
(...skipping 16 matching lines...) Expand all
6134 ** *pnSize Size of the local cell (not counting overflow pages) 6160 ** *pnSize Size of the local cell (not counting overflow pages)
6135 ** pPrior Where to write the pgno of the first overflow page 6161 ** pPrior Where to write the pgno of the first overflow page
6136 ** 6162 **
6137 ** Use a call to btreeParseCellPtr() to verify that the values above 6163 ** Use a call to btreeParseCellPtr() to verify that the values above
6138 ** were computed correctly. 6164 ** were computed correctly.
6139 */ 6165 */
6140 #if SQLITE_DEBUG 6166 #if SQLITE_DEBUG
6141 { 6167 {
6142 CellInfo info; 6168 CellInfo info;
6143 pPage->xParseCell(pPage, pCell, &info); 6169 pPage->xParseCell(pPage, pCell, &info);
6144 assert( nHeader=(int)(info.pPayload - pCell) ); 6170 assert( nHeader==(int)(info.pPayload - pCell) );
6145 assert( info.nKey==nKey ); 6171 assert( info.nKey==pX->nKey );
6146 assert( *pnSize == info.nSize ); 6172 assert( *pnSize == info.nSize );
6147 assert( spaceLeft == info.nLocal ); 6173 assert( spaceLeft == info.nLocal );
6148 } 6174 }
6149 #endif 6175 #endif
6150 6176
6151 /* Write the payload into the local Cell and any extra into overflow pages */ 6177 /* Write the payload into the local Cell and any extra into overflow pages */
6152 while( nPayload>0 ){ 6178 while( nPayload>0 ){
6153 if( spaceLeft==0 ){ 6179 if( spaceLeft==0 ){
6154 #ifndef SQLITE_OMIT_AUTOVACUUM 6180 #ifndef SQLITE_OMIT_AUTOVACUUM
6155 Pgno pgnoPtrmap = pgnoOvfl; /* Overflow page pointer-map entry page */ 6181 Pgno pgnoPtrmap = pgnoOvfl; /* Overflow page pointer-map entry page */
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after
6220 assert( pSrc ); 6246 assert( pSrc );
6221 memcpy(pPayload, pSrc, n); 6247 memcpy(pPayload, pSrc, n);
6222 }else{ 6248 }else{
6223 memset(pPayload, 0, n); 6249 memset(pPayload, 0, n);
6224 } 6250 }
6225 nPayload -= n; 6251 nPayload -= n;
6226 pPayload += n; 6252 pPayload += n;
6227 pSrc += n; 6253 pSrc += n;
6228 nSrc -= n; 6254 nSrc -= n;
6229 spaceLeft -= n; 6255 spaceLeft -= n;
6230 if( nSrc==0 ){
6231 nSrc = nData;
6232 pSrc = pData;
6233 }
6234 } 6256 }
6235 releasePage(pToRelease); 6257 releasePage(pToRelease);
6236 return SQLITE_OK; 6258 return SQLITE_OK;
6237 } 6259 }
6238 6260
6239 /* 6261 /*
6240 ** Remove the i-th cell from pPage. This routine effects pPage only. 6262 ** Remove the i-th cell from pPage. This routine effects pPage only.
6241 ** The cell content is not freed or deallocated. It is assumed that 6263 ** The cell content is not freed or deallocated. It is assumed that
6242 ** the cell content has been copied someplace else. This routine just 6264 ** the cell content has been copied someplace else. This routine just
6243 ** removes the reference to the cell from pPage. 6265 ** removes the reference to the cell from pPage.
6244 ** 6266 **
6245 ** "sz" must be the number of bytes in the cell. 6267 ** "sz" must be the number of bytes in the cell.
6246 */ 6268 */
6247 static void dropCell(MemPage *pPage, int idx, int sz, int *pRC){ 6269 static void dropCell(MemPage *pPage, int idx, int sz, int *pRC){
6248 u32 pc; /* Offset to cell content of cell being deleted */ 6270 u32 pc; /* Offset to cell content of cell being deleted */
6249 u8 *data; /* pPage->aData */ 6271 u8 *data; /* pPage->aData */
6250 u8 *ptr; /* Used to move bytes around within data[] */ 6272 u8 *ptr; /* Used to move bytes around within data[] */
6251 int rc; /* The return code */ 6273 int rc; /* The return code */
6252 int hdr; /* Beginning of the header. 0 most pages. 100 page 1 */ 6274 int hdr; /* Beginning of the header. 0 most pages. 100 page 1 */
6253 6275
6254 if( *pRC ) return; 6276 if( *pRC ) return;
6255
6256 assert( idx>=0 && idx<pPage->nCell ); 6277 assert( idx>=0 && idx<pPage->nCell );
6257 assert( CORRUPT_DB || sz==cellSize(pPage, idx) ); 6278 assert( CORRUPT_DB || sz==cellSize(pPage, idx) );
6258 assert( sqlite3PagerIswriteable(pPage->pDbPage) ); 6279 assert( sqlite3PagerIswriteable(pPage->pDbPage) );
6259 assert( sqlite3_mutex_held(pPage->pBt->mutex) ); 6280 assert( sqlite3_mutex_held(pPage->pBt->mutex) );
6260 data = pPage->aData; 6281 data = pPage->aData;
6261 ptr = &pPage->aCellIdx[2*idx]; 6282 ptr = &pPage->aCellIdx[2*idx];
6262 pc = get2byte(ptr); 6283 pc = get2byte(ptr);
6263 hdr = pPage->hdrOffset; 6284 hdr = pPage->hdrOffset;
6264 testcase( pc==get2byte(&data[hdr+5]) ); 6285 testcase( pc==get2byte(&data[hdr+5]) );
6265 testcase( pc+sz==pPage->pBt->usableSize ); 6286 testcase( pc+sz==pPage->pBt->usableSize );
(...skipping 24 matching lines...) Expand all
6290 ** Insert a new cell on pPage at cell index "i". pCell points to the 6311 ** Insert a new cell on pPage at cell index "i". pCell points to the
6291 ** content of the cell. 6312 ** content of the cell.
6292 ** 6313 **
6293 ** If the cell content will fit on the page, then put it there. If it 6314 ** If the cell content will fit on the page, then put it there. If it
6294 ** will not fit, then make a copy of the cell content into pTemp if 6315 ** will not fit, then make a copy of the cell content into pTemp if
6295 ** pTemp is not null. Regardless of pTemp, allocate a new entry 6316 ** pTemp is not null. Regardless of pTemp, allocate a new entry
6296 ** in pPage->apOvfl[] and make it point to the cell content (either 6317 ** in pPage->apOvfl[] and make it point to the cell content (either
6297 ** in pTemp or the original pCell) and also record its index. 6318 ** in pTemp or the original pCell) and also record its index.
6298 ** Allocating a new entry in pPage->aCell[] implies that 6319 ** Allocating a new entry in pPage->aCell[] implies that
6299 ** pPage->nOverflow is incremented. 6320 ** pPage->nOverflow is incremented.
6321 **
6322 ** *pRC must be SQLITE_OK when this routine is called.
6300 */ 6323 */
6301 static void insertCell( 6324 static void insertCell(
6302 MemPage *pPage, /* Page into which we are copying */ 6325 MemPage *pPage, /* Page into which we are copying */
6303 int i, /* New cell becomes the i-th cell of the page */ 6326 int i, /* New cell becomes the i-th cell of the page */
6304 u8 *pCell, /* Content of the new cell */ 6327 u8 *pCell, /* Content of the new cell */
6305 int sz, /* Bytes of content in pCell */ 6328 int sz, /* Bytes of content in pCell */
6306 u8 *pTemp, /* Temp storage space for pCell, if needed */ 6329 u8 *pTemp, /* Temp storage space for pCell, if needed */
6307 Pgno iChild, /* If non-zero, replace first 4 bytes with this value */ 6330 Pgno iChild, /* If non-zero, replace first 4 bytes with this value */
6308 int *pRC /* Read and write return code from here */ 6331 int *pRC /* Read and write return code from here */
6309 ){ 6332 ){
6310 int idx = 0; /* Where to write new cell content in data[] */ 6333 int idx = 0; /* Where to write new cell content in data[] */
6311 int j; /* Loop counter */ 6334 int j; /* Loop counter */
6312 u8 *data; /* The content of the whole page */ 6335 u8 *data; /* The content of the whole page */
6313 u8 *pIns; /* The point in pPage->aCellIdx[] where no cell inserted */ 6336 u8 *pIns; /* The point in pPage->aCellIdx[] where no cell inserted */
6314 6337
6315 if( *pRC ) return; 6338 assert( *pRC==SQLITE_OK );
6316
6317 assert( i>=0 && i<=pPage->nCell+pPage->nOverflow ); 6339 assert( i>=0 && i<=pPage->nCell+pPage->nOverflow );
6318 assert( MX_CELL(pPage->pBt)<=10921 ); 6340 assert( MX_CELL(pPage->pBt)<=10921 );
6319 assert( pPage->nCell<=MX_CELL(pPage->pBt) || CORRUPT_DB ); 6341 assert( pPage->nCell<=MX_CELL(pPage->pBt) || CORRUPT_DB );
6320 assert( pPage->nOverflow<=ArraySize(pPage->apOvfl) ); 6342 assert( pPage->nOverflow<=ArraySize(pPage->apOvfl) );
6321 assert( ArraySize(pPage->apOvfl)==ArraySize(pPage->aiOvfl) ); 6343 assert( ArraySize(pPage->apOvfl)==ArraySize(pPage->aiOvfl) );
6322 assert( sqlite3_mutex_held(pPage->pBt->mutex) ); 6344 assert( sqlite3_mutex_held(pPage->pBt->mutex) );
6323 /* The cell should normally be sized correctly. However, when moving a 6345 /* The cell should normally be sized correctly. However, when moving a
6324 ** malformed cell from a leaf page to an interior page, if the cell size 6346 ** malformed cell from a leaf page to an interior page, if the cell size
6325 ** wanted to be less than 4 but got rounded up to 4 on the leaf, then size 6347 ** wanted to be less than 4 but got rounded up to 4 on the leaf, then size
6326 ** might be less than 8 (leaf-size + pointer) on the interior node. Hence 6348 ** might be less than 8 (leaf-size + pointer) on the interior node. Hence
6327 ** the term after the || in the following assert(). */ 6349 ** the term after the || in the following assert(). */
6328 assert( sz==pPage->xCellSize(pPage, pCell) || (sz==8 && iChild>0) ); 6350 assert( sz==pPage->xCellSize(pPage, pCell) || (sz==8 && iChild>0) );
6329 if( pPage->nOverflow || sz+2>pPage->nFree ){ 6351 if( pPage->nOverflow || sz+2>pPage->nFree ){
6330 if( pTemp ){ 6352 if( pTemp ){
6331 memcpy(pTemp, pCell, sz); 6353 memcpy(pTemp, pCell, sz);
6332 pCell = pTemp; 6354 pCell = pTemp;
6333 } 6355 }
6334 if( iChild ){ 6356 if( iChild ){
6335 put4byte(pCell, iChild); 6357 put4byte(pCell, iChild);
6336 } 6358 }
6337 j = pPage->nOverflow++; 6359 j = pPage->nOverflow++;
6338 assert( j<(int)(sizeof(pPage->apOvfl)/sizeof(pPage->apOvfl[0])) ); 6360 /* Comparison against ArraySize-1 since we hold back one extra slot
6361 ** as a contingency. In other words, never need more than 3 overflow
6362 ** slots but 4 are allocated, just to be safe. */
6363 assert( j < ArraySize(pPage->apOvfl)-1 );
6339 pPage->apOvfl[j] = pCell; 6364 pPage->apOvfl[j] = pCell;
6340 pPage->aiOvfl[j] = (u16)i; 6365 pPage->aiOvfl[j] = (u16)i;
6341 6366
6342 /* When multiple overflows occur, they are always sequential and in 6367 /* When multiple overflows occur, they are always sequential and in
6343 ** sorted order. This invariants arise because multiple overflows can 6368 ** sorted order. This invariants arise because multiple overflows can
6344 ** only occur when inserting divider cells into the parent page during 6369 ** only occur when inserting divider cells into the parent page during
6345 ** balancing, and the dividers are adjacent and sorted. 6370 ** balancing, and the dividers are adjacent and sorted.
6346 */ 6371 */
6347 assert( j==0 || pPage->aiOvfl[j-1]<(u16)i ); /* Overflows in sorted order */ 6372 assert( j==0 || pPage->aiOvfl[j-1]<(u16)i ); /* Overflows in sorted order */
6348 assert( j==0 || i==pPage->aiOvfl[j-1]+1 ); /* Overflows are sequential */ 6373 assert( j==0 || i==pPage->aiOvfl[j-1]+1 ); /* Overflows are sequential */
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after
6380 ** the entry for the overflow page into the pointer map. 6405 ** the entry for the overflow page into the pointer map.
6381 */ 6406 */
6382 ptrmapPutOvflPtr(pPage, pCell, pRC); 6407 ptrmapPutOvflPtr(pPage, pCell, pRC);
6383 } 6408 }
6384 #endif 6409 #endif
6385 } 6410 }
6386 } 6411 }
6387 6412
6388 /* 6413 /*
6389 ** A CellArray object contains a cache of pointers and sizes for a 6414 ** A CellArray object contains a cache of pointers and sizes for a
6390 ** consecutive sequence of cells that might be held multiple pages. 6415 ** consecutive sequence of cells that might be held on multiple pages.
6391 */ 6416 */
6392 typedef struct CellArray CellArray; 6417 typedef struct CellArray CellArray;
6393 struct CellArray { 6418 struct CellArray {
6394 int nCell; /* Number of cells in apCell[] */ 6419 int nCell; /* Number of cells in apCell[] */
6395 MemPage *pRef; /* Reference page */ 6420 MemPage *pRef; /* Reference page */
6396 u8 **apCell; /* All cells begin balanced */ 6421 u8 **apCell; /* All cells begin balanced */
6397 u16 *szCell; /* Local size of all cells in apCell[] */ 6422 u16 *szCell; /* Local size of all cells in apCell[] */
6398 }; 6423 };
6399 6424
6400 /* 6425 /*
(...skipping 124 matching lines...) Expand 10 before | Expand all | Expand 10 after
6525 int i; 6550 int i;
6526 u8 *aData = pPg->aData; 6551 u8 *aData = pPg->aData;
6527 u8 *pData = *ppData; 6552 u8 *pData = *ppData;
6528 int iEnd = iFirst + nCell; 6553 int iEnd = iFirst + nCell;
6529 assert( CORRUPT_DB || pPg->hdrOffset==0 ); /* Never called on page 1 */ 6554 assert( CORRUPT_DB || pPg->hdrOffset==0 ); /* Never called on page 1 */
6530 for(i=iFirst; i<iEnd; i++){ 6555 for(i=iFirst; i<iEnd; i++){
6531 int sz, rc; 6556 int sz, rc;
6532 u8 *pSlot; 6557 u8 *pSlot;
6533 sz = cachedCellSize(pCArray, i); 6558 sz = cachedCellSize(pCArray, i);
6534 if( (aData[1]==0 && aData[2]==0) || (pSlot = pageFindSlot(pPg,sz,&rc))==0 ){ 6559 if( (aData[1]==0 && aData[2]==0) || (pSlot = pageFindSlot(pPg,sz,&rc))==0 ){
6560 if( (pData - pBegin)<sz ) return 1;
6535 pData -= sz; 6561 pData -= sz;
6536 if( pData<pBegin ) return 1;
6537 pSlot = pData; 6562 pSlot = pData;
6538 } 6563 }
6539 /* pSlot and pCArray->apCell[i] will never overlap on a well-formed 6564 /* pSlot and pCArray->apCell[i] will never overlap on a well-formed
6540 ** database. But they might for a corrupt database. Hence use memmove() 6565 ** database. But they might for a corrupt database. Hence use memmove()
6541 ** since memcpy() sends SIGABORT with overlapping buffers on OpenBSD */ 6566 ** since memcpy() sends SIGABORT with overlapping buffers on OpenBSD */
6542 assert( (pSlot+sz)<=pCArray->apCell[i] 6567 assert( (pSlot+sz)<=pCArray->apCell[i]
6543 || pSlot>=(pCArray->apCell[i]+sz) 6568 || pSlot>=(pCArray->apCell[i]+sz)
6544 || CORRUPT_DB ); 6569 || CORRUPT_DB );
6545 memmove(pSlot, pCArray->apCell[i], sz); 6570 memmove(pSlot, pCArray->apCell[i], sz);
6546 put2byte(pCellptr, (pSlot - aData)); 6571 put2byte(pCellptr, (pSlot - aData));
(...skipping 141 matching lines...) Expand 10 before | Expand all | Expand 10 after
6688 pPg->nCell = nNew; 6713 pPg->nCell = nNew;
6689 pPg->nOverflow = 0; 6714 pPg->nOverflow = 0;
6690 6715
6691 put2byte(&aData[hdr+3], pPg->nCell); 6716 put2byte(&aData[hdr+3], pPg->nCell);
6692 put2byte(&aData[hdr+5], pData - aData); 6717 put2byte(&aData[hdr+5], pData - aData);
6693 6718
6694 #ifdef SQLITE_DEBUG 6719 #ifdef SQLITE_DEBUG
6695 for(i=0; i<nNew && !CORRUPT_DB; i++){ 6720 for(i=0; i<nNew && !CORRUPT_DB; i++){
6696 u8 *pCell = pCArray->apCell[i+iNew]; 6721 u8 *pCell = pCArray->apCell[i+iNew];
6697 int iOff = get2byteAligned(&pPg->aCellIdx[i*2]); 6722 int iOff = get2byteAligned(&pPg->aCellIdx[i*2]);
6698 if( pCell>=aData && pCell<&aData[pPg->pBt->usableSize] ){ 6723 if( SQLITE_WITHIN(pCell, aData, &aData[pPg->pBt->usableSize]) ){
6699 pCell = &pTmp[pCell - aData]; 6724 pCell = &pTmp[pCell - aData];
6700 } 6725 }
6701 assert( 0==memcmp(pCell, &aData[iOff], 6726 assert( 0==memcmp(pCell, &aData[iOff],
6702 pCArray->pRef->xCellSize(pCArray->pRef, pCArray->apCell[i+iNew])) ); 6727 pCArray->pRef->xCellSize(pCArray->pRef, pCArray->apCell[i+iNew])) );
6703 } 6728 }
6704 #endif 6729 #endif
6705 6730
6706 return SQLITE_OK; 6731 return SQLITE_OK;
6707 editpage_fail: 6732 editpage_fail:
6708 /* Unable to edit this page. Rebuild it from scratch instead. */ 6733 /* Unable to edit this page. Rebuild it from scratch instead. */
(...skipping 103 matching lines...) Expand 10 before | Expand all | Expand 10 after
6812 ** field. The second while(...) loop copies the key value from the 6837 ** field. The second while(...) loop copies the key value from the
6813 ** cell on pPage into the pSpace buffer. 6838 ** cell on pPage into the pSpace buffer.
6814 */ 6839 */
6815 pCell = findCell(pPage, pPage->nCell-1); 6840 pCell = findCell(pPage, pPage->nCell-1);
6816 pStop = &pCell[9]; 6841 pStop = &pCell[9];
6817 while( (*(pCell++)&0x80) && pCell<pStop ); 6842 while( (*(pCell++)&0x80) && pCell<pStop );
6818 pStop = &pCell[9]; 6843 pStop = &pCell[9];
6819 while( ((*(pOut++) = *(pCell++))&0x80) && pCell<pStop ); 6844 while( ((*(pOut++) = *(pCell++))&0x80) && pCell<pStop );
6820 6845
6821 /* Insert the new divider cell into pParent. */ 6846 /* Insert the new divider cell into pParent. */
6822 insertCell(pParent, pParent->nCell, pSpace, (int)(pOut-pSpace), 6847 if( rc==SQLITE_OK ){
6823 0, pPage->pgno, &rc); 6848 insertCell(pParent, pParent->nCell, pSpace, (int)(pOut-pSpace),
6849 0, pPage->pgno, &rc);
6850 }
6824 6851
6825 /* Set the right-child pointer of pParent to point to the new page. */ 6852 /* Set the right-child pointer of pParent to point to the new page. */
6826 put4byte(&pParent->aData[pParent->hdrOffset+8], pgnoNew); 6853 put4byte(&pParent->aData[pParent->hdrOffset+8], pgnoNew);
6827 6854
6828 /* Release the reference to the new page. */ 6855 /* Release the reference to the new page. */
6829 releasePage(pNew); 6856 releasePage(pNew);
6830 } 6857 }
6831 6858
6832 return rc; 6859 return rc;
6833 } 6860 }
(...skipping 188 matching lines...) Expand 10 before | Expand all | Expand 10 after
7022 7049
7023 /* At this point pParent may have at most one overflow cell. And if 7050 /* At this point pParent may have at most one overflow cell. And if
7024 ** this overflow cell is present, it must be the cell with 7051 ** this overflow cell is present, it must be the cell with
7025 ** index iParentIdx. This scenario comes about when this function 7052 ** index iParentIdx. This scenario comes about when this function
7026 ** is called (indirectly) from sqlite3BtreeDelete(). 7053 ** is called (indirectly) from sqlite3BtreeDelete().
7027 */ 7054 */
7028 assert( pParent->nOverflow==0 || pParent->nOverflow==1 ); 7055 assert( pParent->nOverflow==0 || pParent->nOverflow==1 );
7029 assert( pParent->nOverflow==0 || pParent->aiOvfl[0]==iParentIdx ); 7056 assert( pParent->nOverflow==0 || pParent->aiOvfl[0]==iParentIdx );
7030 7057
7031 if( !aOvflSpace ){ 7058 if( !aOvflSpace ){
7032 return SQLITE_NOMEM; 7059 return SQLITE_NOMEM_BKPT;
7033 } 7060 }
7034 7061
7035 /* Find the sibling pages to balance. Also locate the cells in pParent 7062 /* Find the sibling pages to balance. Also locate the cells in pParent
7036 ** that divide the siblings. An attempt is made to find NN siblings on 7063 ** that divide the siblings. An attempt is made to find NN siblings on
7037 ** either side of pPage. More siblings are taken from one side, however, 7064 ** either side of pPage. More siblings are taken from one side, however,
7038 ** if there are fewer than NN siblings on the other side. If pParent 7065 ** if there are fewer than NN siblings on the other side. If pParent
7039 ** has NB or fewer children then all children of pParent are taken. 7066 ** has NB or fewer children then all children of pParent are taken.
7040 ** 7067 **
7041 ** This loop also drops the divider cells from the parent page. This 7068 ** This loop also drops the divider cells from the parent page. This
7042 ** way, the remainder of the function does not have to deal with any 7069 ** way, the remainder of the function does not have to deal with any
(...skipping 23 matching lines...) Expand all
7066 pgno = get4byte(pRight); 7093 pgno = get4byte(pRight);
7067 while( 1 ){ 7094 while( 1 ){
7068 rc = getAndInitPage(pBt, pgno, &apOld[i], 0, 0); 7095 rc = getAndInitPage(pBt, pgno, &apOld[i], 0, 0);
7069 if( rc ){ 7096 if( rc ){
7070 memset(apOld, 0, (i+1)*sizeof(MemPage*)); 7097 memset(apOld, 0, (i+1)*sizeof(MemPage*));
7071 goto balance_cleanup; 7098 goto balance_cleanup;
7072 } 7099 }
7073 nMaxCells += 1+apOld[i]->nCell+apOld[i]->nOverflow; 7100 nMaxCells += 1+apOld[i]->nCell+apOld[i]->nOverflow;
7074 if( (i--)==0 ) break; 7101 if( (i--)==0 ) break;
7075 7102
7076 if( i+nxDiv==pParent->aiOvfl[0] && pParent->nOverflow ){ 7103 if( pParent->nOverflow && i+nxDiv==pParent->aiOvfl[0] ){
7077 apDiv[i] = pParent->apOvfl[0]; 7104 apDiv[i] = pParent->apOvfl[0];
7078 pgno = get4byte(apDiv[i]); 7105 pgno = get4byte(apDiv[i]);
7079 szNew[i] = pParent->xCellSize(pParent, apDiv[i]); 7106 szNew[i] = pParent->xCellSize(pParent, apDiv[i]);
7080 pParent->nOverflow = 0; 7107 pParent->nOverflow = 0;
7081 }else{ 7108 }else{
7082 apDiv[i] = findCell(pParent, i+nxDiv-pParent->nOverflow); 7109 apDiv[i] = findCell(pParent, i+nxDiv-pParent->nOverflow);
7083 pgno = get4byte(apDiv[i]); 7110 pgno = get4byte(apDiv[i]);
7084 szNew[i] = pParent->xCellSize(pParent, apDiv[i]); 7111 szNew[i] = pParent->xCellSize(pParent, apDiv[i]);
7085 7112
7086 /* Drop the cell from the parent page. apDiv[i] still points to 7113 /* Drop the cell from the parent page. apDiv[i] still points to
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after
7122 szScratch = 7149 szScratch =
7123 nMaxCells*sizeof(u8*) /* b.apCell */ 7150 nMaxCells*sizeof(u8*) /* b.apCell */
7124 + nMaxCells*sizeof(u16) /* b.szCell */ 7151 + nMaxCells*sizeof(u16) /* b.szCell */
7125 + pBt->pageSize; /* aSpace1 */ 7152 + pBt->pageSize; /* aSpace1 */
7126 7153
7127 /* EVIDENCE-OF: R-28375-38319 SQLite will never request a scratch buffer 7154 /* EVIDENCE-OF: R-28375-38319 SQLite will never request a scratch buffer
7128 ** that is more than 6 times the database page size. */ 7155 ** that is more than 6 times the database page size. */
7129 assert( szScratch<=6*(int)pBt->pageSize ); 7156 assert( szScratch<=6*(int)pBt->pageSize );
7130 b.apCell = sqlite3ScratchMalloc( szScratch ); 7157 b.apCell = sqlite3ScratchMalloc( szScratch );
7131 if( b.apCell==0 ){ 7158 if( b.apCell==0 ){
7132 rc = SQLITE_NOMEM; 7159 rc = SQLITE_NOMEM_BKPT;
7133 goto balance_cleanup; 7160 goto balance_cleanup;
7134 } 7161 }
7135 b.szCell = (u16*)&b.apCell[nMaxCells]; 7162 b.szCell = (u16*)&b.apCell[nMaxCells];
7136 aSpace1 = (u8*)&b.szCell[nMaxCells]; 7163 aSpace1 = (u8*)&b.szCell[nMaxCells];
7137 assert( EIGHT_BYTE_ALIGNMENT(aSpace1) ); 7164 assert( EIGHT_BYTE_ALIGNMENT(aSpace1) );
7138 7165
7139 /* 7166 /*
7140 ** Load pointers to all cells on sibling pages and the divider cells 7167 ** Load pointers to all cells on sibling pages and the divider cells
7141 ** into the local b.apCell[] array. Make copies of the divider cells 7168 ** into the local b.apCell[] array. Make copies of the divider cells
7142 ** into space obtained from aSpace1[]. The divider cells have already 7169 ** into space obtained from aSpace1[]. The divider cells have already
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
7181 ** cells into a parent on a prior balance, and divider cells are always 7208 ** cells into a parent on a prior balance, and divider cells are always
7182 ** adjacent and are inserted in order. There is an assert() tagged 7209 ** adjacent and are inserted in order. There is an assert() tagged
7183 ** with "NOTE 1" in the overflow cell insertion loop to prove this 7210 ** with "NOTE 1" in the overflow cell insertion loop to prove this
7184 ** invariant. 7211 ** invariant.
7185 ** 7212 **
7186 ** This must be done in advance. Once the balance starts, the cell 7213 ** This must be done in advance. Once the balance starts, the cell
7187 ** offset section of the btree page will be overwritten and we will no 7214 ** offset section of the btree page will be overwritten and we will no
7188 ** long be able to find the cells if a pointer to each cell is not saved 7215 ** long be able to find the cells if a pointer to each cell is not saved
7189 ** first. 7216 ** first.
7190 */ 7217 */
7191 memset(&b.szCell[b.nCell], 0, sizeof(b.szCell[0])*limit); 7218 memset(&b.szCell[b.nCell], 0, sizeof(b.szCell[0])*(limit+pOld->nOverflow));
7192 if( pOld->nOverflow>0 ){ 7219 if( pOld->nOverflow>0 ){
7193 memset(&b.szCell[b.nCell+limit], 0, sizeof(b.szCell[0])*pOld->nOverflow);
7194 limit = pOld->aiOvfl[0]; 7220 limit = pOld->aiOvfl[0];
7195 for(j=0; j<limit; j++){ 7221 for(j=0; j<limit; j++){
7196 b.apCell[b.nCell] = aData + (maskPage & get2byteAligned(piCell)); 7222 b.apCell[b.nCell] = aData + (maskPage & get2byteAligned(piCell));
7197 piCell += 2; 7223 piCell += 2;
7198 b.nCell++; 7224 b.nCell++;
7199 } 7225 }
7200 for(k=0; k<pOld->nOverflow; k++){ 7226 for(k=0; k<pOld->nOverflow; k++){
7201 assert( k==0 || pOld->aiOvfl[k-1]+1==pOld->aiOvfl[k] );/* NOTE 1 */ 7227 assert( k==0 || pOld->aiOvfl[k-1]+1==pOld->aiOvfl[k] );/* NOTE 1 */
7202 b.apCell[b.nCell] = pOld->apOvfl[k]; 7228 b.apCell[b.nCell] = pOld->apOvfl[k];
7203 b.nCell++; 7229 b.nCell++;
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after
7259 ** szNew[i]: Spaced used on the i-th sibling page. 7285 ** szNew[i]: Spaced used on the i-th sibling page.
7260 ** cntNew[i]: Index in b.apCell[] and b.szCell[] for the first cell to 7286 ** cntNew[i]: Index in b.apCell[] and b.szCell[] for the first cell to
7261 ** the right of the i-th sibling page. 7287 ** the right of the i-th sibling page.
7262 ** usableSpace: Number of bytes of space available on each sibling. 7288 ** usableSpace: Number of bytes of space available on each sibling.
7263 ** 7289 **
7264 */ 7290 */
7265 usableSpace = pBt->usableSize - 12 + leafCorrection; 7291 usableSpace = pBt->usableSize - 12 + leafCorrection;
7266 for(i=0; i<nOld; i++){ 7292 for(i=0; i<nOld; i++){
7267 MemPage *p = apOld[i]; 7293 MemPage *p = apOld[i];
7268 szNew[i] = usableSpace - p->nFree; 7294 szNew[i] = usableSpace - p->nFree;
7269 if( szNew[i]<0 ){ rc = SQLITE_CORRUPT_BKPT; goto balance_cleanup; }
7270 for(j=0; j<p->nOverflow; j++){ 7295 for(j=0; j<p->nOverflow; j++){
7271 szNew[i] += 2 + p->xCellSize(p, p->apOvfl[j]); 7296 szNew[i] += 2 + p->xCellSize(p, p->apOvfl[j]);
7272 } 7297 }
7273 cntNew[i] = cntOld[i]; 7298 cntNew[i] = cntOld[i];
7274 } 7299 }
7275 k = nOld; 7300 k = nOld;
7276 for(i=0; i<k; i++){ 7301 for(i=0; i<k; i++){
7277 int sz; 7302 int sz;
7278 while( szNew[i]>usableSpace ){ 7303 while( szNew[i]>usableSpace ){
7279 if( i+1>=k ){ 7304 if( i+1>=k ){
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after
7334 int d; /* Index of first cell to the left of right sibling */ 7359 int d; /* Index of first cell to the left of right sibling */
7335 7360
7336 r = cntNew[i-1] - 1; 7361 r = cntNew[i-1] - 1;
7337 d = r + 1 - leafData; 7362 d = r + 1 - leafData;
7338 (void)cachedCellSize(&b, d); 7363 (void)cachedCellSize(&b, d);
7339 do{ 7364 do{
7340 assert( d<nMaxCells ); 7365 assert( d<nMaxCells );
7341 assert( r<nMaxCells ); 7366 assert( r<nMaxCells );
7342 (void)cachedCellSize(&b, r); 7367 (void)cachedCellSize(&b, r);
7343 if( szRight!=0 7368 if( szRight!=0
7344 && (bBulk || szRight+b.szCell[d]+2 > szLeft-(b.szCell[r]+2)) ){ 7369 && (bBulk || szRight+b.szCell[d]+2 > szLeft-(b.szCell[r]+(i==k-1?0:2)))){
7345 break; 7370 break;
7346 } 7371 }
7347 szRight += b.szCell[d] + 2; 7372 szRight += b.szCell[d] + 2;
7348 szLeft -= b.szCell[r] + 2; 7373 szLeft -= b.szCell[r] + 2;
7349 cntNew[i-1] = r; 7374 cntNew[i-1] = r;
7350 r--; 7375 r--;
7351 d--; 7376 d--;
7352 }while( r>=0 ); 7377 }while( r>=0 );
7353 szNew[i] = szRight; 7378 szNew[i] = szRight;
7354 szNew[i-1] = szLeft; 7379 szNew[i-1] = szLeft;
(...skipping 203 matching lines...) Expand 10 before | Expand all | Expand 10 after
7558 pTemp = 0; 7583 pTemp = 0;
7559 }else{ 7584 }else{
7560 pCell -= 4; 7585 pCell -= 4;
7561 /* Obscure case for non-leaf-data trees: If the cell at pCell was 7586 /* Obscure case for non-leaf-data trees: If the cell at pCell was
7562 ** previously stored on a leaf node, and its reported size was 4 7587 ** previously stored on a leaf node, and its reported size was 4
7563 ** bytes, then it may actually be smaller than this 7588 ** bytes, then it may actually be smaller than this
7564 ** (see btreeParseCellPtr(), 4 bytes is the minimum size of 7589 ** (see btreeParseCellPtr(), 4 bytes is the minimum size of
7565 ** any cell). But it is important to pass the correct size to 7590 ** any cell). But it is important to pass the correct size to
7566 ** insertCell(), so reparse the cell now. 7591 ** insertCell(), so reparse the cell now.
7567 ** 7592 **
7568 ** Note that this can never happen in an SQLite data file, as all 7593 ** This can only happen for b-trees used to evaluate "IN (SELECT ...)"
7569 ** cells are at least 4 bytes. It only happens in b-trees used 7594 ** and WITHOUT ROWID tables with exactly one column which is the
7570 ** to evaluate "IN (SELECT ...)" and similar clauses. 7595 ** primary key.
7571 */ 7596 */
7572 if( b.szCell[j]==4 ){ 7597 if( b.szCell[j]==4 ){
7573 assert(leafCorrection==4); 7598 assert(leafCorrection==4);
7574 sz = pParent->xCellSize(pParent, pCell); 7599 sz = pParent->xCellSize(pParent, pCell);
7575 } 7600 }
7576 } 7601 }
7577 iOvflSpace += sz; 7602 iOvflSpace += sz;
7578 assert( sz<=pBt->maxLocal+23 ); 7603 assert( sz<=pBt->maxLocal+23 );
7579 assert( iOvflSpace <= (int)pBt->pageSize ); 7604 assert( iOvflSpace <= (int)pBt->pageSize );
7580 insertCell(pParent, nxDiv+i, pCell, sz, pTemp, pNew->pgno, &rc); 7605 insertCell(pParent, nxDiv+i, pCell, sz, pTemp, pNew->pgno, &rc);
(...skipping 213 matching lines...) Expand 10 before | Expand all | Expand 10 after
7794 ** balance_quick() 7819 ** balance_quick()
7795 ** balance_deeper() 7820 ** balance_deeper()
7796 ** balance_nonroot() 7821 ** balance_nonroot()
7797 */ 7822 */
7798 static int balance(BtCursor *pCur){ 7823 static int balance(BtCursor *pCur){
7799 int rc = SQLITE_OK; 7824 int rc = SQLITE_OK;
7800 const int nMin = pCur->pBt->usableSize * 2 / 3; 7825 const int nMin = pCur->pBt->usableSize * 2 / 3;
7801 u8 aBalanceQuickSpace[13]; 7826 u8 aBalanceQuickSpace[13];
7802 u8 *pFree = 0; 7827 u8 *pFree = 0;
7803 7828
7804 TESTONLY( int balance_quick_called = 0 ); 7829 VVA_ONLY( int balance_quick_called = 0 );
7805 TESTONLY( int balance_deeper_called = 0 ); 7830 VVA_ONLY( int balance_deeper_called = 0 );
7806 7831
7807 do { 7832 do {
7808 int iPage = pCur->iPage; 7833 int iPage = pCur->iPage;
7809 MemPage *pPage = pCur->apPage[iPage]; 7834 MemPage *pPage = pCur->apPage[iPage];
7810 7835
7811 if( iPage==0 ){ 7836 if( iPage==0 ){
7812 if( pPage->nOverflow ){ 7837 if( pPage->nOverflow ){
7813 /* The root page of the b-tree is overfull. In this case call the 7838 /* The root page of the b-tree is overfull. In this case call the
7814 ** balance_deeper() function to create a new child for the root-page 7839 ** balance_deeper() function to create a new child for the root-page
7815 ** and copy the current contents of the root-page to it. The 7840 ** and copy the current contents of the root-page to it. The
7816 ** next iteration of the do-loop will balance the child page. 7841 ** next iteration of the do-loop will balance the child page.
7817 */ 7842 */
7818 assert( (balance_deeper_called++)==0 ); 7843 assert( balance_deeper_called==0 );
7844 VVA_ONLY( balance_deeper_called++ );
7819 rc = balance_deeper(pPage, &pCur->apPage[1]); 7845 rc = balance_deeper(pPage, &pCur->apPage[1]);
7820 if( rc==SQLITE_OK ){ 7846 if( rc==SQLITE_OK ){
7821 pCur->iPage = 1; 7847 pCur->iPage = 1;
7822 pCur->aiIdx[0] = 0; 7848 pCur->aiIdx[0] = 0;
7823 pCur->aiIdx[1] = 0; 7849 pCur->aiIdx[1] = 0;
7824 assert( pCur->apPage[1]->nOverflow ); 7850 assert( pCur->apPage[1]->nOverflow );
7825 } 7851 }
7826 }else{ 7852 }else{
7827 break; 7853 break;
7828 } 7854 }
(...skipping 18 matching lines...) Expand all
7847 ** happens, the next iteration of the do-loop will balance pParent 7873 ** happens, the next iteration of the do-loop will balance pParent
7848 ** use either balance_nonroot() or balance_deeper(). Until this 7874 ** use either balance_nonroot() or balance_deeper(). Until this
7849 ** happens, the overflow cell is stored in the aBalanceQuickSpace[] 7875 ** happens, the overflow cell is stored in the aBalanceQuickSpace[]
7850 ** buffer. 7876 ** buffer.
7851 ** 7877 **
7852 ** The purpose of the following assert() is to check that only a 7878 ** The purpose of the following assert() is to check that only a
7853 ** single call to balance_quick() is made for each call to this 7879 ** single call to balance_quick() is made for each call to this
7854 ** function. If this were not verified, a subtle bug involving reuse 7880 ** function. If this were not verified, a subtle bug involving reuse
7855 ** of the aBalanceQuickSpace[] might sneak in. 7881 ** of the aBalanceQuickSpace[] might sneak in.
7856 */ 7882 */
7857 assert( (balance_quick_called++)==0 ); 7883 assert( balance_quick_called==0 );
7884 VVA_ONLY( balance_quick_called++ );
7858 rc = balance_quick(pParent, pPage, aBalanceQuickSpace); 7885 rc = balance_quick(pParent, pPage, aBalanceQuickSpace);
7859 }else 7886 }else
7860 #endif 7887 #endif
7861 { 7888 {
7862 /* In this case, call balance_nonroot() to redistribute cells 7889 /* In this case, call balance_nonroot() to redistribute cells
7863 ** between pPage and up to 2 of its sibling pages. This involves 7890 ** between pPage and up to 2 of its sibling pages. This involves
7864 ** modifying the contents of pParent, which may cause pParent to 7891 ** modifying the contents of pParent, which may cause pParent to
7865 ** become overfull or underfull. The next iteration of the do-loop 7892 ** become overfull or underfull. The next iteration of the do-loop
7866 ** will balance the parent page to correct this. 7893 ** will balance the parent page to correct this.
7867 ** 7894 **
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
7904 }while( rc==SQLITE_OK ); 7931 }while( rc==SQLITE_OK );
7905 7932
7906 if( pFree ){ 7933 if( pFree ){
7907 sqlite3PageFree(pFree); 7934 sqlite3PageFree(pFree);
7908 } 7935 }
7909 return rc; 7936 return rc;
7910 } 7937 }
7911 7938
7912 7939
7913 /* 7940 /*
7914 ** Insert a new record into the BTree. The key is given by (pKey,nKey) 7941 ** Insert a new record into the BTree. The content of the new record
7915 ** and the data is given by (pData,nData). The cursor is used only to 7942 ** is described by the pX object. The pCur cursor is used only to
7916 ** define what table the record should be inserted into. The cursor 7943 ** define what table the record should be inserted into, and is left
7917 ** is left pointing at a random location. 7944 ** pointing at a random location.
7918 ** 7945 **
7919 ** For an INTKEY table, only the nKey value of the key is used. pKey is 7946 ** For a table btree (used for rowid tables), only the pX.nKey value of
7920 ** ignored. For a ZERODATA table, the pData and nData are both ignored. 7947 ** the key is used. The pX.pKey value must be NULL. The pX.nKey is the
7948 ** rowid or INTEGER PRIMARY KEY of the row. The pX.nData,pData,nZero fields
7949 ** hold the content of the row.
7950 **
7951 ** For an index btree (used for indexes and WITHOUT ROWID tables), the
7952 ** key is an arbitrary byte sequence stored in pX.pKey,nKey. The
7953 ** pX.pData,nData,nZero fields must be zero.
7921 ** 7954 **
7922 ** If the seekResult parameter is non-zero, then a successful call to 7955 ** If the seekResult parameter is non-zero, then a successful call to
7923 ** MovetoUnpacked() to seek cursor pCur to (pKey, nKey) has already 7956 ** MovetoUnpacked() to seek cursor pCur to (pKey,nKey) has already
7924 ** been performed. seekResult is the search result returned (a negative 7957 ** been performed. In other words, if seekResult!=0 then the cursor
7925 ** number if pCur points at an entry that is smaller than (pKey, nKey), or 7958 ** is currently pointing to a cell that will be adjacent to the cell
7926 ** a positive value if pCur points at an entry that is larger than 7959 ** to be inserted. If seekResult<0 then pCur points to a cell that is
7927 ** (pKey, nKey)). 7960 ** smaller then (pKey,nKey). If seekResult>0 then pCur points to a cell
7961 ** that is larger than (pKey,nKey).
7928 ** 7962 **
7929 ** If the seekResult parameter is non-zero, then the caller guarantees that 7963 ** If seekResult==0, that means pCur is pointing at some unknown location.
7930 ** cursor pCur is pointing at the existing copy of a row that is to be 7964 ** In that case, this routine must seek the cursor to the correct insertion
7931 ** overwritten. If the seekResult parameter is 0, then cursor pCur may 7965 ** point for (pKey,nKey) before doing the insertion. For index btrees,
7932 ** point to any entry or to no entry at all and so this function has to seek 7966 ** if pX->nMem is non-zero, then pX->aMem contains pointers to the unpacked
7933 ** the cursor before the new key can be inserted. 7967 ** key values and pX->aMem can be used instead of pX->pKey to avoid having
7968 ** to decode the key.
7934 */ 7969 */
7935 int sqlite3BtreeInsert( 7970 int sqlite3BtreeInsert(
7936 BtCursor *pCur, /* Insert data into the table of this cursor */ 7971 BtCursor *pCur, /* Insert data into the table of this cursor */
7937 const void *pKey, i64 nKey, /* The key of the new record */ 7972 const BtreePayload *pX, /* Content of the row to be inserted */
7938 const void *pData, int nData, /* The data of the new record */ 7973 int flags, /* True if this is likely an append */
7939 int nZero, /* Number of extra 0 bytes to append to data */
7940 int appendBias, /* True if this is likely an append */
7941 int seekResult /* Result of prior MovetoUnpacked() call */ 7974 int seekResult /* Result of prior MovetoUnpacked() call */
7942 ){ 7975 ){
7943 int rc; 7976 int rc;
7944 int loc = seekResult; /* -1: before desired location +1: after */ 7977 int loc = seekResult; /* -1: before desired location +1: after */
7945 int szNew = 0; 7978 int szNew = 0;
7946 int idx; 7979 int idx;
7947 MemPage *pPage; 7980 MemPage *pPage;
7948 Btree *p = pCur->pBtree; 7981 Btree *p = pCur->pBtree;
7949 BtShared *pBt = p->pBt; 7982 BtShared *pBt = p->pBt;
7950 unsigned char *oldCell; 7983 unsigned char *oldCell;
7951 unsigned char *newCell = 0; 7984 unsigned char *newCell = 0;
7952 7985
7986 assert( (flags & (BTREE_SAVEPOSITION|BTREE_APPEND))==flags );
7987
7953 if( pCur->eState==CURSOR_FAULT ){ 7988 if( pCur->eState==CURSOR_FAULT ){
7954 assert( pCur->skipNext!=SQLITE_OK ); 7989 assert( pCur->skipNext!=SQLITE_OK );
7955 return pCur->skipNext; 7990 return pCur->skipNext;
7956 } 7991 }
7957 7992
7958 assert( cursorHoldsMutex(pCur) ); 7993 assert( cursorOwnsBtShared(pCur) );
7959 assert( (pCur->curFlags & BTCF_WriteFlag)!=0 7994 assert( (pCur->curFlags & BTCF_WriteFlag)!=0
7960 && pBt->inTransaction==TRANS_WRITE 7995 && pBt->inTransaction==TRANS_WRITE
7961 && (pBt->btsFlags & BTS_READ_ONLY)==0 ); 7996 && (pBt->btsFlags & BTS_READ_ONLY)==0 );
7962 assert( hasSharedCacheTableLock(p, pCur->pgnoRoot, pCur->pKeyInfo!=0, 2) ); 7997 assert( hasSharedCacheTableLock(p, pCur->pgnoRoot, pCur->pKeyInfo!=0, 2) );
7963 7998
7964 /* Assert that the caller has been consistent. If this cursor was opened 7999 /* Assert that the caller has been consistent. If this cursor was opened
7965 ** expecting an index b-tree, then the caller should be inserting blob 8000 ** expecting an index b-tree, then the caller should be inserting blob
7966 ** keys with no associated data. If the cursor was opened expecting an 8001 ** keys with no associated data. If the cursor was opened expecting an
7967 ** intkey table, the caller should be inserting integer keys with a 8002 ** intkey table, the caller should be inserting integer keys with a
7968 ** blob of associated data. */ 8003 ** blob of associated data. */
7969 assert( (pKey==0)==(pCur->pKeyInfo==0) ); 8004 assert( (pX->pKey==0)==(pCur->pKeyInfo==0) );
7970 8005
7971 /* Save the positions of any other cursors open on this table. 8006 /* Save the positions of any other cursors open on this table.
7972 ** 8007 **
7973 ** In some cases, the call to btreeMoveto() below is a no-op. For 8008 ** In some cases, the call to btreeMoveto() below is a no-op. For
7974 ** example, when inserting data into a table with auto-generated integer 8009 ** example, when inserting data into a table with auto-generated integer
7975 ** keys, the VDBE layer invokes sqlite3BtreeLast() to figure out the 8010 ** keys, the VDBE layer invokes sqlite3BtreeLast() to figure out the
7976 ** integer key to use. It then calls this function to actually insert the 8011 ** integer key to use. It then calls this function to actually insert the
7977 ** data into the intkey B-Tree. In this case btreeMoveto() recognizes 8012 ** data into the intkey B-Tree. In this case btreeMoveto() recognizes
7978 ** that the cursor is already where it needs to be and returns without 8013 ** that the cursor is already where it needs to be and returns without
7979 ** doing any work. To avoid thwarting these optimizations, it is important 8014 ** doing any work. To avoid thwarting these optimizations, it is important
7980 ** not to clear the cursor here. 8015 ** not to clear the cursor here.
7981 */ 8016 */
7982 if( pCur->curFlags & BTCF_Multiple ){ 8017 if( pCur->curFlags & BTCF_Multiple ){
7983 rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur); 8018 rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur);
7984 if( rc ) return rc; 8019 if( rc ) return rc;
7985 } 8020 }
7986 8021
7987 if( pCur->pKeyInfo==0 ){ 8022 if( pCur->pKeyInfo==0 ){
7988 assert( pKey==0 ); 8023 assert( pX->pKey==0 );
7989 /* If this is an insert into a table b-tree, invalidate any incrblob 8024 /* If this is an insert into a table b-tree, invalidate any incrblob
7990 ** cursors open on the row being replaced */ 8025 ** cursors open on the row being replaced */
7991 invalidateIncrblobCursors(p, nKey, 0); 8026 invalidateIncrblobCursors(p, pX->nKey, 0);
8027
8028 /* If BTREE_SAVEPOSITION is set, the cursor must already be pointing
8029 ** to a row with the same key as the new entry being inserted. */
8030 assert( (flags & BTREE_SAVEPOSITION)==0 ||
8031 ((pCur->curFlags&BTCF_ValidNKey)!=0 && pX->nKey==pCur->info.nKey) );
7992 8032
7993 /* If the cursor is currently on the last row and we are appending a 8033 /* If the cursor is currently on the last row and we are appending a
7994 ** new row onto the end, set the "loc" to avoid an unnecessary 8034 ** new row onto the end, set the "loc" to avoid an unnecessary
7995 ** btreeMoveto() call */ 8035 ** btreeMoveto() call */
7996 if( (pCur->curFlags&BTCF_ValidNKey)!=0 && nKey>0 8036 if( (pCur->curFlags&BTCF_ValidNKey)!=0 && pX->nKey==pCur->info.nKey ){
7997 && pCur->info.nKey==nKey-1 ){ 8037 loc = 0;
7998 loc = -1; 8038 }else if( (pCur->curFlags&BTCF_ValidNKey)!=0 && pX->nKey>0
8039 && pCur->info.nKey==pX->nKey-1 ){
8040 loc = -1;
7999 }else if( loc==0 ){ 8041 }else if( loc==0 ){
8000 rc = sqlite3BtreeMovetoUnpacked(pCur, 0, nKey, appendBias, &loc); 8042 rc = sqlite3BtreeMovetoUnpacked(pCur, 0, pX->nKey, flags!=0, &loc);
8001 if( rc ) return rc; 8043 if( rc ) return rc;
8002 } 8044 }
8003 }else if( loc==0 ){ 8045 }else if( loc==0 && (flags & BTREE_SAVEPOSITION)==0 ){
8004 rc = btreeMoveto(pCur, pKey, nKey, appendBias, &loc); 8046 if( pX->nMem ){
8047 UnpackedRecord r;
8048 r.pKeyInfo = pCur->pKeyInfo;
8049 r.aMem = pX->aMem;
8050 r.nField = pX->nMem;
8051 r.default_rc = 0;
8052 r.errCode = 0;
8053 r.r1 = 0;
8054 r.r2 = 0;
8055 r.eqSeen = 0;
8056 rc = sqlite3BtreeMovetoUnpacked(pCur, &r, 0, flags!=0, &loc);
8057 }else{
8058 rc = btreeMoveto(pCur, pX->pKey, pX->nKey, flags!=0, &loc);
8059 }
8005 if( rc ) return rc; 8060 if( rc ) return rc;
8006 } 8061 }
8007 assert( pCur->eState==CURSOR_VALID || (pCur->eState==CURSOR_INVALID && loc) ); 8062 assert( pCur->eState==CURSOR_VALID || (pCur->eState==CURSOR_INVALID && loc) );
8008 8063
8009 pPage = pCur->apPage[pCur->iPage]; 8064 pPage = pCur->apPage[pCur->iPage];
8010 assert( pPage->intKey || nKey>=0 ); 8065 assert( pPage->intKey || pX->nKey>=0 );
8011 assert( pPage->leaf || !pPage->intKey ); 8066 assert( pPage->leaf || !pPage->intKey );
8012 8067
8013 TRACE(("INSERT: table=%d nkey=%lld ndata=%d page=%d %s\n", 8068 TRACE(("INSERT: table=%d nkey=%lld ndata=%d page=%d %s\n",
8014 pCur->pgnoRoot, nKey, nData, pPage->pgno, 8069 pCur->pgnoRoot, pX->nKey, pX->nData, pPage->pgno,
8015 loc==0 ? "overwrite" : "new entry")); 8070 loc==0 ? "overwrite" : "new entry"));
8016 assert( pPage->isInit ); 8071 assert( pPage->isInit );
8017 newCell = pBt->pTmpSpace; 8072 newCell = pBt->pTmpSpace;
8018 assert( newCell!=0 ); 8073 assert( newCell!=0 );
8019 rc = fillInCell(pPage, newCell, pKey, nKey, pData, nData, nZero, &szNew); 8074 rc = fillInCell(pPage, newCell, pX, &szNew);
8020 if( rc ) goto end_insert; 8075 if( rc ) goto end_insert;
8021 assert( szNew==pPage->xCellSize(pPage, newCell) ); 8076 assert( szNew==pPage->xCellSize(pPage, newCell) );
8022 assert( szNew <= MX_CELL_SIZE(pBt) ); 8077 assert( szNew <= MX_CELL_SIZE(pBt) );
8023 idx = pCur->aiIdx[pCur->iPage]; 8078 idx = pCur->aiIdx[pCur->iPage];
8024 if( loc==0 ){ 8079 if( loc==0 ){
8025 u16 szOld; 8080 CellInfo info;
8026 assert( idx<pPage->nCell ); 8081 assert( idx<pPage->nCell );
8027 rc = sqlite3PagerWrite(pPage->pDbPage); 8082 rc = sqlite3PagerWrite(pPage->pDbPage);
8028 if( rc ){ 8083 if( rc ){
8029 goto end_insert; 8084 goto end_insert;
8030 } 8085 }
8031 oldCell = findCell(pPage, idx); 8086 oldCell = findCell(pPage, idx);
8032 if( !pPage->leaf ){ 8087 if( !pPage->leaf ){
8033 memcpy(newCell, oldCell, 4); 8088 memcpy(newCell, oldCell, 4);
8034 } 8089 }
8035 rc = clearCell(pPage, oldCell, &szOld); 8090 rc = clearCell(pPage, oldCell, &info);
8036 dropCell(pPage, idx, szOld, &rc); 8091 if( info.nSize==szNew && info.nLocal==info.nPayload ){
8092 /* Overwrite the old cell with the new if they are the same size.
8093 ** We could also try to do this if the old cell is smaller, then add
8094 ** the leftover space to the free list. But experiments show that
8095 ** doing that is no faster then skipping this optimization and just
8096 ** calling dropCell() and insertCell(). */
8097 assert( rc==SQLITE_OK ); /* clearCell never fails when nLocal==nPayload */
8098 if( oldCell+szNew > pPage->aDataEnd ) return SQLITE_CORRUPT_BKPT;
8099 memcpy(oldCell, newCell, szNew);
8100 return SQLITE_OK;
8101 }
8102 dropCell(pPage, idx, info.nSize, &rc);
8037 if( rc ) goto end_insert; 8103 if( rc ) goto end_insert;
8038 }else if( loc<0 && pPage->nCell>0 ){ 8104 }else if( loc<0 && pPage->nCell>0 ){
8039 assert( pPage->leaf ); 8105 assert( pPage->leaf );
8040 idx = ++pCur->aiIdx[pCur->iPage]; 8106 idx = ++pCur->aiIdx[pCur->iPage];
8041 }else{ 8107 }else{
8042 assert( pPage->leaf ); 8108 assert( pPage->leaf );
8043 } 8109 }
8044 insertCell(pPage, idx, newCell, szNew, 0, 0, &rc); 8110 insertCell(pPage, idx, newCell, szNew, 0, 0, &rc);
8111 assert( pPage->nOverflow==0 || rc==SQLITE_OK );
8045 assert( rc!=SQLITE_OK || pPage->nCell>0 || pPage->nOverflow>0 ); 8112 assert( rc!=SQLITE_OK || pPage->nCell>0 || pPage->nOverflow>0 );
8046 8113
8047 /* If no error has occurred and pPage has an overflow cell, call balance() 8114 /* If no error has occurred and pPage has an overflow cell, call balance()
8048 ** to redistribute the cells within the tree. Since balance() may move 8115 ** to redistribute the cells within the tree. Since balance() may move
8049 ** the cursor, zero the BtCursor.info.nSize and BTCF_ValidNKey 8116 ** the cursor, zero the BtCursor.info.nSize and BTCF_ValidNKey
8050 ** variables. 8117 ** variables.
8051 ** 8118 **
8052 ** Previous versions of SQLite called moveToRoot() to move the cursor 8119 ** Previous versions of SQLite called moveToRoot() to move the cursor
8053 ** back to the root page as balance() used to invalidate the contents 8120 ** back to the root page as balance() used to invalidate the contents
8054 ** of BtCursor.apPage[] and BtCursor.aiIdx[]. Instead of doing that, 8121 ** of BtCursor.apPage[] and BtCursor.aiIdx[]. Instead of doing that,
8055 ** set the cursor state to "invalid". This makes common insert operations 8122 ** set the cursor state to "invalid". This makes common insert operations
8056 ** slightly faster. 8123 ** slightly faster.
8057 ** 8124 **
8058 ** There is a subtle but important optimization here too. When inserting 8125 ** There is a subtle but important optimization here too. When inserting
8059 ** multiple records into an intkey b-tree using a single cursor (as can 8126 ** multiple records into an intkey b-tree using a single cursor (as can
8060 ** happen while processing an "INSERT INTO ... SELECT" statement), it 8127 ** happen while processing an "INSERT INTO ... SELECT" statement), it
8061 ** is advantageous to leave the cursor pointing to the last entry in 8128 ** is advantageous to leave the cursor pointing to the last entry in
8062 ** the b-tree if possible. If the cursor is left pointing to the last 8129 ** the b-tree if possible. If the cursor is left pointing to the last
8063 ** entry in the table, and the next row inserted has an integer key 8130 ** entry in the table, and the next row inserted has an integer key
8064 ** larger than the largest existing key, it is possible to insert the 8131 ** larger than the largest existing key, it is possible to insert the
8065 ** row without seeking the cursor. This can be a big performance boost. 8132 ** row without seeking the cursor. This can be a big performance boost.
8066 */ 8133 */
8067 pCur->info.nSize = 0; 8134 pCur->info.nSize = 0;
8068 if( rc==SQLITE_OK && pPage->nOverflow ){ 8135 if( pPage->nOverflow ){
8136 assert( rc==SQLITE_OK );
8069 pCur->curFlags &= ~(BTCF_ValidNKey); 8137 pCur->curFlags &= ~(BTCF_ValidNKey);
8070 rc = balance(pCur); 8138 rc = balance(pCur);
8071 8139
8072 /* Must make sure nOverflow is reset to zero even if the balance() 8140 /* Must make sure nOverflow is reset to zero even if the balance()
8073 ** fails. Internal data structure corruption will result otherwise. 8141 ** fails. Internal data structure corruption will result otherwise.
8074 ** Also, set the cursor state to invalid. This stops saveCursorPosition() 8142 ** Also, set the cursor state to invalid. This stops saveCursorPosition()
8075 ** from trying to save the current position of the cursor. */ 8143 ** from trying to save the current position of the cursor. */
8076 pCur->apPage[pCur->iPage]->nOverflow = 0; 8144 pCur->apPage[pCur->iPage]->nOverflow = 0;
8077 pCur->eState = CURSOR_INVALID; 8145 pCur->eState = CURSOR_INVALID;
8146 if( (flags & BTREE_SAVEPOSITION) && rc==SQLITE_OK ){
8147 rc = moveToRoot(pCur);
8148 if( pCur->pKeyInfo ){
8149 assert( pCur->pKey==0 );
8150 pCur->pKey = sqlite3Malloc( pX->nKey );
8151 if( pCur->pKey==0 ){
8152 rc = SQLITE_NOMEM;
8153 }else{
8154 memcpy(pCur->pKey, pX->pKey, pX->nKey);
8155 }
8156 }
8157 pCur->eState = CURSOR_REQUIRESEEK;
8158 pCur->nKey = pX->nKey;
8159 }
8078 } 8160 }
8079 assert( pCur->apPage[pCur->iPage]->nOverflow==0 ); 8161 assert( pCur->apPage[pCur->iPage]->nOverflow==0 );
8080 8162
8081 end_insert: 8163 end_insert:
8082 return rc; 8164 return rc;
8083 } 8165 }
8084 8166
8085 /* 8167 /*
8086 ** Delete the entry that the cursor is pointing to. 8168 ** Delete the entry that the cursor is pointing to.
8087 ** 8169 **
8088 ** If the second parameter is zero, then the cursor is left pointing at an 8170 ** If the BTREE_SAVEPOSITION bit of the flags parameter is zero, then
8089 ** arbitrary location after the delete. If it is non-zero, then the cursor 8171 ** the cursor is left pointing at an arbitrary location after the delete.
8090 ** is left in a state such that the next call to BtreeNext() or BtreePrev() 8172 ** But if that bit is set, then the cursor is left in a state such that
8091 ** moves it to the same row as it would if the call to BtreeDelete() had 8173 ** the next call to BtreeNext() or BtreePrev() moves it to the same row
8092 ** been omitted. 8174 ** as it would have been on if the call to BtreeDelete() had been omitted.
8175 **
8176 ** The BTREE_AUXDELETE bit of flags indicates that is one of several deletes
8177 ** associated with a single table entry and its indexes. Only one of those
8178 ** deletes is considered the "primary" delete. The primary delete occurs
8179 ** on a cursor that is not a BTREE_FORDELETE cursor. All but one delete
8180 ** operation on non-FORDELETE cursors is tagged with the AUXDELETE flag.
8181 ** The BTREE_AUXDELETE bit is a hint that is not used by this implementation,
8182 ** but which might be used by alternative storage engines.
8093 */ 8183 */
8094 int sqlite3BtreeDelete(BtCursor *pCur, int bPreserve){ 8184 int sqlite3BtreeDelete(BtCursor *pCur, u8 flags){
8095 Btree *p = pCur->pBtree; 8185 Btree *p = pCur->pBtree;
8096 BtShared *pBt = p->pBt; 8186 BtShared *pBt = p->pBt;
8097 int rc; /* Return code */ 8187 int rc; /* Return code */
8098 MemPage *pPage; /* Page to delete cell from */ 8188 MemPage *pPage; /* Page to delete cell from */
8099 unsigned char *pCell; /* Pointer to cell to delete */ 8189 unsigned char *pCell; /* Pointer to cell to delete */
8100 int iCellIdx; /* Index of cell to delete */ 8190 int iCellIdx; /* Index of cell to delete */
8101 int iCellDepth; /* Depth of node containing pCell */ 8191 int iCellDepth; /* Depth of node containing pCell */
8102 u16 szCell; /* Size of the cell being deleted */ 8192 CellInfo info; /* Size of the cell being deleted */
8103 int bSkipnext = 0; /* Leaf cursor in SKIPNEXT state */ 8193 int bSkipnext = 0; /* Leaf cursor in SKIPNEXT state */
8194 u8 bPreserve = flags & BTREE_SAVEPOSITION; /* Keep cursor valid */
8104 8195
8105 assert( cursorHoldsMutex(pCur) ); 8196 assert( cursorOwnsBtShared(pCur) );
8106 assert( pBt->inTransaction==TRANS_WRITE ); 8197 assert( pBt->inTransaction==TRANS_WRITE );
8107 assert( (pBt->btsFlags & BTS_READ_ONLY)==0 ); 8198 assert( (pBt->btsFlags & BTS_READ_ONLY)==0 );
8108 assert( pCur->curFlags & BTCF_WriteFlag ); 8199 assert( pCur->curFlags & BTCF_WriteFlag );
8109 assert( hasSharedCacheTableLock(p, pCur->pgnoRoot, pCur->pKeyInfo!=0, 2) ); 8200 assert( hasSharedCacheTableLock(p, pCur->pgnoRoot, pCur->pKeyInfo!=0, 2) );
8110 assert( !hasReadConflicts(p, pCur->pgnoRoot) ); 8201 assert( !hasReadConflicts(p, pCur->pgnoRoot) );
8111 assert( pCur->aiIdx[pCur->iPage]<pCur->apPage[pCur->iPage]->nCell ); 8202 assert( pCur->aiIdx[pCur->iPage]<pCur->apPage[pCur->iPage]->nCell );
8112 assert( pCur->eState==CURSOR_VALID ); 8203 assert( pCur->eState==CURSOR_VALID );
8204 assert( (flags & ~(BTREE_SAVEPOSITION | BTREE_AUXDELETE))==0 );
8113 8205
8114 iCellDepth = pCur->iPage; 8206 iCellDepth = pCur->iPage;
8115 iCellIdx = pCur->aiIdx[iCellDepth]; 8207 iCellIdx = pCur->aiIdx[iCellDepth];
8116 pPage = pCur->apPage[iCellDepth]; 8208 pPage = pCur->apPage[iCellDepth];
8117 pCell = findCell(pPage, iCellIdx); 8209 pCell = findCell(pPage, iCellIdx);
8118 8210
8211 /* If the bPreserve flag is set to true, then the cursor position must
8212 ** be preserved following this delete operation. If the current delete
8213 ** will cause a b-tree rebalance, then this is done by saving the cursor
8214 ** key and leaving the cursor in CURSOR_REQUIRESEEK state before
8215 ** returning.
8216 **
8217 ** Or, if the current delete will not cause a rebalance, then the cursor
8218 ** will be left in CURSOR_SKIPNEXT state pointing to the entry immediately
8219 ** before or after the deleted entry. In this case set bSkipnext to true. */
8220 if( bPreserve ){
8221 if( !pPage->leaf
8222 || (pPage->nFree+cellSizePtr(pPage,pCell)+2)>(int)(pBt->usableSize*2/3)
8223 ){
8224 /* A b-tree rebalance will be required after deleting this entry.
8225 ** Save the cursor key. */
8226 rc = saveCursorKey(pCur);
8227 if( rc ) return rc;
8228 }else{
8229 bSkipnext = 1;
8230 }
8231 }
8232
8119 /* If the page containing the entry to delete is not a leaf page, move 8233 /* If the page containing the entry to delete is not a leaf page, move
8120 ** the cursor to the largest entry in the tree that is smaller than 8234 ** the cursor to the largest entry in the tree that is smaller than
8121 ** the entry being deleted. This cell will replace the cell being deleted 8235 ** the entry being deleted. This cell will replace the cell being deleted
8122 ** from the internal node. The 'previous' entry is used for this instead 8236 ** from the internal node. The 'previous' entry is used for this instead
8123 ** of the 'next' entry, as the previous entry is always a part of the 8237 ** of the 'next' entry, as the previous entry is always a part of the
8124 ** sub-tree headed by the child page of the cell being deleted. This makes 8238 ** sub-tree headed by the child page of the cell being deleted. This makes
8125 ** balancing the tree following the delete operation easier. */ 8239 ** balancing the tree following the delete operation easier. */
8126 if( !pPage->leaf ){ 8240 if( !pPage->leaf ){
8127 int notUsed = 0; 8241 int notUsed = 0;
8128 rc = sqlite3BtreePrevious(pCur, &notUsed); 8242 rc = sqlite3BtreePrevious(pCur, &notUsed);
8129 if( rc ) return rc; 8243 if( rc ) return rc;
8130 } 8244 }
8131 8245
8132 /* Save the positions of any other cursors open on this table before 8246 /* Save the positions of any other cursors open on this table before
8133 ** making any modifications. */ 8247 ** making any modifications. */
8134 if( pCur->curFlags & BTCF_Multiple ){ 8248 if( pCur->curFlags & BTCF_Multiple ){
8135 rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur); 8249 rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur);
8136 if( rc ) return rc; 8250 if( rc ) return rc;
8137 } 8251 }
8138 8252
8139 /* If this is a delete operation to remove a row from a table b-tree, 8253 /* If this is a delete operation to remove a row from a table b-tree,
8140 ** invalidate any incrblob cursors open on the row being deleted. */ 8254 ** invalidate any incrblob cursors open on the row being deleted. */
8141 if( pCur->pKeyInfo==0 ){ 8255 if( pCur->pKeyInfo==0 ){
8142 invalidateIncrblobCursors(p, pCur->info.nKey, 0); 8256 invalidateIncrblobCursors(p, pCur->info.nKey, 0);
8143 } 8257 }
8144 8258
8145 /* If the bPreserve flag is set to true, then the cursor position must
8146 ** be preserved following this delete operation. If the current delete
8147 ** will cause a b-tree rebalance, then this is done by saving the cursor
8148 ** key and leaving the cursor in CURSOR_REQUIRESEEK state before
8149 ** returning.
8150 **
8151 ** Or, if the current delete will not cause a rebalance, then the cursor
8152 ** will be left in CURSOR_SKIPNEXT state pointing to the entry immediately
8153 ** before or after the deleted entry. In this case set bSkipnext to true. */
8154 if( bPreserve ){
8155 if( !pPage->leaf
8156 || (pPage->nFree+cellSizePtr(pPage,pCell)+2)>(int)(pBt->usableSize*2/3)
8157 ){
8158 /* A b-tree rebalance will be required after deleting this entry.
8159 ** Save the cursor key. */
8160 rc = saveCursorKey(pCur);
8161 if( rc ) return rc;
8162 }else{
8163 bSkipnext = 1;
8164 }
8165 }
8166
8167 /* Make the page containing the entry to be deleted writable. Then free any 8259 /* Make the page containing the entry to be deleted writable. Then free any
8168 ** overflow pages associated with the entry and finally remove the cell 8260 ** overflow pages associated with the entry and finally remove the cell
8169 ** itself from within the page. */ 8261 ** itself from within the page. */
8170 rc = sqlite3PagerWrite(pPage->pDbPage); 8262 rc = sqlite3PagerWrite(pPage->pDbPage);
8171 if( rc ) return rc; 8263 if( rc ) return rc;
8172 rc = clearCell(pPage, pCell, &szCell); 8264 rc = clearCell(pPage, pCell, &info);
8173 dropCell(pPage, iCellIdx, szCell, &rc); 8265 dropCell(pPage, iCellIdx, info.nSize, &rc);
8174 if( rc ) return rc; 8266 if( rc ) return rc;
8175 8267
8176 /* If the cell deleted was not located on a leaf page, then the cursor 8268 /* If the cell deleted was not located on a leaf page, then the cursor
8177 ** is currently pointing to the largest entry in the sub-tree headed 8269 ** is currently pointing to the largest entry in the sub-tree headed
8178 ** by the child-page of the cell that was just deleted from an internal 8270 ** by the child-page of the cell that was just deleted from an internal
8179 ** node. The cell from the leaf node needs to be moved to the internal 8271 ** node. The cell from the leaf node needs to be moved to the internal
8180 ** node to replace the deleted cell. */ 8272 ** node to replace the deleted cell. */
8181 if( !pPage->leaf ){ 8273 if( !pPage->leaf ){
8182 MemPage *pLeaf = pCur->apPage[pCur->iPage]; 8274 MemPage *pLeaf = pCur->apPage[pCur->iPage];
8183 int nCell; 8275 int nCell;
8184 Pgno n = pCur->apPage[iCellDepth+1]->pgno; 8276 Pgno n = pCur->apPage[iCellDepth+1]->pgno;
8185 unsigned char *pTmp; 8277 unsigned char *pTmp;
8186 8278
8187 pCell = findCell(pLeaf, pLeaf->nCell-1); 8279 pCell = findCell(pLeaf, pLeaf->nCell-1);
8188 if( pCell<&pLeaf->aData[4] ) return SQLITE_CORRUPT_BKPT; 8280 if( pCell<&pLeaf->aData[4] ) return SQLITE_CORRUPT_BKPT;
8189 nCell = pLeaf->xCellSize(pLeaf, pCell); 8281 nCell = pLeaf->xCellSize(pLeaf, pCell);
8190 assert( MX_CELL_SIZE(pBt) >= nCell ); 8282 assert( MX_CELL_SIZE(pBt) >= nCell );
8191 pTmp = pBt->pTmpSpace; 8283 pTmp = pBt->pTmpSpace;
8192 assert( pTmp!=0 ); 8284 assert( pTmp!=0 );
8193 rc = sqlite3PagerWrite(pLeaf->pDbPage); 8285 rc = sqlite3PagerWrite(pLeaf->pDbPage);
8194 insertCell(pPage, iCellIdx, pCell-4, nCell+4, pTmp, n, &rc); 8286 if( rc==SQLITE_OK ){
8287 insertCell(pPage, iCellIdx, pCell-4, nCell+4, pTmp, n, &rc);
8288 }
8195 dropCell(pLeaf, pLeaf->nCell-1, nCell, &rc); 8289 dropCell(pLeaf, pLeaf->nCell-1, nCell, &rc);
8196 if( rc ) return rc; 8290 if( rc ) return rc;
8197 } 8291 }
8198 8292
8199 /* Balance the tree. If the entry deleted was located on a leaf page, 8293 /* Balance the tree. If the entry deleted was located on a leaf page,
8200 ** then the cursor still points to that page. In this case the first 8294 ** then the cursor still points to that page. In this case the first
8201 ** call to balance() repairs the tree, and the if(...) condition is 8295 ** call to balance() repairs the tree, and the if(...) condition is
8202 ** never true. 8296 ** never true.
8203 ** 8297 **
8204 ** Otherwise, if the entry deleted was on an internal node page, then 8298 ** Otherwise, if the entry deleted was on an internal node page, then
(...skipping 10 matching lines...) Expand all
8215 if( rc==SQLITE_OK && pCur->iPage>iCellDepth ){ 8309 if( rc==SQLITE_OK && pCur->iPage>iCellDepth ){
8216 while( pCur->iPage>iCellDepth ){ 8310 while( pCur->iPage>iCellDepth ){
8217 releasePage(pCur->apPage[pCur->iPage--]); 8311 releasePage(pCur->apPage[pCur->iPage--]);
8218 } 8312 }
8219 rc = balance(pCur); 8313 rc = balance(pCur);
8220 } 8314 }
8221 8315
8222 if( rc==SQLITE_OK ){ 8316 if( rc==SQLITE_OK ){
8223 if( bSkipnext ){ 8317 if( bSkipnext ){
8224 assert( bPreserve && (pCur->iPage==iCellDepth || CORRUPT_DB) ); 8318 assert( bPreserve && (pCur->iPage==iCellDepth || CORRUPT_DB) );
8225 assert( pPage==pCur->apPage[pCur->iPage] ); 8319 assert( pPage==pCur->apPage[pCur->iPage] || CORRUPT_DB );
8226 assert( (pPage->nCell>0 || CORRUPT_DB) && iCellIdx<=pPage->nCell ); 8320 assert( (pPage->nCell>0 || CORRUPT_DB) && iCellIdx<=pPage->nCell );
8227 pCur->eState = CURSOR_SKIPNEXT; 8321 pCur->eState = CURSOR_SKIPNEXT;
8228 if( iCellIdx>=pPage->nCell ){ 8322 if( iCellIdx>=pPage->nCell ){
8229 pCur->skipNext = -1; 8323 pCur->skipNext = -1;
8230 pCur->aiIdx[iCellDepth] = pPage->nCell-1; 8324 pCur->aiIdx[iCellDepth] = pPage->nCell-1;
8231 }else{ 8325 }else{
8232 pCur->skipNext = 1; 8326 pCur->skipNext = 1;
8233 } 8327 }
8234 }else{ 8328 }else{
8235 rc = moveToRoot(pCur); 8329 rc = moveToRoot(pCur);
(...skipping 175 matching lines...) Expand 10 before | Expand all | Expand 10 after
8411 BtShared *pBt, /* The BTree that contains the table */ 8505 BtShared *pBt, /* The BTree that contains the table */
8412 Pgno pgno, /* Page number to clear */ 8506 Pgno pgno, /* Page number to clear */
8413 int freePageFlag, /* Deallocate page if true */ 8507 int freePageFlag, /* Deallocate page if true */
8414 int *pnChange /* Add number of Cells freed to this counter */ 8508 int *pnChange /* Add number of Cells freed to this counter */
8415 ){ 8509 ){
8416 MemPage *pPage; 8510 MemPage *pPage;
8417 int rc; 8511 int rc;
8418 unsigned char *pCell; 8512 unsigned char *pCell;
8419 int i; 8513 int i;
8420 int hdr; 8514 int hdr;
8421 u16 szCell; 8515 CellInfo info;
8422 8516
8423 assert( sqlite3_mutex_held(pBt->mutex) ); 8517 assert( sqlite3_mutex_held(pBt->mutex) );
8424 if( pgno>btreePagecount(pBt) ){ 8518 if( pgno>btreePagecount(pBt) ){
8425 return SQLITE_CORRUPT_BKPT; 8519 return SQLITE_CORRUPT_BKPT;
8426 } 8520 }
8427 rc = getAndInitPage(pBt, pgno, &pPage, 0, 0); 8521 rc = getAndInitPage(pBt, pgno, &pPage, 0, 0);
8428 if( rc ) return rc; 8522 if( rc ) return rc;
8429 if( pPage->bBusy ){ 8523 if( pPage->bBusy ){
8430 rc = SQLITE_CORRUPT_BKPT; 8524 rc = SQLITE_CORRUPT_BKPT;
8431 goto cleardatabasepage_out; 8525 goto cleardatabasepage_out;
8432 } 8526 }
8433 pPage->bBusy = 1; 8527 pPage->bBusy = 1;
8434 hdr = pPage->hdrOffset; 8528 hdr = pPage->hdrOffset;
8435 for(i=0; i<pPage->nCell; i++){ 8529 for(i=0; i<pPage->nCell; i++){
8436 pCell = findCell(pPage, i); 8530 pCell = findCell(pPage, i);
8437 if( !pPage->leaf ){ 8531 if( !pPage->leaf ){
8438 rc = clearDatabasePage(pBt, get4byte(pCell), 1, pnChange); 8532 rc = clearDatabasePage(pBt, get4byte(pCell), 1, pnChange);
8439 if( rc ) goto cleardatabasepage_out; 8533 if( rc ) goto cleardatabasepage_out;
8440 } 8534 }
8441 rc = clearCell(pPage, pCell, &szCell); 8535 rc = clearCell(pPage, pCell, &info);
8442 if( rc ) goto cleardatabasepage_out; 8536 if( rc ) goto cleardatabasepage_out;
8443 } 8537 }
8444 if( !pPage->leaf ){ 8538 if( !pPage->leaf ){
8445 rc = clearDatabasePage(pBt, get4byte(&pPage->aData[hdr+8]), 1, pnChange); 8539 rc = clearDatabasePage(pBt, get4byte(&pPage->aData[hdr+8]), 1, pnChange);
8446 if( rc ) goto cleardatabasepage_out; 8540 if( rc ) goto cleardatabasepage_out;
8447 }else if( pnChange ){ 8541 }else if( pnChange ){
8448 assert( pPage->intKey || CORRUPT_DB ); 8542 assert( pPage->intKey || CORRUPT_DB );
8449 testcase( !pPage->intKey ); 8543 testcase( !pPage->intKey );
8450 *pnChange += pPage->nCell; 8544 *pnChange += pPage->nCell;
8451 } 8545 }
(...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after
8522 ** The last root page is recorded in meta[3] and the value of 8616 ** The last root page is recorded in meta[3] and the value of
8523 ** meta[3] is updated by this procedure. 8617 ** meta[3] is updated by this procedure.
8524 */ 8618 */
8525 static int btreeDropTable(Btree *p, Pgno iTable, int *piMoved){ 8619 static int btreeDropTable(Btree *p, Pgno iTable, int *piMoved){
8526 int rc; 8620 int rc;
8527 MemPage *pPage = 0; 8621 MemPage *pPage = 0;
8528 BtShared *pBt = p->pBt; 8622 BtShared *pBt = p->pBt;
8529 8623
8530 assert( sqlite3BtreeHoldsMutex(p) ); 8624 assert( sqlite3BtreeHoldsMutex(p) );
8531 assert( p->inTrans==TRANS_WRITE ); 8625 assert( p->inTrans==TRANS_WRITE );
8532 8626 assert( iTable>=2 );
8533 /* It is illegal to drop a table if any cursors are open on the
8534 ** database. This is because in auto-vacuum mode the backend may
8535 ** need to move another root-page to fill a gap left by the deleted
8536 ** root page. If an open cursor was using this page a problem would
8537 ** occur.
8538 **
8539 ** This error is caught long before control reaches this point.
8540 */
8541 if( NEVER(pBt->pCursor) ){
8542 sqlite3ConnectionBlocked(p->db, pBt->pCursor->pBtree->db);
8543 return SQLITE_LOCKED_SHAREDCACHE;
8544 }
8545 8627
8546 rc = btreeGetPage(pBt, (Pgno)iTable, &pPage, 0); 8628 rc = btreeGetPage(pBt, (Pgno)iTable, &pPage, 0);
8547 if( rc ) return rc; 8629 if( rc ) return rc;
8548 rc = sqlite3BtreeClearTable(p, iTable, 0); 8630 rc = sqlite3BtreeClearTable(p, iTable, 0);
8549 if( rc ){ 8631 if( rc ){
8550 releasePage(pPage); 8632 releasePage(pPage);
8551 return rc; 8633 return rc;
8552 } 8634 }
8553 8635
8554 *piMoved = 0; 8636 *piMoved = 0;
8555 8637
8556 if( iTable>1 ){
8557 #ifdef SQLITE_OMIT_AUTOVACUUM 8638 #ifdef SQLITE_OMIT_AUTOVACUUM
8639 freePage(pPage, &rc);
8640 releasePage(pPage);
8641 #else
8642 if( pBt->autoVacuum ){
8643 Pgno maxRootPgno;
8644 sqlite3BtreeGetMeta(p, BTREE_LARGEST_ROOT_PAGE, &maxRootPgno);
8645
8646 if( iTable==maxRootPgno ){
8647 /* If the table being dropped is the table with the largest root-page
8648 ** number in the database, put the root page on the free list.
8649 */
8650 freePage(pPage, &rc);
8651 releasePage(pPage);
8652 if( rc!=SQLITE_OK ){
8653 return rc;
8654 }
8655 }else{
8656 /* The table being dropped does not have the largest root-page
8657 ** number in the database. So move the page that does into the
8658 ** gap left by the deleted root-page.
8659 */
8660 MemPage *pMove;
8661 releasePage(pPage);
8662 rc = btreeGetPage(pBt, maxRootPgno, &pMove, 0);
8663 if( rc!=SQLITE_OK ){
8664 return rc;
8665 }
8666 rc = relocatePage(pBt, pMove, PTRMAP_ROOTPAGE, 0, iTable, 0);
8667 releasePage(pMove);
8668 if( rc!=SQLITE_OK ){
8669 return rc;
8670 }
8671 pMove = 0;
8672 rc = btreeGetPage(pBt, maxRootPgno, &pMove, 0);
8673 freePage(pMove, &rc);
8674 releasePage(pMove);
8675 if( rc!=SQLITE_OK ){
8676 return rc;
8677 }
8678 *piMoved = maxRootPgno;
8679 }
8680
8681 /* Set the new 'max-root-page' value in the database header. This
8682 ** is the old value less one, less one more if that happens to
8683 ** be a root-page number, less one again if that is the
8684 ** PENDING_BYTE_PAGE.
8685 */
8686 maxRootPgno--;
8687 while( maxRootPgno==PENDING_BYTE_PAGE(pBt)
8688 || PTRMAP_ISPAGE(pBt, maxRootPgno) ){
8689 maxRootPgno--;
8690 }
8691 assert( maxRootPgno!=PENDING_BYTE_PAGE(pBt) );
8692
8693 rc = sqlite3BtreeUpdateMeta(p, 4, maxRootPgno);
8694 }else{
8558 freePage(pPage, &rc); 8695 freePage(pPage, &rc);
8559 releasePage(pPage); 8696 releasePage(pPage);
8560 #else 8697 }
8561 if( pBt->autoVacuum ){
8562 Pgno maxRootPgno;
8563 sqlite3BtreeGetMeta(p, BTREE_LARGEST_ROOT_PAGE, &maxRootPgno);
8564
8565 if( iTable==maxRootPgno ){
8566 /* If the table being dropped is the table with the largest root-page
8567 ** number in the database, put the root page on the free list.
8568 */
8569 freePage(pPage, &rc);
8570 releasePage(pPage);
8571 if( rc!=SQLITE_OK ){
8572 return rc;
8573 }
8574 }else{
8575 /* The table being dropped does not have the largest root-page
8576 ** number in the database. So move the page that does into the
8577 ** gap left by the deleted root-page.
8578 */
8579 MemPage *pMove;
8580 releasePage(pPage);
8581 rc = btreeGetPage(pBt, maxRootPgno, &pMove, 0);
8582 if( rc!=SQLITE_OK ){
8583 return rc;
8584 }
8585 rc = relocatePage(pBt, pMove, PTRMAP_ROOTPAGE, 0, iTable, 0);
8586 releasePage(pMove);
8587 if( rc!=SQLITE_OK ){
8588 return rc;
8589 }
8590 pMove = 0;
8591 rc = btreeGetPage(pBt, maxRootPgno, &pMove, 0);
8592 freePage(pMove, &rc);
8593 releasePage(pMove);
8594 if( rc!=SQLITE_OK ){
8595 return rc;
8596 }
8597 *piMoved = maxRootPgno;
8598 }
8599
8600 /* Set the new 'max-root-page' value in the database header. This
8601 ** is the old value less one, less one more if that happens to
8602 ** be a root-page number, less one again if that is the
8603 ** PENDING_BYTE_PAGE.
8604 */
8605 maxRootPgno--;
8606 while( maxRootPgno==PENDING_BYTE_PAGE(pBt)
8607 || PTRMAP_ISPAGE(pBt, maxRootPgno) ){
8608 maxRootPgno--;
8609 }
8610 assert( maxRootPgno!=PENDING_BYTE_PAGE(pBt) );
8611
8612 rc = sqlite3BtreeUpdateMeta(p, 4, maxRootPgno);
8613 }else{
8614 freePage(pPage, &rc);
8615 releasePage(pPage);
8616 }
8617 #endif 8698 #endif
8618 }else{
8619 /* If sqlite3BtreeDropTable was called on page 1.
8620 ** This really never should happen except in a corrupt
8621 ** database.
8622 */
8623 zeroPage(pPage, PTF_INTKEY|PTF_LEAF );
8624 releasePage(pPage);
8625 }
8626 return rc; 8699 return rc;
8627 } 8700 }
8628 int sqlite3BtreeDropTable(Btree *p, int iTable, int *piMoved){ 8701 int sqlite3BtreeDropTable(Btree *p, int iTable, int *piMoved){
8629 int rc; 8702 int rc;
8630 sqlite3BtreeEnter(p); 8703 sqlite3BtreeEnter(p);
8631 rc = btreeDropTable(p, iTable, piMoved); 8704 rc = btreeDropTable(p, iTable, piMoved);
8632 sqlite3BtreeLeave(p); 8705 sqlite3BtreeLeave(p);
8633 return rc; 8706 return rc;
8634 } 8707 }
8635 8708
(...skipping 166 matching lines...) Expand 10 before | Expand all | Expand 10 after
8802 ){ 8875 ){
8803 va_list ap; 8876 va_list ap;
8804 if( !pCheck->mxErr ) return; 8877 if( !pCheck->mxErr ) return;
8805 pCheck->mxErr--; 8878 pCheck->mxErr--;
8806 pCheck->nErr++; 8879 pCheck->nErr++;
8807 va_start(ap, zFormat); 8880 va_start(ap, zFormat);
8808 if( pCheck->errMsg.nChar ){ 8881 if( pCheck->errMsg.nChar ){
8809 sqlite3StrAccumAppend(&pCheck->errMsg, "\n", 1); 8882 sqlite3StrAccumAppend(&pCheck->errMsg, "\n", 1);
8810 } 8883 }
8811 if( pCheck->zPfx ){ 8884 if( pCheck->zPfx ){
8812 sqlite3XPrintf(&pCheck->errMsg, 0, pCheck->zPfx, pCheck->v1, pCheck->v2); 8885 sqlite3XPrintf(&pCheck->errMsg, pCheck->zPfx, pCheck->v1, pCheck->v2);
8813 } 8886 }
8814 sqlite3VXPrintf(&pCheck->errMsg, 1, zFormat, ap); 8887 sqlite3VXPrintf(&pCheck->errMsg, zFormat, ap);
8815 va_end(ap); 8888 va_end(ap);
8816 if( pCheck->errMsg.accError==STRACCUM_NOMEM ){ 8889 if( pCheck->errMsg.accError==STRACCUM_NOMEM ){
8817 pCheck->mallocFailed = 1; 8890 pCheck->mallocFailed = 1;
8818 } 8891 }
8819 } 8892 }
8820 #endif /* SQLITE_OMIT_INTEGRITY_CHECK */ 8893 #endif /* SQLITE_OMIT_INTEGRITY_CHECK */
8821 8894
8822 #ifndef SQLITE_OMIT_INTEGRITY_CHECK 8895 #ifndef SQLITE_OMIT_INTEGRITY_CHECK
8823 8896
8824 /* 8897 /*
(...skipping 480 matching lines...) Expand 10 before | Expand all | Expand 10 after
9305 ){ 9378 ){
9306 Pgno i; 9379 Pgno i;
9307 IntegrityCk sCheck; 9380 IntegrityCk sCheck;
9308 BtShared *pBt = p->pBt; 9381 BtShared *pBt = p->pBt;
9309 int savedDbFlags = pBt->db->flags; 9382 int savedDbFlags = pBt->db->flags;
9310 char zErr[100]; 9383 char zErr[100];
9311 VVA_ONLY( int nRef ); 9384 VVA_ONLY( int nRef );
9312 9385
9313 sqlite3BtreeEnter(p); 9386 sqlite3BtreeEnter(p);
9314 assert( p->inTrans>TRANS_NONE && pBt->inTransaction>TRANS_NONE ); 9387 assert( p->inTrans>TRANS_NONE && pBt->inTransaction>TRANS_NONE );
9315 assert( (nRef = sqlite3PagerRefcount(pBt->pPager))>=0 ); 9388 VVA_ONLY( nRef = sqlite3PagerRefcount(pBt->pPager) );
9389 assert( nRef>=0 );
9316 sCheck.pBt = pBt; 9390 sCheck.pBt = pBt;
9317 sCheck.pPager = pBt->pPager; 9391 sCheck.pPager = pBt->pPager;
9318 sCheck.nPage = btreePagecount(sCheck.pBt); 9392 sCheck.nPage = btreePagecount(sCheck.pBt);
9319 sCheck.mxErr = mxErr; 9393 sCheck.mxErr = mxErr;
9320 sCheck.nErr = 0; 9394 sCheck.nErr = 0;
9321 sCheck.mallocFailed = 0; 9395 sCheck.mallocFailed = 0;
9322 sCheck.zPfx = 0; 9396 sCheck.zPfx = 0;
9323 sCheck.v1 = 0; 9397 sCheck.v1 = 0;
9324 sCheck.v2 = 0; 9398 sCheck.v2 = 0;
9325 sCheck.aPgRef = 0; 9399 sCheck.aPgRef = 0;
9326 sCheck.heap = 0; 9400 sCheck.heap = 0;
9327 sqlite3StrAccumInit(&sCheck.errMsg, 0, zErr, sizeof(zErr), SQLITE_MAX_LENGTH); 9401 sqlite3StrAccumInit(&sCheck.errMsg, 0, zErr, sizeof(zErr), SQLITE_MAX_LENGTH);
9402 sCheck.errMsg.printfFlags = SQLITE_PRINTF_INTERNAL;
9328 if( sCheck.nPage==0 ){ 9403 if( sCheck.nPage==0 ){
9329 goto integrity_ck_cleanup; 9404 goto integrity_ck_cleanup;
9330 } 9405 }
9331 9406
9332 sCheck.aPgRef = sqlite3MallocZero((sCheck.nPage / 8)+ 1); 9407 sCheck.aPgRef = sqlite3MallocZero((sCheck.nPage / 8)+ 1);
9333 if( !sCheck.aPgRef ){ 9408 if( !sCheck.aPgRef ){
9334 sCheck.mallocFailed = 1; 9409 sCheck.mallocFailed = 1;
9335 goto integrity_ck_cleanup; 9410 goto integrity_ck_cleanup;
9336 } 9411 }
9337 sCheck.heap = (u32*)sqlite3PageMalloc( pBt->pageSize ); 9412 sCheck.heap = (u32*)sqlite3PageMalloc( pBt->pageSize );
(...skipping 111 matching lines...) Expand 10 before | Expand all | Expand 10 after
9449 ** Parameter eMode is one of SQLITE_CHECKPOINT_PASSIVE, FULL or RESTART. 9524 ** Parameter eMode is one of SQLITE_CHECKPOINT_PASSIVE, FULL or RESTART.
9450 */ 9525 */
9451 int sqlite3BtreeCheckpoint(Btree *p, int eMode, int *pnLog, int *pnCkpt){ 9526 int sqlite3BtreeCheckpoint(Btree *p, int eMode, int *pnLog, int *pnCkpt){
9452 int rc = SQLITE_OK; 9527 int rc = SQLITE_OK;
9453 if( p ){ 9528 if( p ){
9454 BtShared *pBt = p->pBt; 9529 BtShared *pBt = p->pBt;
9455 sqlite3BtreeEnter(p); 9530 sqlite3BtreeEnter(p);
9456 if( pBt->inTransaction!=TRANS_NONE ){ 9531 if( pBt->inTransaction!=TRANS_NONE ){
9457 rc = SQLITE_LOCKED; 9532 rc = SQLITE_LOCKED;
9458 }else{ 9533 }else{
9459 rc = sqlite3PagerCheckpoint(pBt->pPager, eMode, pnLog, pnCkpt); 9534 rc = sqlite3PagerCheckpoint(pBt->pPager, p->db, eMode, pnLog, pnCkpt);
9460 } 9535 }
9461 sqlite3BtreeLeave(p); 9536 sqlite3BtreeLeave(p);
9462 } 9537 }
9463 return rc; 9538 return rc;
9464 } 9539 }
9465 #endif 9540 #endif
9466 9541
9467 /* 9542 /*
9468 ** Return non-zero if a read (or write) transaction is active. 9543 ** Return non-zero if a read (or write) transaction is active.
9469 */ 9544 */
(...skipping 87 matching lines...) Expand 10 before | Expand all | Expand 10 after
9557 ** INTKEY table currently pointing at a valid table entry. 9632 ** INTKEY table currently pointing at a valid table entry.
9558 ** This function modifies the data stored as part of that entry. 9633 ** This function modifies the data stored as part of that entry.
9559 ** 9634 **
9560 ** Only the data content may only be modified, it is not possible to 9635 ** Only the data content may only be modified, it is not possible to
9561 ** change the length of the data stored. If this function is called with 9636 ** change the length of the data stored. If this function is called with
9562 ** parameters that attempt to write past the end of the existing data, 9637 ** parameters that attempt to write past the end of the existing data,
9563 ** no modifications are made and SQLITE_CORRUPT is returned. 9638 ** no modifications are made and SQLITE_CORRUPT is returned.
9564 */ 9639 */
9565 int sqlite3BtreePutData(BtCursor *pCsr, u32 offset, u32 amt, void *z){ 9640 int sqlite3BtreePutData(BtCursor *pCsr, u32 offset, u32 amt, void *z){
9566 int rc; 9641 int rc;
9567 assert( cursorHoldsMutex(pCsr) ); 9642 assert( cursorOwnsBtShared(pCsr) );
9568 assert( sqlite3_mutex_held(pCsr->pBtree->db->mutex) ); 9643 assert( sqlite3_mutex_held(pCsr->pBtree->db->mutex) );
9569 assert( pCsr->curFlags & BTCF_Incrblob ); 9644 assert( pCsr->curFlags & BTCF_Incrblob );
9570 9645
9571 rc = restoreCursorPosition(pCsr); 9646 rc = restoreCursorPosition(pCsr);
9572 if( rc!=SQLITE_OK ){ 9647 if( rc!=SQLITE_OK ){
9573 return rc; 9648 return rc;
9574 } 9649 }
9575 assert( pCsr->eState!=CURSOR_REQUIRESEEK ); 9650 assert( pCsr->eState!=CURSOR_REQUIRESEEK );
9576 if( pCsr->eState!=CURSOR_VALID ){ 9651 if( pCsr->eState!=CURSOR_VALID ){
9577 return SQLITE_ABORT; 9652 return SQLITE_ABORT;
(...skipping 86 matching lines...) Expand 10 before | Expand all | Expand 10 after
9664 ** Return true if the given Btree is read-only. 9739 ** Return true if the given Btree is read-only.
9665 */ 9740 */
9666 int sqlite3BtreeIsReadonly(Btree *p){ 9741 int sqlite3BtreeIsReadonly(Btree *p){
9667 return (p->pBt->btsFlags & BTS_READ_ONLY)!=0; 9742 return (p->pBt->btsFlags & BTS_READ_ONLY)!=0;
9668 } 9743 }
9669 9744
9670 /* 9745 /*
9671 ** Return the size of the header added to each page by this module. 9746 ** Return the size of the header added to each page by this module.
9672 */ 9747 */
9673 int sqlite3HeaderSizeBtree(void){ return ROUND8(sizeof(MemPage)); } 9748 int sqlite3HeaderSizeBtree(void){ return ROUND8(sizeof(MemPage)); }
9749
9750 #if !defined(SQLITE_OMIT_SHARED_CACHE)
9751 /*
9752 ** Return true if the Btree passed as the only argument is sharable.
9753 */
9754 int sqlite3BtreeSharable(Btree *p){
9755 return p->sharable;
9756 }
9757
9758 /*
9759 ** Return the number of connections to the BtShared object accessed by
9760 ** the Btree handle passed as the only argument. For private caches
9761 ** this is always 1. For shared caches it may be 1 or greater.
9762 */
9763 int sqlite3BtreeConnectionCount(Btree *p){
9764 testcase( p->sharable );
9765 return p->pBt->nRef;
9766 }
9767 #endif
OLDNEW
« no previous file with comments | « third_party/sqlite/src/src/btree.h ('k') | third_party/sqlite/src/src/btreeInt.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698