Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(258)

Side by Side Diff: third_party/sqlite/sqlite-src-3100200/src/btree.c

Issue 1610543003: [sql] Import reference version of SQLite 3.10.2. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 ** 2004 April 6 2 ** 2004 April 6
3 ** 3 **
4 ** The author disclaims copyright to this source code. In place of 4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing: 5 ** a legal notice, here is a blessing:
6 ** 6 **
7 ** May you do good and not evil. 7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others. 8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give. 9 ** May you share freely, never taking more than you give.
10 ** 10 **
(...skipping 157 matching lines...) Expand 10 before | Expand all | Expand 10 after
168 168
169 /* Figure out the root-page that the lock should be held on. For table 169 /* Figure out the root-page that the lock should be held on. For table
170 ** b-trees, this is just the root page of the b-tree being read or 170 ** b-trees, this is just the root page of the b-tree being read or
171 ** written. For index b-trees, it is the root page of the associated 171 ** written. For index b-trees, it is the root page of the associated
172 ** table. */ 172 ** table. */
173 if( isIndex ){ 173 if( isIndex ){
174 HashElem *p; 174 HashElem *p;
175 for(p=sqliteHashFirst(&pSchema->idxHash); p; p=sqliteHashNext(p)){ 175 for(p=sqliteHashFirst(&pSchema->idxHash); p; p=sqliteHashNext(p)){
176 Index *pIdx = (Index *)sqliteHashData(p); 176 Index *pIdx = (Index *)sqliteHashData(p);
177 if( pIdx->tnum==(int)iRoot ){ 177 if( pIdx->tnum==(int)iRoot ){
178 if( iTab ){
179 /* Two or more indexes share the same root page. There must
180 ** be imposter tables. So just return true. The assert is not
181 ** useful in that case. */
182 return 1;
183 }
178 iTab = pIdx->pTable->tnum; 184 iTab = pIdx->pTable->tnum;
179 } 185 }
180 } 186 }
181 }else{ 187 }else{
182 iTab = iRoot; 188 iTab = iRoot;
183 } 189 }
184 190
185 /* Search for the required lock. Either a write-lock on root-page iTab, a 191 /* Search for the required lock. Either a write-lock on root-page iTab, a
186 ** write-lock on the schema table, or (if the client is reading) a 192 ** write-lock on the schema table, or (if the client is reading) a
187 ** read-lock on iTab will suffice. Return 1 if any of these are found. */ 193 ** read-lock on iTab will suffice. Return 1 if any of these are found. */
(...skipping 289 matching lines...) Expand 10 before | Expand all | Expand 10 after
477 ** Otherwise, if argument isClearTable is false, then the row with 483 ** Otherwise, if argument isClearTable is false, then the row with
478 ** rowid iRow is being replaced or deleted. In this case invalidate 484 ** rowid iRow is being replaced or deleted. In this case invalidate
479 ** only those incrblob cursors open on that specific row. 485 ** only those incrblob cursors open on that specific row.
480 */ 486 */
481 static void invalidateIncrblobCursors( 487 static void invalidateIncrblobCursors(
482 Btree *pBtree, /* The database file to check */ 488 Btree *pBtree, /* The database file to check */
483 i64 iRow, /* The rowid that might be changing */ 489 i64 iRow, /* The rowid that might be changing */
484 int isClearTable /* True if all rows are being deleted */ 490 int isClearTable /* True if all rows are being deleted */
485 ){ 491 ){
486 BtCursor *p; 492 BtCursor *p;
487 BtShared *pBt = pBtree->pBt; 493 if( pBtree->hasIncrblobCur==0 ) return;
488 assert( sqlite3BtreeHoldsMutex(pBtree) ); 494 assert( sqlite3BtreeHoldsMutex(pBtree) );
489 for(p=pBt->pCursor; p; p=p->pNext){ 495 pBtree->hasIncrblobCur = 0;
490 if( (p->curFlags & BTCF_Incrblob)!=0 496 for(p=pBtree->pBt->pCursor; p; p=p->pNext){
491 && (isClearTable || p->info.nKey==iRow) 497 if( (p->curFlags & BTCF_Incrblob)!=0 ){
492 ){ 498 pBtree->hasIncrblobCur = 1;
493 p->eState = CURSOR_INVALID; 499 if( isClearTable || p->info.nKey==iRow ){
500 p->eState = CURSOR_INVALID;
501 }
494 } 502 }
495 } 503 }
496 } 504 }
497 505
498 #else 506 #else
499 /* Stub function when INCRBLOB is omitted */ 507 /* Stub function when INCRBLOB is omitted */
500 #define invalidateIncrblobCursors(x,y,z) 508 #define invalidateIncrblobCursors(x,y,z)
501 #endif /* SQLITE_OMIT_INCRBLOB */ 509 #endif /* SQLITE_OMIT_INCRBLOB */
502 510
503 /* 511 /*
(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after
576 */ 584 */
577 static void btreeReleaseAllCursorPages(BtCursor *pCur){ 585 static void btreeReleaseAllCursorPages(BtCursor *pCur){
578 int i; 586 int i;
579 for(i=0; i<=pCur->iPage; i++){ 587 for(i=0; i<=pCur->iPage; i++){
580 releasePage(pCur->apPage[i]); 588 releasePage(pCur->apPage[i]);
581 pCur->apPage[i] = 0; 589 pCur->apPage[i] = 0;
582 } 590 }
583 pCur->iPage = -1; 591 pCur->iPage = -1;
584 } 592 }
585 593
586
587 /* 594 /*
588 ** Save the current cursor position in the variables BtCursor.nKey 595 ** The cursor passed as the only argument must point to a valid entry
589 ** and BtCursor.pKey. The cursor's state is set to CURSOR_REQUIRESEEK. 596 ** when this function is called (i.e. have eState==CURSOR_VALID). This
597 ** function saves the current cursor key in variables pCur->nKey and
598 ** pCur->pKey. SQLITE_OK is returned if successful or an SQLite error
599 ** code otherwise.
590 ** 600 **
591 ** The caller must ensure that the cursor is valid (has eState==CURSOR_VALID) 601 ** If the cursor is open on an intkey table, then the integer key
592 ** prior to calling this routine. 602 ** (the rowid) is stored in pCur->nKey and pCur->pKey is left set to
603 ** NULL. If the cursor is open on a non-intkey table, then pCur->pKey is
604 ** set to point to a malloced buffer pCur->nKey bytes in size containing
605 ** the key.
593 */ 606 */
594 static int saveCursorPosition(BtCursor *pCur){ 607 static int saveCursorKey(BtCursor *pCur){
595 int rc; 608 int rc;
596
597 assert( CURSOR_VALID==pCur->eState ); 609 assert( CURSOR_VALID==pCur->eState );
598 assert( 0==pCur->pKey ); 610 assert( 0==pCur->pKey );
599 assert( cursorHoldsMutex(pCur) ); 611 assert( cursorHoldsMutex(pCur) );
600 612
601 rc = sqlite3BtreeKeySize(pCur, &pCur->nKey); 613 rc = sqlite3BtreeKeySize(pCur, &pCur->nKey);
602 assert( rc==SQLITE_OK ); /* KeySize() cannot fail */ 614 assert( rc==SQLITE_OK ); /* KeySize() cannot fail */
603 615
604 /* If this is an intKey table, then the above call to BtreeKeySize() 616 /* If this is an intKey table, then the above call to BtreeKeySize()
605 ** stores the integer key in pCur->nKey. In this case this value is 617 ** stores the integer key in pCur->nKey. In this case this value is
606 ** all that is required. Otherwise, if pCur is not open on an intKey 618 ** all that is required. Otherwise, if pCur is not open on an intKey
607 ** table, then malloc space for and store the pCur->nKey bytes of key 619 ** table, then malloc space for and store the pCur->nKey bytes of key
608 ** data. 620 ** data. */
609 */ 621 if( 0==pCur->curIntKey ){
610 if( 0==pCur->apPage[0]->intKey ){
611 void *pKey = sqlite3Malloc( pCur->nKey ); 622 void *pKey = sqlite3Malloc( pCur->nKey );
612 if( pKey ){ 623 if( pKey ){
613 rc = sqlite3BtreeKey(pCur, 0, (int)pCur->nKey, pKey); 624 rc = sqlite3BtreeKey(pCur, 0, (int)pCur->nKey, pKey);
614 if( rc==SQLITE_OK ){ 625 if( rc==SQLITE_OK ){
615 pCur->pKey = pKey; 626 pCur->pKey = pKey;
616 }else{ 627 }else{
617 sqlite3_free(pKey); 628 sqlite3_free(pKey);
618 } 629 }
619 }else{ 630 }else{
620 rc = SQLITE_NOMEM; 631 rc = SQLITE_NOMEM;
621 } 632 }
622 } 633 }
623 assert( !pCur->apPage[0]->intKey || !pCur->pKey ); 634 assert( !pCur->curIntKey || !pCur->pKey );
635 return rc;
636 }
624 637
638 /*
639 ** Save the current cursor position in the variables BtCursor.nKey
640 ** and BtCursor.pKey. The cursor's state is set to CURSOR_REQUIRESEEK.
641 **
642 ** The caller must ensure that the cursor is valid (has eState==CURSOR_VALID)
643 ** prior to calling this routine.
644 */
645 static int saveCursorPosition(BtCursor *pCur){
646 int rc;
647
648 assert( CURSOR_VALID==pCur->eState || CURSOR_SKIPNEXT==pCur->eState );
649 assert( 0==pCur->pKey );
650 assert( cursorHoldsMutex(pCur) );
651
652 if( pCur->eState==CURSOR_SKIPNEXT ){
653 pCur->eState = CURSOR_VALID;
654 }else{
655 pCur->skipNext = 0;
656 }
657
658 rc = saveCursorKey(pCur);
625 if( rc==SQLITE_OK ){ 659 if( rc==SQLITE_OK ){
626 btreeReleaseAllCursorPages(pCur); 660 btreeReleaseAllCursorPages(pCur);
627 pCur->eState = CURSOR_REQUIRESEEK; 661 pCur->eState = CURSOR_REQUIRESEEK;
628 } 662 }
629 663
630 invalidateOverflowCache(pCur); 664 pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl|BTCF_AtLast);
631 return rc; 665 return rc;
632 } 666 }
633 667
634 /* Forward reference */ 668 /* Forward reference */
635 static int SQLITE_NOINLINE saveCursorsOnList(BtCursor*,Pgno,BtCursor*); 669 static int SQLITE_NOINLINE saveCursorsOnList(BtCursor*,Pgno,BtCursor*);
636 670
637 /* 671 /*
638 ** Save the positions of all cursors (except pExcept) that are open on 672 ** Save the positions of all cursors (except pExcept) that are open on
639 ** the table with root-page iRoot. "Saving the cursor position" means that 673 ** the table with root-page iRoot. "Saving the cursor position" means that
640 ** the location in the btree is remembered in such a way that it can be 674 ** the location in the btree is remembered in such a way that it can be
641 ** moved back to the same spot after the btree has been modified. This 675 ** moved back to the same spot after the btree has been modified. This
642 ** routine is called just before cursor pExcept is used to modify the 676 ** routine is called just before cursor pExcept is used to modify the
643 ** table, for example in BtreeDelete() or BtreeInsert(). 677 ** table, for example in BtreeDelete() or BtreeInsert().
644 ** 678 **
679 ** If there are two or more cursors on the same btree, then all such
680 ** cursors should have their BTCF_Multiple flag set. The btreeCursor()
681 ** routine enforces that rule. This routine only needs to be called in
682 ** the uncommon case when pExcept has the BTCF_Multiple flag set.
683 **
684 ** If pExcept!=NULL and if no other cursors are found on the same root-page,
685 ** then the BTCF_Multiple flag on pExcept is cleared, to avoid another
686 ** pointless call to this routine.
687 **
645 ** Implementation note: This routine merely checks to see if any cursors 688 ** Implementation note: This routine merely checks to see if any cursors
646 ** need to be saved. It calls out to saveCursorsOnList() in the (unusual) 689 ** need to be saved. It calls out to saveCursorsOnList() in the (unusual)
647 ** event that cursors are in need of being saved. 690 ** event that cursors are in need of being saved.
648 */ 691 */
649 static int saveAllCursors(BtShared *pBt, Pgno iRoot, BtCursor *pExcept){ 692 static int saveAllCursors(BtShared *pBt, Pgno iRoot, BtCursor *pExcept){
650 BtCursor *p; 693 BtCursor *p;
651 assert( sqlite3_mutex_held(pBt->mutex) ); 694 assert( sqlite3_mutex_held(pBt->mutex) );
652 assert( pExcept==0 || pExcept->pBt==pBt ); 695 assert( pExcept==0 || pExcept->pBt==pBt );
653 for(p=pBt->pCursor; p; p=p->pNext){ 696 for(p=pBt->pCursor; p; p=p->pNext){
654 if( p!=pExcept && (0==iRoot || p->pgnoRoot==iRoot) ) break; 697 if( p!=pExcept && (0==iRoot || p->pgnoRoot==iRoot) ) break;
655 } 698 }
656 return p ? saveCursorsOnList(p, iRoot, pExcept) : SQLITE_OK; 699 if( p ) return saveCursorsOnList(p, iRoot, pExcept);
700 if( pExcept ) pExcept->curFlags &= ~BTCF_Multiple;
701 return SQLITE_OK;
657 } 702 }
658 703
659 /* This helper routine to saveAllCursors does the actual work of saving 704 /* This helper routine to saveAllCursors does the actual work of saving
660 ** the cursors if and when a cursor is found that actually requires saving. 705 ** the cursors if and when a cursor is found that actually requires saving.
661 ** The common case is that no cursors need to be saved, so this routine is 706 ** The common case is that no cursors need to be saved, so this routine is
662 ** broken out from its caller to avoid unnecessary stack pointer movement. 707 ** broken out from its caller to avoid unnecessary stack pointer movement.
663 */ 708 */
664 static int SQLITE_NOINLINE saveCursorsOnList( 709 static int SQLITE_NOINLINE saveCursorsOnList(
665 BtCursor *p, /* The first cursor that needs saving */ 710 BtCursor *p, /* The first cursor that needs saving */
666 Pgno iRoot, /* Only save cursor with this iRoot. Save all if zero */ 711 Pgno iRoot, /* Only save cursor with this iRoot. Save all if zero */
667 BtCursor *pExcept /* Do not save this cursor */ 712 BtCursor *pExcept /* Do not save this cursor */
668 ){ 713 ){
669 do{ 714 do{
670 if( p!=pExcept && (0==iRoot || p->pgnoRoot==iRoot) ){ 715 if( p!=pExcept && (0==iRoot || p->pgnoRoot==iRoot) ){
671 if( p->eState==CURSOR_VALID ){ 716 if( p->eState==CURSOR_VALID || p->eState==CURSOR_SKIPNEXT ){
672 int rc = saveCursorPosition(p); 717 int rc = saveCursorPosition(p);
673 if( SQLITE_OK!=rc ){ 718 if( SQLITE_OK!=rc ){
674 return rc; 719 return rc;
675 } 720 }
676 }else{ 721 }else{
677 testcase( p->iPage>0 ); 722 testcase( p->iPage>0 );
678 btreeReleaseAllCursorPages(p); 723 btreeReleaseAllCursorPages(p);
679 } 724 }
680 } 725 }
681 p = p->pNext; 726 p = p->pNext;
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after
733 778
734 /* 779 /*
735 ** Restore the cursor to the position it was in (or as close to as possible) 780 ** Restore the cursor to the position it was in (or as close to as possible)
736 ** when saveCursorPosition() was called. Note that this call deletes the 781 ** when saveCursorPosition() was called. Note that this call deletes the
737 ** saved position info stored by saveCursorPosition(), so there can be 782 ** saved position info stored by saveCursorPosition(), so there can be
738 ** at most one effective restoreCursorPosition() call after each 783 ** at most one effective restoreCursorPosition() call after each
739 ** saveCursorPosition(). 784 ** saveCursorPosition().
740 */ 785 */
741 static int btreeRestoreCursorPosition(BtCursor *pCur){ 786 static int btreeRestoreCursorPosition(BtCursor *pCur){
742 int rc; 787 int rc;
788 int skipNext;
743 assert( cursorHoldsMutex(pCur) ); 789 assert( cursorHoldsMutex(pCur) );
744 assert( pCur->eState>=CURSOR_REQUIRESEEK ); 790 assert( pCur->eState>=CURSOR_REQUIRESEEK );
745 if( pCur->eState==CURSOR_FAULT ){ 791 if( pCur->eState==CURSOR_FAULT ){
746 return pCur->skipNext; 792 return pCur->skipNext;
747 } 793 }
748 pCur->eState = CURSOR_INVALID; 794 pCur->eState = CURSOR_INVALID;
749 rc = btreeMoveto(pCur, pCur->pKey, pCur->nKey, 0, &pCur->skipNext); 795 rc = btreeMoveto(pCur, pCur->pKey, pCur->nKey, 0, &skipNext);
750 if( rc==SQLITE_OK ){ 796 if( rc==SQLITE_OK ){
751 sqlite3_free(pCur->pKey); 797 sqlite3_free(pCur->pKey);
752 pCur->pKey = 0; 798 pCur->pKey = 0;
753 assert( pCur->eState==CURSOR_VALID || pCur->eState==CURSOR_INVALID ); 799 assert( pCur->eState==CURSOR_VALID || pCur->eState==CURSOR_INVALID );
800 pCur->skipNext |= skipNext;
754 if( pCur->skipNext && pCur->eState==CURSOR_VALID ){ 801 if( pCur->skipNext && pCur->eState==CURSOR_VALID ){
755 pCur->eState = CURSOR_SKIPNEXT; 802 pCur->eState = CURSOR_SKIPNEXT;
756 } 803 }
757 } 804 }
758 return rc; 805 return rc;
759 } 806 }
760 807
761 #define restoreCursorPosition(p) \ 808 #define restoreCursorPosition(p) \
762 (p->eState>=CURSOR_REQUIRESEEK ? \ 809 (p->eState>=CURSOR_REQUIRESEEK ? \
763 btreeRestoreCursorPosition(p) : \ 810 btreeRestoreCursorPosition(p) : \
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after
795 int sqlite3BtreeCursorRestore(BtCursor *pCur, int *pDifferentRow){ 842 int sqlite3BtreeCursorRestore(BtCursor *pCur, int *pDifferentRow){
796 int rc; 843 int rc;
797 844
798 assert( pCur!=0 ); 845 assert( pCur!=0 );
799 assert( pCur->eState!=CURSOR_VALID ); 846 assert( pCur->eState!=CURSOR_VALID );
800 rc = restoreCursorPosition(pCur); 847 rc = restoreCursorPosition(pCur);
801 if( rc ){ 848 if( rc ){
802 *pDifferentRow = 1; 849 *pDifferentRow = 1;
803 return rc; 850 return rc;
804 } 851 }
805 if( pCur->eState!=CURSOR_VALID || NEVER(pCur->skipNext!=0) ){ 852 if( pCur->eState!=CURSOR_VALID ){
806 *pDifferentRow = 1; 853 *pDifferentRow = 1;
807 }else{ 854 }else{
855 assert( pCur->skipNext==0 );
808 *pDifferentRow = 0; 856 *pDifferentRow = 0;
809 } 857 }
810 return SQLITE_OK; 858 return SQLITE_OK;
811 } 859 }
812 860
861 #ifdef SQLITE_ENABLE_CURSOR_HINTS
862 /*
863 ** Provide hints to the cursor. The particular hint given (and the type
864 ** and number of the varargs parameters) is determined by the eHintType
865 ** parameter. See the definitions of the BTREE_HINT_* macros for details.
866 */
867 void sqlite3BtreeCursorHint(BtCursor *pCur, int eHintType, ...){
868 /* Used only by systems that substitute their own storage engine */
869 }
870 #endif
871
872 /*
873 ** Provide flag hints to the cursor.
874 */
875 void sqlite3BtreeCursorHintFlags(BtCursor *pCur, unsigned x){
876 assert( x==BTREE_SEEK_EQ || x==BTREE_BULKLOAD || x==0 );
877 pCur->hints = x;
878 }
879
880
813 #ifndef SQLITE_OMIT_AUTOVACUUM 881 #ifndef SQLITE_OMIT_AUTOVACUUM
814 /* 882 /*
815 ** Given a page number of a regular database page, return the page 883 ** Given a page number of a regular database page, return the page
816 ** number for the pointer-map page that contains the entry for the 884 ** number for the pointer-map page that contains the entry for the
817 ** input page number. 885 ** input page number.
818 ** 886 **
819 ** Return 0 (not a valid page) for pgno==1 since there is 887 ** Return 0 (not a valid page) for pgno==1 since there is
820 ** no pointer map associated with page 1. The integrity_check logic 888 ** no pointer map associated with page 1. The integrity_check logic
821 ** requires that ptrmapPageno(*,1)!=1. 889 ** requires that ptrmapPageno(*,1)!=1.
822 */ 890 */
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
856 assert( sqlite3_mutex_held(pBt->mutex) ); 924 assert( sqlite3_mutex_held(pBt->mutex) );
857 /* The master-journal page number must never be used as a pointer map page */ 925 /* The master-journal page number must never be used as a pointer map page */
858 assert( 0==PTRMAP_ISPAGE(pBt, PENDING_BYTE_PAGE(pBt)) ); 926 assert( 0==PTRMAP_ISPAGE(pBt, PENDING_BYTE_PAGE(pBt)) );
859 927
860 assert( pBt->autoVacuum ); 928 assert( pBt->autoVacuum );
861 if( key==0 ){ 929 if( key==0 ){
862 *pRC = SQLITE_CORRUPT_BKPT; 930 *pRC = SQLITE_CORRUPT_BKPT;
863 return; 931 return;
864 } 932 }
865 iPtrmap = PTRMAP_PAGENO(pBt, key); 933 iPtrmap = PTRMAP_PAGENO(pBt, key);
866 rc = sqlite3PagerGet(pBt->pPager, iPtrmap, &pDbPage); 934 rc = sqlite3PagerGet(pBt->pPager, iPtrmap, &pDbPage, 0);
867 if( rc!=SQLITE_OK ){ 935 if( rc!=SQLITE_OK ){
868 *pRC = rc; 936 *pRC = rc;
869 return; 937 return;
870 } 938 }
871 offset = PTRMAP_PTROFFSET(iPtrmap, key); 939 offset = PTRMAP_PTROFFSET(iPtrmap, key);
872 if( offset<0 ){ 940 if( offset<0 ){
873 *pRC = SQLITE_CORRUPT_BKPT; 941 *pRC = SQLITE_CORRUPT_BKPT;
874 goto ptrmap_exit; 942 goto ptrmap_exit;
875 } 943 }
876 assert( offset <= (int)pBt->usableSize-5 ); 944 assert( offset <= (int)pBt->usableSize-5 );
(...skipping 22 matching lines...) Expand all
899 static int ptrmapGet(BtShared *pBt, Pgno key, u8 *pEType, Pgno *pPgno){ 967 static int ptrmapGet(BtShared *pBt, Pgno key, u8 *pEType, Pgno *pPgno){
900 DbPage *pDbPage; /* The pointer map page */ 968 DbPage *pDbPage; /* The pointer map page */
901 int iPtrmap; /* Pointer map page index */ 969 int iPtrmap; /* Pointer map page index */
902 u8 *pPtrmap; /* Pointer map page data */ 970 u8 *pPtrmap; /* Pointer map page data */
903 int offset; /* Offset of entry in pointer map */ 971 int offset; /* Offset of entry in pointer map */
904 int rc; 972 int rc;
905 973
906 assert( sqlite3_mutex_held(pBt->mutex) ); 974 assert( sqlite3_mutex_held(pBt->mutex) );
907 975
908 iPtrmap = PTRMAP_PAGENO(pBt, key); 976 iPtrmap = PTRMAP_PAGENO(pBt, key);
909 rc = sqlite3PagerGet(pBt->pPager, iPtrmap, &pDbPage); 977 rc = sqlite3PagerGet(pBt->pPager, iPtrmap, &pDbPage, 0);
910 if( rc!=0 ){ 978 if( rc!=0 ){
911 return rc; 979 return rc;
912 } 980 }
913 pPtrmap = (u8 *)sqlite3PagerGetData(pDbPage); 981 pPtrmap = (u8 *)sqlite3PagerGetData(pDbPage);
914 982
915 offset = PTRMAP_PTROFFSET(iPtrmap, key); 983 offset = PTRMAP_PTROFFSET(iPtrmap, key);
916 if( offset<0 ){ 984 if( offset<0 ){
917 sqlite3PagerUnref(pDbPage); 985 sqlite3PagerUnref(pDbPage);
918 return SQLITE_CORRUPT_BKPT; 986 return SQLITE_CORRUPT_BKPT;
919 } 987 }
(...skipping 11 matching lines...) Expand all
931 #define ptrmapPut(w,x,y,z,rc) 999 #define ptrmapPut(w,x,y,z,rc)
932 #define ptrmapGet(w,x,y,z) SQLITE_OK 1000 #define ptrmapGet(w,x,y,z) SQLITE_OK
933 #define ptrmapPutOvflPtr(x, y, rc) 1001 #define ptrmapPutOvflPtr(x, y, rc)
934 #endif 1002 #endif
935 1003
936 /* 1004 /*
937 ** Given a btree page and a cell index (0 means the first cell on 1005 ** Given a btree page and a cell index (0 means the first cell on
938 ** the page, 1 means the second cell, and so forth) return a pointer 1006 ** the page, 1 means the second cell, and so forth) return a pointer
939 ** to the cell content. 1007 ** to the cell content.
940 ** 1008 **
1009 ** findCellPastPtr() does the same except it skips past the initial
1010 ** 4-byte child pointer found on interior pages, if there is one.
1011 **
941 ** This routine works only for pages that do not contain overflow cells. 1012 ** This routine works only for pages that do not contain overflow cells.
942 */ 1013 */
943 #define findCell(P,I) \ 1014 #define findCell(P,I) \
944 ((P)->aData + ((P)->maskPage & get2byte(&(P)->aCellIdx[2*(I)]))) 1015 ((P)->aData + ((P)->maskPage & get2byteAligned(&(P)->aCellIdx[2*(I)])))
945 #define findCellv2(D,M,O,I) (D+(M&get2byte(D+(O+2*(I))))) 1016 #define findCellPastPtr(P,I) \
1017 ((P)->aDataOfst + ((P)->maskPage & get2byteAligned(&(P)->aCellIdx[2*(I)])))
946 1018
947 1019
948 /* 1020 /*
949 ** This a more complex version of findCell() that works for 1021 ** This is common tail processing for btreeParseCellPtr() and
950 ** pages that do contain overflow cells. 1022 ** btreeParseCellPtrIndex() for the case when the cell does not fit entirely
1023 ** on a single B-tree page. Make necessary adjustments to the CellInfo
1024 ** structure.
951 */ 1025 */
952 static u8 *findOverflowCell(MemPage *pPage, int iCell){ 1026 static SQLITE_NOINLINE void btreeParseCellAdjustSizeForOverflow(
953 int i; 1027 MemPage *pPage, /* Page containing the cell */
954 assert( sqlite3_mutex_held(pPage->pBt->mutex) ); 1028 u8 *pCell, /* Pointer to the cell text. */
955 for(i=pPage->nOverflow-1; i>=0; i--){ 1029 CellInfo *pInfo /* Fill in this structure */
956 int k; 1030 ){
957 k = pPage->aiOvfl[i]; 1031 /* If the payload will not fit completely on the local page, we have
958 if( k<=iCell ){ 1032 ** to decide how much to store locally and how much to spill onto
959 if( k==iCell ){ 1033 ** overflow pages. The strategy is to minimize the amount of unused
960 return pPage->apOvfl[i]; 1034 ** space on overflow pages while keeping the amount of local storage
961 } 1035 ** in between minLocal and maxLocal.
962 iCell--; 1036 **
963 } 1037 ** Warning: changing the way overflow payload is distributed in any
1038 ** way will result in an incompatible file format.
1039 */
1040 int minLocal; /* Minimum amount of payload held locally */
1041 int maxLocal; /* Maximum amount of payload held locally */
1042 int surplus; /* Overflow payload available for local storage */
1043
1044 minLocal = pPage->minLocal;
1045 maxLocal = pPage->maxLocal;
1046 surplus = minLocal + (pInfo->nPayload - minLocal)%(pPage->pBt->usableSize-4);
1047 testcase( surplus==maxLocal );
1048 testcase( surplus==maxLocal+1 );
1049 if( surplus <= maxLocal ){
1050 pInfo->nLocal = (u16)surplus;
1051 }else{
1052 pInfo->nLocal = (u16)minLocal;
964 } 1053 }
965 return findCell(pPage, iCell); 1054 pInfo->nSize = (u16)(&pInfo->pPayload[pInfo->nLocal] - pCell) + 4;
966 } 1055 }
967 1056
968 /* 1057 /*
969 ** Parse a cell content block and fill in the CellInfo structure. There 1058 ** The following routines are implementations of the MemPage.xParseCell()
970 ** are two versions of this function. btreeParseCell() takes a 1059 ** method.
971 ** cell index as the second argument and btreeParseCellPtr() 1060 **
972 ** takes a pointer to the body of the cell as its second argument. 1061 ** Parse a cell content block and fill in the CellInfo structure.
1062 **
1063 ** btreeParseCellPtr() => table btree leaf nodes
1064 ** btreeParseCellPtrNoPayload() => table btree internal nodes
1065 ** btreeParseCellPtrIndex() => index btree nodes
1066 **
1067 ** There is also a wrapper function btreeParseCell() that works for
1068 ** all MemPage types and that references the cell by index rather than
1069 ** by pointer.
973 */ 1070 */
1071 static void btreeParseCellPtrNoPayload(
1072 MemPage *pPage, /* Page containing the cell */
1073 u8 *pCell, /* Pointer to the cell text. */
1074 CellInfo *pInfo /* Fill in this structure */
1075 ){
1076 assert( sqlite3_mutex_held(pPage->pBt->mutex) );
1077 assert( pPage->leaf==0 );
1078 assert( pPage->noPayload );
1079 assert( pPage->childPtrSize==4 );
1080 #ifndef SQLITE_DEBUG
1081 UNUSED_PARAMETER(pPage);
1082 #endif
1083 pInfo->nSize = 4 + getVarint(&pCell[4], (u64*)&pInfo->nKey);
1084 pInfo->nPayload = 0;
1085 pInfo->nLocal = 0;
1086 pInfo->pPayload = 0;
1087 return;
1088 }
974 static void btreeParseCellPtr( 1089 static void btreeParseCellPtr(
975 MemPage *pPage, /* Page containing the cell */ 1090 MemPage *pPage, /* Page containing the cell */
976 u8 *pCell, /* Pointer to the cell text. */ 1091 u8 *pCell, /* Pointer to the cell text. */
977 CellInfo *pInfo /* Fill in this structure */ 1092 CellInfo *pInfo /* Fill in this structure */
978 ){ 1093 ){
979 u8 *pIter; /* For scanning through pCell */ 1094 u8 *pIter; /* For scanning through pCell */
980 u32 nPayload; /* Number of bytes of cell payload */ 1095 u32 nPayload; /* Number of bytes of cell payload */
1096 u64 iKey; /* Extracted Key value */
981 1097
982 assert( sqlite3_mutex_held(pPage->pBt->mutex) ); 1098 assert( sqlite3_mutex_held(pPage->pBt->mutex) );
983 assert( pPage->leaf==0 || pPage->leaf==1 ); 1099 assert( pPage->leaf==0 || pPage->leaf==1 );
984 if( pPage->intKeyLeaf ){ 1100 assert( pPage->intKeyLeaf || pPage->noPayload );
985 assert( pPage->childPtrSize==0 ); 1101 assert( pPage->noPayload==0 );
986 pIter = pCell + getVarint32(pCell, nPayload); 1102 assert( pPage->intKeyLeaf );
987 pIter += getVarint(pIter, (u64*)&pInfo->nKey); 1103 assert( pPage->childPtrSize==0 );
988 }else if( pPage->noPayload ){ 1104 pIter = pCell;
989 assert( pPage->childPtrSize==4 ); 1105
990 pInfo->nSize = 4 + getVarint(&pCell[4], (u64*)&pInfo->nKey); 1106 /* The next block of code is equivalent to:
991 pInfo->nPayload = 0; 1107 **
992 pInfo->nLocal = 0; 1108 ** pIter += getVarint32(pIter, nPayload);
993 pInfo->iOverflow = 0; 1109 **
994 pInfo->pPayload = 0; 1110 ** The code is inlined to avoid a function call.
995 return; 1111 */
996 }else{ 1112 nPayload = *pIter;
997 pIter = pCell + pPage->childPtrSize; 1113 if( nPayload>=0x80 ){
998 pIter += getVarint32(pIter, nPayload); 1114 u8 *pEnd = &pIter[8];
999 pInfo->nKey = nPayload; 1115 nPayload &= 0x7f;
1116 do{
1117 nPayload = (nPayload<<7) | (*++pIter & 0x7f);
1118 }while( (*pIter)>=0x80 && pIter<pEnd );
1000 } 1119 }
1120 pIter++;
1121
1122 /* The next block of code is equivalent to:
1123 **
1124 ** pIter += getVarint(pIter, (u64*)&pInfo->nKey);
1125 **
1126 ** The code is inlined to avoid a function call.
1127 */
1128 iKey = *pIter;
1129 if( iKey>=0x80 ){
1130 u8 *pEnd = &pIter[7];
1131 iKey &= 0x7f;
1132 while(1){
1133 iKey = (iKey<<7) | (*++pIter & 0x7f);
1134 if( (*pIter)<0x80 ) break;
1135 if( pIter>=pEnd ){
1136 iKey = (iKey<<8) | *++pIter;
1137 break;
1138 }
1139 }
1140 }
1141 pIter++;
1142
1143 pInfo->nKey = *(i64*)&iKey;
1001 pInfo->nPayload = nPayload; 1144 pInfo->nPayload = nPayload;
1002 pInfo->pPayload = pIter; 1145 pInfo->pPayload = pIter;
1003 testcase( nPayload==pPage->maxLocal ); 1146 testcase( nPayload==pPage->maxLocal );
1004 testcase( nPayload==pPage->maxLocal+1 ); 1147 testcase( nPayload==pPage->maxLocal+1 );
1005 if( nPayload<=pPage->maxLocal ){ 1148 if( nPayload<=pPage->maxLocal ){
1006 /* This is the (easy) common case where the entire payload fits 1149 /* This is the (easy) common case where the entire payload fits
1007 ** on the local page. No overflow is required. 1150 ** on the local page. No overflow is required.
1008 */ 1151 */
1009 pInfo->nSize = nPayload + (u16)(pIter - pCell); 1152 pInfo->nSize = nPayload + (u16)(pIter - pCell);
1010 if( pInfo->nSize<4 ) pInfo->nSize = 4; 1153 if( pInfo->nSize<4 ) pInfo->nSize = 4;
1011 pInfo->nLocal = (u16)nPayload; 1154 pInfo->nLocal = (u16)nPayload;
1012 pInfo->iOverflow = 0;
1013 }else{ 1155 }else{
1014 /* If the payload will not fit completely on the local page, we have 1156 btreeParseCellAdjustSizeForOverflow(pPage, pCell, pInfo);
1015 ** to decide how much to store locally and how much to spill onto 1157 }
1016 ** overflow pages. The strategy is to minimize the amount of unused 1158 }
1017 ** space on overflow pages while keeping the amount of local storage 1159 static void btreeParseCellPtrIndex(
1018 ** in between minLocal and maxLocal. 1160 MemPage *pPage, /* Page containing the cell */
1019 ** 1161 u8 *pCell, /* Pointer to the cell text. */
1020 ** Warning: changing the way overflow payload is distributed in any 1162 CellInfo *pInfo /* Fill in this structure */
1021 ** way will result in an incompatible file format. 1163 ){
1164 u8 *pIter; /* For scanning through pCell */
1165 u32 nPayload; /* Number of bytes of cell payload */
1166
1167 assert( sqlite3_mutex_held(pPage->pBt->mutex) );
1168 assert( pPage->leaf==0 || pPage->leaf==1 );
1169 assert( pPage->intKeyLeaf==0 );
1170 assert( pPage->noPayload==0 );
1171 pIter = pCell + pPage->childPtrSize;
1172 nPayload = *pIter;
1173 if( nPayload>=0x80 ){
1174 u8 *pEnd = &pIter[8];
1175 nPayload &= 0x7f;
1176 do{
1177 nPayload = (nPayload<<7) | (*++pIter & 0x7f);
1178 }while( *(pIter)>=0x80 && pIter<pEnd );
1179 }
1180 pIter++;
1181 pInfo->nKey = nPayload;
1182 pInfo->nPayload = nPayload;
1183 pInfo->pPayload = pIter;
1184 testcase( nPayload==pPage->maxLocal );
1185 testcase( nPayload==pPage->maxLocal+1 );
1186 if( nPayload<=pPage->maxLocal ){
1187 /* This is the (easy) common case where the entire payload fits
1188 ** on the local page. No overflow is required.
1022 */ 1189 */
1023 int minLocal; /* Minimum amount of payload held locally */ 1190 pInfo->nSize = nPayload + (u16)(pIter - pCell);
1024 int maxLocal; /* Maximum amount of payload held locally */ 1191 if( pInfo->nSize<4 ) pInfo->nSize = 4;
1025 int surplus; /* Overflow payload available for local storage */ 1192 pInfo->nLocal = (u16)nPayload;
1026 1193 }else{
1027 minLocal = pPage->minLocal; 1194 btreeParseCellAdjustSizeForOverflow(pPage, pCell, pInfo);
1028 maxLocal = pPage->maxLocal;
1029 surplus = minLocal + (nPayload - minLocal)%(pPage->pBt->usableSize - 4);
1030 testcase( surplus==maxLocal );
1031 testcase( surplus==maxLocal+1 );
1032 if( surplus <= maxLocal ){
1033 pInfo->nLocal = (u16)surplus;
1034 }else{
1035 pInfo->nLocal = (u16)minLocal;
1036 }
1037 pInfo->iOverflow = (u16)(&pInfo->pPayload[pInfo->nLocal] - pCell);
1038 pInfo->nSize = pInfo->iOverflow + 4;
1039 } 1195 }
1040 } 1196 }
1041 static void btreeParseCell( 1197 static void btreeParseCell(
1042 MemPage *pPage, /* Page containing the cell */ 1198 MemPage *pPage, /* Page containing the cell */
1043 int iCell, /* The cell index. First cell is 0 */ 1199 int iCell, /* The cell index. First cell is 0 */
1044 CellInfo *pInfo /* Fill in this structure */ 1200 CellInfo *pInfo /* Fill in this structure */
1045 ){ 1201 ){
1046 btreeParseCellPtr(pPage, findCell(pPage, iCell), pInfo); 1202 pPage->xParseCell(pPage, findCell(pPage, iCell), pInfo);
1047 } 1203 }
1048 1204
1049 /* 1205 /*
1206 ** The following routines are implementations of the MemPage.xCellSize
1207 ** method.
1208 **
1050 ** Compute the total number of bytes that a Cell needs in the cell 1209 ** Compute the total number of bytes that a Cell needs in the cell
1051 ** data area of the btree-page. The return number includes the cell 1210 ** data area of the btree-page. The return number includes the cell
1052 ** data header and the local payload, but not any overflow page or 1211 ** data header and the local payload, but not any overflow page or
1053 ** the space used by the cell pointer. 1212 ** the space used by the cell pointer.
1213 **
1214 ** cellSizePtrNoPayload() => table internal nodes
1215 ** cellSizePtr() => all index nodes & table leaf nodes
1054 */ 1216 */
1055 static u16 cellSizePtr(MemPage *pPage, u8 *pCell){ 1217 static u16 cellSizePtr(MemPage *pPage, u8 *pCell){
1056 u8 *pIter = pCell + pPage->childPtrSize; /* For looping over bytes of pCell */ 1218 u8 *pIter = pCell + pPage->childPtrSize; /* For looping over bytes of pCell */
1057 u8 *pEnd; /* End mark for a varint */ 1219 u8 *pEnd; /* End mark for a varint */
1058 u32 nSize; /* Size value to return */ 1220 u32 nSize; /* Size value to return */
1059 1221
1060 #ifdef SQLITE_DEBUG 1222 #ifdef SQLITE_DEBUG
1061 /* The value returned by this function should always be the same as 1223 /* The value returned by this function should always be the same as
1062 ** the (CellInfo.nSize) value found by doing a full parse of the 1224 ** the (CellInfo.nSize) value found by doing a full parse of the
1063 ** cell. If SQLITE_DEBUG is defined, an assert() at the bottom of 1225 ** cell. If SQLITE_DEBUG is defined, an assert() at the bottom of
1064 ** this function verifies that this invariant is not violated. */ 1226 ** this function verifies that this invariant is not violated. */
1065 CellInfo debuginfo; 1227 CellInfo debuginfo;
1066 btreeParseCellPtr(pPage, pCell, &debuginfo); 1228 pPage->xParseCell(pPage, pCell, &debuginfo);
1067 #endif 1229 #endif
1068 1230
1069 if( pPage->noPayload ){ 1231 assert( pPage->noPayload==0 );
1070 pEnd = &pIter[9];
1071 while( (*pIter++)&0x80 && pIter<pEnd );
1072 assert( pPage->childPtrSize==4 );
1073 return (u16)(pIter - pCell);
1074 }
1075 nSize = *pIter; 1232 nSize = *pIter;
1076 if( nSize>=0x80 ){ 1233 if( nSize>=0x80 ){
1077 pEnd = &pIter[9]; 1234 pEnd = &pIter[8];
1078 nSize &= 0x7f; 1235 nSize &= 0x7f;
1079 do{ 1236 do{
1080 nSize = (nSize<<7) | (*++pIter & 0x7f); 1237 nSize = (nSize<<7) | (*++pIter & 0x7f);
1081 }while( *(pIter)>=0x80 && pIter<pEnd ); 1238 }while( *(pIter)>=0x80 && pIter<pEnd );
1082 } 1239 }
1083 pIter++; 1240 pIter++;
1084 if( pPage->intKey ){ 1241 if( pPage->intKey ){
1085 /* pIter now points at the 64-bit integer key value, a variable length 1242 /* pIter now points at the 64-bit integer key value, a variable length
1086 ** integer. The following block moves pIter to point at the first byte 1243 ** integer. The following block moves pIter to point at the first byte
1087 ** past the end of the key value. */ 1244 ** past the end of the key value. */
(...skipping 11 matching lines...) Expand all
1099 testcase( nSize==pPage->maxLocal ); 1256 testcase( nSize==pPage->maxLocal );
1100 testcase( nSize==pPage->maxLocal+1 ); 1257 testcase( nSize==pPage->maxLocal+1 );
1101 if( nSize>pPage->maxLocal ){ 1258 if( nSize>pPage->maxLocal ){
1102 nSize = minLocal; 1259 nSize = minLocal;
1103 } 1260 }
1104 nSize += 4 + (u16)(pIter - pCell); 1261 nSize += 4 + (u16)(pIter - pCell);
1105 } 1262 }
1106 assert( nSize==debuginfo.nSize || CORRUPT_DB ); 1263 assert( nSize==debuginfo.nSize || CORRUPT_DB );
1107 return (u16)nSize; 1264 return (u16)nSize;
1108 } 1265 }
1266 static u16 cellSizePtrNoPayload(MemPage *pPage, u8 *pCell){
1267 u8 *pIter = pCell + 4; /* For looping over bytes of pCell */
1268 u8 *pEnd; /* End mark for a varint */
1269
1270 #ifdef SQLITE_DEBUG
1271 /* The value returned by this function should always be the same as
1272 ** the (CellInfo.nSize) value found by doing a full parse of the
1273 ** cell. If SQLITE_DEBUG is defined, an assert() at the bottom of
1274 ** this function verifies that this invariant is not violated. */
1275 CellInfo debuginfo;
1276 pPage->xParseCell(pPage, pCell, &debuginfo);
1277 #else
1278 UNUSED_PARAMETER(pPage);
1279 #endif
1280
1281 assert( pPage->childPtrSize==4 );
1282 pEnd = pIter + 9;
1283 while( (*pIter++)&0x80 && pIter<pEnd );
1284 assert( debuginfo.nSize==(u16)(pIter - pCell) || CORRUPT_DB );
1285 return (u16)(pIter - pCell);
1286 }
1287
1109 1288
#ifdef SQLITE_DEBUG
/* Index-based counterpart of the xCellSize methods.  It looks up cell
** iCell on pPage and reports its size via pPage->xCellSize.  This routine
** is used inside of assert() statements only. */
static u16 cellSize(MemPage *pPage, int iCell){
  u8 *pCell = findCell(pPage, iCell);   /* Start of the requested cell */
  return pPage->xCellSize(pPage, pCell);
}
#endif
1117 1296
1118 #ifndef SQLITE_OMIT_AUTOVACUUM 1297 #ifndef SQLITE_OMIT_AUTOVACUUM
1119 /* 1298 /*
1120 ** If the cell pCell, part of page pPage contains a pointer 1299 ** If the cell pCell, part of page pPage contains a pointer
1121 ** to an overflow page, insert an entry into the pointer-map 1300 ** to an overflow page, insert an entry into the pointer-map
1122 ** for the overflow page. 1301 ** for the overflow page.
1123 */ 1302 */
1124 static void ptrmapPutOvflPtr(MemPage *pPage, u8 *pCell, int *pRC){ 1303 static void ptrmapPutOvflPtr(MemPage *pPage, u8 *pCell, int *pRC){
1125 CellInfo info; 1304 CellInfo info;
1126 if( *pRC ) return; 1305 if( *pRC ) return;
1127 assert( pCell!=0 ); 1306 assert( pCell!=0 );
1128 btreeParseCellPtr(pPage, pCell, &info); 1307 pPage->xParseCell(pPage, pCell, &info);
1129 if( info.iOverflow ){ 1308 if( info.nLocal<info.nPayload ){
1130 Pgno ovfl = get4byte(&pCell[info.iOverflow]); 1309 Pgno ovfl = get4byte(&pCell[info.nSize-4]);
1131 ptrmapPut(pPage->pBt, ovfl, PTRMAP_OVERFLOW1, pPage->pgno, pRC); 1310 ptrmapPut(pPage->pBt, ovfl, PTRMAP_OVERFLOW1, pPage->pgno, pRC);
1132 } 1311 }
1133 } 1312 }
1134 #endif 1313 #endif
1135 1314
1136 1315
1137 /* 1316 /*
1138 ** Defragment the page given. All Cells are moved to the 1317 ** Defragment the page given. All Cells are moved to the
1139 ** end of the page and all free space is collected into one 1318 ** end of the page and all free space is collected into one
1140 ** big FreeBlk that occurs in between the header and cell 1319 ** big FreeBlk that occurs in between the header and cell
1141 ** pointer array and the cell content area. 1320 ** pointer array and the cell content area.
1321 **
1322 ** EVIDENCE-OF: R-44582-60138 SQLite may from time to time reorganize a
1323 ** b-tree page so that there are no freeblocks or fragment bytes, all
1324 ** unused bytes are contained in the unallocated space region, and all
1325 ** cells are packed tightly at the end of the page.
1142 */ 1326 */
1143 static int defragmentPage(MemPage *pPage){ 1327 static int defragmentPage(MemPage *pPage){
1144 int i; /* Loop counter */ 1328 int i; /* Loop counter */
1145 int pc; /* Address of the i-th cell */ 1329 int pc; /* Address of the i-th cell */
1146 int hdr; /* Offset to the page header */ 1330 int hdr; /* Offset to the page header */
1147 int size; /* Size of a cell */ 1331 int size; /* Size of a cell */
1148 int usableSize; /* Number of usable bytes on a page */ 1332 int usableSize; /* Number of usable bytes on a page */
1149 int cellOffset; /* Offset to the cell pointer array */ 1333 int cellOffset; /* Offset to the cell pointer array */
1150 int cbrk; /* Offset to the cell content area */ 1334 int cbrk; /* Offset to the cell content area */
1151 int nCell; /* Number of cells on the page */ 1335 int nCell; /* Number of cells on the page */
1152 unsigned char *data; /* The page data */ 1336 unsigned char *data; /* The page data */
1153 unsigned char *temp; /* Temp area for cell content */ 1337 unsigned char *temp; /* Temp area for cell content */
1338 unsigned char *src; /* Source of content */
1154 int iCellFirst; /* First allowable cell index */ 1339 int iCellFirst; /* First allowable cell index */
1155 int iCellLast; /* Last possible cell index */ 1340 int iCellLast; /* Last possible cell index */
1156 1341
1157 1342
1158 assert( sqlite3PagerIswriteable(pPage->pDbPage) ); 1343 assert( sqlite3PagerIswriteable(pPage->pDbPage) );
1159 assert( pPage->pBt!=0 ); 1344 assert( pPage->pBt!=0 );
1160 assert( pPage->pBt->usableSize <= SQLITE_MAX_PAGE_SIZE ); 1345 assert( pPage->pBt->usableSize <= SQLITE_MAX_PAGE_SIZE );
1161 assert( pPage->nOverflow==0 ); 1346 assert( pPage->nOverflow==0 );
1162 assert( sqlite3_mutex_held(pPage->pBt->mutex) ); 1347 assert( sqlite3_mutex_held(pPage->pBt->mutex) );
1163 temp = sqlite3PagerTempSpace(pPage->pBt->pPager); 1348 temp = 0;
1164 data = pPage->aData; 1349 src = data = pPage->aData;
1165 hdr = pPage->hdrOffset; 1350 hdr = pPage->hdrOffset;
1166 cellOffset = pPage->cellOffset; 1351 cellOffset = pPage->cellOffset;
1167 nCell = pPage->nCell; 1352 nCell = pPage->nCell;
1168 assert( nCell==get2byte(&data[hdr+3]) ); 1353 assert( nCell==get2byte(&data[hdr+3]) );
1169 usableSize = pPage->pBt->usableSize; 1354 usableSize = pPage->pBt->usableSize;
1170 cbrk = get2byte(&data[hdr+5]);
1171 memcpy(&temp[cbrk], &data[cbrk], usableSize - cbrk);
1172 cbrk = usableSize; 1355 cbrk = usableSize;
1173 iCellFirst = cellOffset + 2*nCell; 1356 iCellFirst = cellOffset + 2*nCell;
1174 iCellLast = usableSize - 4; 1357 iCellLast = usableSize - 4;
1175 for(i=0; i<nCell; i++){ 1358 for(i=0; i<nCell; i++){
1176 u8 *pAddr; /* The i-th cell pointer */ 1359 u8 *pAddr; /* The i-th cell pointer */
1177 pAddr = &data[cellOffset + i*2]; 1360 pAddr = &data[cellOffset + i*2];
1178 pc = get2byte(pAddr); 1361 pc = get2byte(pAddr);
1179 testcase( pc==iCellFirst ); 1362 testcase( pc==iCellFirst );
1180 testcase( pc==iCellLast ); 1363 testcase( pc==iCellLast );
1181 #if !defined(SQLITE_ENABLE_OVERSIZE_CELL_CHECK)
1182 /* These conditions have already been verified in btreeInitPage() 1364 /* These conditions have already been verified in btreeInitPage()
1183 ** if SQLITE_ENABLE_OVERSIZE_CELL_CHECK is defined 1365 ** if PRAGMA cell_size_check=ON.
1184 */ 1366 */
1185 if( pc<iCellFirst || pc>iCellLast ){ 1367 if( pc<iCellFirst || pc>iCellLast ){
1186 return SQLITE_CORRUPT_BKPT; 1368 return SQLITE_CORRUPT_BKPT;
1187 } 1369 }
1188 #endif
1189 assert( pc>=iCellFirst && pc<=iCellLast ); 1370 assert( pc>=iCellFirst && pc<=iCellLast );
1190 size = cellSizePtr(pPage, &temp[pc]); 1371 size = pPage->xCellSize(pPage, &src[pc]);
1191 cbrk -= size; 1372 cbrk -= size;
1192 #if defined(SQLITE_ENABLE_OVERSIZE_CELL_CHECK)
1193 if( cbrk<iCellFirst ){
1194 return SQLITE_CORRUPT_BKPT;
1195 }
1196 #else
1197 if( cbrk<iCellFirst || pc+size>usableSize ){ 1373 if( cbrk<iCellFirst || pc+size>usableSize ){
1198 return SQLITE_CORRUPT_BKPT; 1374 return SQLITE_CORRUPT_BKPT;
1199 } 1375 }
1200 #endif
1201 assert( cbrk+size<=usableSize && cbrk>=iCellFirst ); 1376 assert( cbrk+size<=usableSize && cbrk>=iCellFirst );
1202 testcase( cbrk+size==usableSize ); 1377 testcase( cbrk+size==usableSize );
1203 testcase( pc+size==usableSize ); 1378 testcase( pc+size==usableSize );
1204 memcpy(&data[cbrk], &temp[pc], size);
1205 put2byte(pAddr, cbrk); 1379 put2byte(pAddr, cbrk);
1380 if( temp==0 ){
1381 int x;
1382 if( cbrk==pc ) continue;
1383 temp = sqlite3PagerTempSpace(pPage->pBt->pPager);
1384 x = get2byte(&data[hdr+5]);
1385 memcpy(&temp[x], &data[x], (cbrk+size) - x);
1386 src = temp;
1387 }
1388 memcpy(&data[cbrk], &src[pc], size);
1206 } 1389 }
1207 assert( cbrk>=iCellFirst ); 1390 assert( cbrk>=iCellFirst );
1208 put2byte(&data[hdr+5], cbrk); 1391 put2byte(&data[hdr+5], cbrk);
1209 data[hdr+1] = 0; 1392 data[hdr+1] = 0;
1210 data[hdr+2] = 0; 1393 data[hdr+2] = 0;
1211 data[hdr+7] = 0; 1394 data[hdr+7] = 0;
1212 memset(&data[iCellFirst], 0, cbrk-iCellFirst); 1395 memset(&data[iCellFirst], 0, cbrk-iCellFirst);
1213 assert( sqlite3PagerIswriteable(pPage->pDbPage) ); 1396 assert( sqlite3PagerIswriteable(pPage->pDbPage) );
1214 if( cbrk-iCellFirst!=pPage->nFree ){ 1397 if( cbrk-iCellFirst!=pPage->nFree ){
1215 return SQLITE_CORRUPT_BKPT; 1398 return SQLITE_CORRUPT_BKPT;
1216 } 1399 }
1217 return SQLITE_OK; 1400 return SQLITE_OK;
1218 } 1401 }
1219 1402
1220 /* 1403 /*
1404 ** Search the free-list on page pPg for space to store a cell nByte bytes in
1405 ** size. If one can be found, return a pointer to the space and remove it
1406 ** from the free-list.
1407 **
1408 ** If no suitable space can be found on the free-list, return NULL.
1409 **
1410 ** This function may detect corruption within pPg. If corruption is
1411 ** detected then *pRc is set to SQLITE_CORRUPT and NULL is returned.
1412 **
1413 ** Slots on the free list that are between 1 and 3 bytes larger than nByte
1414 ** will be ignored if adding the extra space to the fragmentation count
1415 ** causes the fragmentation count to exceed 60.
1416 */
1417 static u8 *pageFindSlot(MemPage *pPg, int nByte, int *pRc){
1418 const int hdr = pPg->hdrOffset;
1419 u8 * const aData = pPg->aData;
1420 int iAddr = hdr + 1;
1421 int pc = get2byte(&aData[iAddr]);
1422 int x;
1423 int usableSize = pPg->pBt->usableSize;
1424
1425 assert( pc>0 );
1426 do{
1427 int size; /* Size of the free slot */
1428 /* EVIDENCE-OF: R-06866-39125 Freeblocks are always connected in order of
1429 ** increasing offset. */
1430 if( pc>usableSize-4 || pc<iAddr+4 ){
1431 *pRc = SQLITE_CORRUPT_BKPT;
1432 return 0;
1433 }
1434 /* EVIDENCE-OF: R-22710-53328 The third and fourth bytes of each
1435 ** freeblock form a big-endian integer which is the size of the freeblock
1436 ** in bytes, including the 4-byte header. */
1437 size = get2byte(&aData[pc+2]);
1438 if( (x = size - nByte)>=0 ){
1439 testcase( x==4 );
1440 testcase( x==3 );
1441 if( pc < pPg->cellOffset+2*pPg->nCell || size+pc > usableSize ){
1442 *pRc = SQLITE_CORRUPT_BKPT;
1443 return 0;
1444 }else if( x<4 ){
1445 /* EVIDENCE-OF: R-11498-58022 In a well-formed b-tree page, the total
1446 ** number of bytes in fragments may not exceed 60. */
1447 if( aData[hdr+7]>57 ) return 0;
1448
1449 /* Remove the slot from the free-list. Update the number of
1450 ** fragmented bytes within the page. */
1451 memcpy(&aData[iAddr], &aData[pc], 2);
1452 aData[hdr+7] += (u8)x;
1453 }else{
1454 /* The slot remains on the free-list. Reduce its size to account
1455 ** for the portion used by the new allocation. */
1456 put2byte(&aData[pc+2], x);
1457 }
1458 return &aData[pc + x];
1459 }
1460 iAddr = pc;
1461 pc = get2byte(&aData[pc]);
1462 }while( pc );
1463
1464 return 0;
1465 }
1466
1467 /*
1221 ** Allocate nByte bytes of space from within the B-Tree page passed 1468 ** Allocate nByte bytes of space from within the B-Tree page passed
1222 ** as the first argument. Write into *pIdx the index into pPage->aData[] 1469 ** as the first argument. Write into *pIdx the index into pPage->aData[]
1223 ** of the first byte of allocated space. Return either SQLITE_OK or 1470 ** of the first byte of allocated space. Return either SQLITE_OK or
1224 ** an error code (usually SQLITE_CORRUPT). 1471 ** an error code (usually SQLITE_CORRUPT).
1225 ** 1472 **
1226 ** The caller guarantees that there is sufficient space to make the 1473 ** The caller guarantees that there is sufficient space to make the
1227 ** allocation. This routine might need to defragment in order to bring 1474 ** allocation. This routine might need to defragment in order to bring
1228 ** all the space together, however. This routine will avoid using 1475 ** all the space together, however. This routine will avoid using
1229 ** the first two bytes past the cell pointer area since presumably this 1476 ** the first two bytes past the cell pointer area since presumably this
1230 ** allocation is being made in order to insert a new cell, so we will 1477 ** allocation is being made in order to insert a new cell, so we will
1231 ** also end up needing a new cell pointer. 1478 ** also end up needing a new cell pointer.
1232 */ 1479 */
1233 static int allocateSpace(MemPage *pPage, int nByte, int *pIdx){ 1480 static int allocateSpace(MemPage *pPage, int nByte, int *pIdx){
1234 const int hdr = pPage->hdrOffset; /* Local cache of pPage->hdrOffset */ 1481 const int hdr = pPage->hdrOffset; /* Local cache of pPage->hdrOffset */
1235 u8 * const data = pPage->aData; /* Local cache of pPage->aData */ 1482 u8 * const data = pPage->aData; /* Local cache of pPage->aData */
1236 int top; /* First byte of cell content area */ 1483 int top; /* First byte of cell content area */
1484 int rc = SQLITE_OK; /* Integer return code */
1237 int gap; /* First byte of gap between cell pointers and cell content */ 1485 int gap; /* First byte of gap between cell pointers and cell content */
1238 int rc; /* Integer return code */
1239 int usableSize; /* Usable size of the page */
1240 1486
1241 assert( sqlite3PagerIswriteable(pPage->pDbPage) ); 1487 assert( sqlite3PagerIswriteable(pPage->pDbPage) );
1242 assert( pPage->pBt ); 1488 assert( pPage->pBt );
1243 assert( sqlite3_mutex_held(pPage->pBt->mutex) ); 1489 assert( sqlite3_mutex_held(pPage->pBt->mutex) );
1244 assert( nByte>=0 ); /* Minimum cell size is 4 */ 1490 assert( nByte>=0 ); /* Minimum cell size is 4 */
1245 assert( pPage->nFree>=nByte ); 1491 assert( pPage->nFree>=nByte );
1246 assert( pPage->nOverflow==0 ); 1492 assert( pPage->nOverflow==0 );
1247 usableSize = pPage->pBt->usableSize; 1493 assert( nByte < (int)(pPage->pBt->usableSize-8) );
1248 assert( nByte < usableSize-8 );
1249 1494
1250 assert( pPage->cellOffset == hdr + 12 - 4*pPage->leaf ); 1495 assert( pPage->cellOffset == hdr + 12 - 4*pPage->leaf );
1251 gap = pPage->cellOffset + 2*pPage->nCell; 1496 gap = pPage->cellOffset + 2*pPage->nCell;
1252 assert( gap<=65536 ); 1497 assert( gap<=65536 );
1498 /* EVIDENCE-OF: R-29356-02391 If the database uses a 65536-byte page size
1499 ** and the reserved space is zero (the usual value for reserved space)
1500 ** then the cell content offset of an empty page wants to be 65536.
1501 ** However, that integer is too large to be stored in a 2-byte unsigned
1502 ** integer, so a value of 0 is used in its place. */
1253 top = get2byte(&data[hdr+5]); 1503 top = get2byte(&data[hdr+5]);
1504 assert( top<=(int)pPage->pBt->usableSize ); /* Prevent by getAndInitPage() */
1254 if( gap>top ){ 1505 if( gap>top ){
1255 if( top==0 ){ 1506 if( top==0 && pPage->pBt->usableSize==65536 ){
1256 top = 65536; 1507 top = 65536;
1257 }else{ 1508 }else{
1258 return SQLITE_CORRUPT_BKPT; 1509 return SQLITE_CORRUPT_BKPT;
1259 } 1510 }
1260 } 1511 }
1261 1512
1262 /* If there is enough space between gap and top for one more cell pointer 1513 /* If there is enough space between gap and top for one more cell pointer
1263 ** array entry offset, and if the freelist is not empty, then search the 1514 ** array entry offset, and if the freelist is not empty, then search the
1264 ** freelist looking for a free slot big enough to satisfy the request. 1515 ** freelist looking for a free slot big enough to satisfy the request.
1265 */ 1516 */
1266 testcase( gap+2==top ); 1517 testcase( gap+2==top );
1267 testcase( gap+1==top ); 1518 testcase( gap+1==top );
1268 testcase( gap==top ); 1519 testcase( gap==top );
1269 if( gap+2<=top && (data[hdr+1] || data[hdr+2]) ){ 1520 if( (data[hdr+2] || data[hdr+1]) && gap+2<=top ){
1270 int pc, addr; 1521 u8 *pSpace = pageFindSlot(pPage, nByte, &rc);
1271 for(addr=hdr+1; (pc = get2byte(&data[addr]))>0; addr=pc){ 1522 if( pSpace ){
1272 int size; /* Size of the free slot */ 1523 assert( pSpace>=data && (pSpace - data)<65536 );
1273 if( pc>usableSize-4 || pc<addr+4 ){ 1524 *pIdx = (int)(pSpace - data);
1274 return SQLITE_CORRUPT_BKPT; 1525 return SQLITE_OK;
1275 } 1526 }else if( rc ){
1276 size = get2byte(&data[pc+2]); 1527 return rc;
1277 if( size>=nByte ){
1278 int x = size - nByte;
1279 testcase( x==4 );
1280 testcase( x==3 );
1281 if( x<4 ){
1282 if( data[hdr+7]>=60 ) goto defragment_page;
1283 /* Remove the slot from the free-list. Update the number of
1284 ** fragmented bytes within the page. */
1285 memcpy(&data[addr], &data[pc], 2);
1286 data[hdr+7] += (u8)x;
1287 }else if( size+pc > usableSize ){
1288 return SQLITE_CORRUPT_BKPT;
1289 }else{
1290 /* The slot remains on the free-list. Reduce its size to account
1291 ** for the portion used by the new allocation. */
1292 put2byte(&data[pc+2], x);
1293 }
1294 *pIdx = pc + x;
1295 return SQLITE_OK;
1296 }
1297 } 1528 }
1298 } 1529 }
1299 1530
1300 /* The request could not be fulfilled using a freelist slot. Check 1531 /* The request could not be fulfilled using a freelist slot. Check
1301 ** to see if defragmentation is necessary. 1532 ** to see if defragmentation is necessary.
1302 */ 1533 */
1303 testcase( gap+2+nByte==top ); 1534 testcase( gap+2+nByte==top );
1304 if( gap+2+nByte>top ){ 1535 if( gap+2+nByte>top ){
1305 defragment_page: 1536 assert( pPage->nCell>0 || CORRUPT_DB );
1306 testcase( pPage->nCell==0 );
1307 rc = defragmentPage(pPage); 1537 rc = defragmentPage(pPage);
1308 if( rc ) return rc; 1538 if( rc ) return rc;
1309 top = get2byteNotZero(&data[hdr+5]); 1539 top = get2byteNotZero(&data[hdr+5]);
1310 assert( gap+nByte<=top ); 1540 assert( gap+nByte<=top );
1311 } 1541 }
1312 1542
1313 1543
1314 /* Allocate memory from the gap in between the cell pointer array 1544 /* Allocate memory from the gap in between the cell pointer array
1315 ** and the cell content area. The btreeInitPage() call has already 1545 ** and the cell content area. The btreeInitPage() call has already
1316 ** validated the freelist. Given that the freelist is valid, there 1546 ** validated the freelist. Given that the freelist is valid, there
(...skipping 25 matching lines...) Expand all
1342 u16 iFreeBlk; /* Address of the next freeblock */ 1572 u16 iFreeBlk; /* Address of the next freeblock */
1343 u8 hdr; /* Page header size. 0 or 100 */ 1573 u8 hdr; /* Page header size. 0 or 100 */
1344 u8 nFrag = 0; /* Reduction in fragmentation */ 1574 u8 nFrag = 0; /* Reduction in fragmentation */
1345 u16 iOrigSize = iSize; /* Original value of iSize */ 1575 u16 iOrigSize = iSize; /* Original value of iSize */
1346 u32 iLast = pPage->pBt->usableSize-4; /* Largest possible freeblock offset */ 1576 u32 iLast = pPage->pBt->usableSize-4; /* Largest possible freeblock offset */
1347 u32 iEnd = iStart + iSize; /* First byte past the iStart buffer */ 1577 u32 iEnd = iStart + iSize; /* First byte past the iStart buffer */
1348 unsigned char *data = pPage->aData; /* Page content */ 1578 unsigned char *data = pPage->aData; /* Page content */
1349 1579
1350 assert( pPage->pBt!=0 ); 1580 assert( pPage->pBt!=0 );
1351 assert( sqlite3PagerIswriteable(pPage->pDbPage) ); 1581 assert( sqlite3PagerIswriteable(pPage->pDbPage) );
1352 assert( iStart>=pPage->hdrOffset+6+pPage->childPtrSize ); 1582 assert( CORRUPT_DB || iStart>=pPage->hdrOffset+6+pPage->childPtrSize );
1353 assert( iEnd <= pPage->pBt->usableSize ); 1583 assert( CORRUPT_DB || iEnd <= pPage->pBt->usableSize );
1354 assert( sqlite3_mutex_held(pPage->pBt->mutex) ); 1584 assert( sqlite3_mutex_held(pPage->pBt->mutex) );
1355 assert( iSize>=4 ); /* Minimum cell size is 4 */ 1585 assert( iSize>=4 ); /* Minimum cell size is 4 */
1356 assert( iStart<=iLast ); 1586 assert( iStart<=iLast );
1357 1587
1358 /* Overwrite deleted information with zeros when the secure_delete 1588 /* Overwrite deleted information with zeros when the secure_delete
1359 ** option is enabled */ 1589 ** option is enabled */
1360 if( pPage->pBt->btsFlags & BTS_SECURE_DELETE ){ 1590 if( pPage->pBt->btsFlags & BTS_SECURE_DELETE ){
1361 memset(&data[iStart], 0, iSize); 1591 memset(&data[iStart], 0, iSize);
1362 } 1592 }
1363 1593
1364 /* The list of freeblocks must be in ascending order. Find the 1594 /* The list of freeblocks must be in ascending order. Find the
1365 ** spot on the list where iStart should be inserted. 1595 ** spot on the list where iStart should be inserted.
1366 */ 1596 */
1367 hdr = pPage->hdrOffset; 1597 hdr = pPage->hdrOffset;
1368 iPtr = hdr + 1; 1598 iPtr = hdr + 1;
1369 if( data[iPtr+1]==0 && data[iPtr]==0 ){ 1599 if( data[iPtr+1]==0 && data[iPtr]==0 ){
1370 iFreeBlk = 0; /* Shortcut for the case when the freelist is empty */ 1600 iFreeBlk = 0; /* Shortcut for the case when the freelist is empty */
1371 }else{ 1601 }else{
1372 while( (iFreeBlk = get2byte(&data[iPtr]))>0 && iFreeBlk<iStart ){ 1602 while( (iFreeBlk = get2byte(&data[iPtr]))>0 && iFreeBlk<iStart ){
1373 if( iFreeBlk<iPtr+4 ) return SQLITE_CORRUPT_BKPT; 1603 if( iFreeBlk<iPtr+4 ) return SQLITE_CORRUPT_BKPT;
1374 iPtr = iFreeBlk; 1604 iPtr = iFreeBlk;
1375 } 1605 }
1376 if( iFreeBlk>iLast ) return SQLITE_CORRUPT_BKPT; 1606 if( iFreeBlk>iLast ) return SQLITE_CORRUPT_BKPT;
1377 assert( iFreeBlk>iPtr || iFreeBlk==0 ); 1607 assert( iFreeBlk>iPtr || iFreeBlk==0 );
1378 1608
1379 /* At this point: 1609 /* At this point:
1380 ** iFreeBlk: First freeblock after iStart, or zero if none 1610 ** iFreeBlk: First freeblock after iStart, or zero if none
1381 ** iPtr: The address of a pointer iFreeBlk 1611 ** iPtr: The address of a pointer to iFreeBlk
1382 ** 1612 **
1383 ** Check to see if iFreeBlk should be coalesced onto the end of iStart. 1613 ** Check to see if iFreeBlk should be coalesced onto the end of iStart.
1384 */ 1614 */
1385 if( iFreeBlk && iEnd+3>=iFreeBlk ){ 1615 if( iFreeBlk && iEnd+3>=iFreeBlk ){
1386 nFrag = iFreeBlk - iEnd; 1616 nFrag = iFreeBlk - iEnd;
1387 if( iEnd>iFreeBlk ) return SQLITE_CORRUPT_BKPT; 1617 if( iEnd>iFreeBlk ) return SQLITE_CORRUPT_BKPT;
1388 iEnd = iFreeBlk + get2byte(&data[iFreeBlk+2]); 1618 iEnd = iFreeBlk + get2byte(&data[iFreeBlk+2]);
1619 if( iEnd > pPage->pBt->usableSize ) return SQLITE_CORRUPT_BKPT;
1389 iSize = iEnd - iStart; 1620 iSize = iEnd - iStart;
1390 iFreeBlk = get2byte(&data[iFreeBlk]); 1621 iFreeBlk = get2byte(&data[iFreeBlk]);
1391 } 1622 }
1392 1623
1393 /* If iPtr is another freeblock (that is, if iPtr is not the freelist 1624 /* If iPtr is another freeblock (that is, if iPtr is not the freelist
1394 ** pointer in the page header) then check to see if iStart should be 1625 ** pointer in the page header) then check to see if iStart should be
1395 ** coalesced onto the end of iPtr. 1626 ** coalesced onto the end of iPtr.
1396 */ 1627 */
1397 if( iPtr>hdr+1 ){ 1628 if( iPtr>hdr+1 ){
1398 int iPtrEnd = iPtr + get2byte(&data[iPtr+2]); 1629 int iPtrEnd = iPtr + get2byte(&data[iPtr+2]);
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
1436 ** PTF_LEAFDATA | PTF_INTKEY | PTF_LEAF 1667 ** PTF_LEAFDATA | PTF_INTKEY | PTF_LEAF
1437 */ 1668 */
1438 static int decodeFlags(MemPage *pPage, int flagByte){ 1669 static int decodeFlags(MemPage *pPage, int flagByte){
1439 BtShared *pBt; /* A copy of pPage->pBt */ 1670 BtShared *pBt; /* A copy of pPage->pBt */
1440 1671
1441 assert( pPage->hdrOffset==(pPage->pgno==1 ? 100 : 0) ); 1672 assert( pPage->hdrOffset==(pPage->pgno==1 ? 100 : 0) );
1442 assert( sqlite3_mutex_held(pPage->pBt->mutex) ); 1673 assert( sqlite3_mutex_held(pPage->pBt->mutex) );
1443 pPage->leaf = (u8)(flagByte>>3); assert( PTF_LEAF == 1<<3 ); 1674 pPage->leaf = (u8)(flagByte>>3); assert( PTF_LEAF == 1<<3 );
1444 flagByte &= ~PTF_LEAF; 1675 flagByte &= ~PTF_LEAF;
1445 pPage->childPtrSize = 4-4*pPage->leaf; 1676 pPage->childPtrSize = 4-4*pPage->leaf;
1677 pPage->xCellSize = cellSizePtr;
1446 pBt = pPage->pBt; 1678 pBt = pPage->pBt;
1447 if( flagByte==(PTF_LEAFDATA | PTF_INTKEY) ){ 1679 if( flagByte==(PTF_LEAFDATA | PTF_INTKEY) ){
1680 /* EVIDENCE-OF: R-03640-13415 A value of 5 means the page is an interior
1681 ** table b-tree page. */
1682 assert( (PTF_LEAFDATA|PTF_INTKEY)==5 );
1683 /* EVIDENCE-OF: R-20501-61796 A value of 13 means the page is a leaf
1684 ** table b-tree page. */
1685 assert( (PTF_LEAFDATA|PTF_INTKEY|PTF_LEAF)==13 );
1448 pPage->intKey = 1; 1686 pPage->intKey = 1;
1449 pPage->intKeyLeaf = pPage->leaf; 1687 if( pPage->leaf ){
1450 pPage->noPayload = !pPage->leaf; 1688 pPage->intKeyLeaf = 1;
1689 pPage->noPayload = 0;
1690 pPage->xParseCell = btreeParseCellPtr;
1691 }else{
1692 pPage->intKeyLeaf = 0;
1693 pPage->noPayload = 1;
1694 pPage->xCellSize = cellSizePtrNoPayload;
1695 pPage->xParseCell = btreeParseCellPtrNoPayload;
1696 }
1451 pPage->maxLocal = pBt->maxLeaf; 1697 pPage->maxLocal = pBt->maxLeaf;
1452 pPage->minLocal = pBt->minLeaf; 1698 pPage->minLocal = pBt->minLeaf;
1453 }else if( flagByte==PTF_ZERODATA ){ 1699 }else if( flagByte==PTF_ZERODATA ){
1700 /* EVIDENCE-OF: R-27225-53936 A value of 2 means the page is an interior
1701 ** index b-tree page. */
1702 assert( (PTF_ZERODATA)==2 );
1703 /* EVIDENCE-OF: R-16571-11615 A value of 10 means the page is a leaf
1704 ** index b-tree page. */
1705 assert( (PTF_ZERODATA|PTF_LEAF)==10 );
1454 pPage->intKey = 0; 1706 pPage->intKey = 0;
1455 pPage->intKeyLeaf = 0; 1707 pPage->intKeyLeaf = 0;
1456 pPage->noPayload = 0; 1708 pPage->noPayload = 0;
1709 pPage->xParseCell = btreeParseCellPtrIndex;
1457 pPage->maxLocal = pBt->maxLocal; 1710 pPage->maxLocal = pBt->maxLocal;
1458 pPage->minLocal = pBt->minLocal; 1711 pPage->minLocal = pBt->minLocal;
1459 }else{ 1712 }else{
1713 /* EVIDENCE-OF: R-47608-56469 Any other value for the b-tree page type is
1714 ** an error. */
1460 return SQLITE_CORRUPT_BKPT; 1715 return SQLITE_CORRUPT_BKPT;
1461 } 1716 }
1462 pPage->max1bytePayload = pBt->max1bytePayload; 1717 pPage->max1bytePayload = pBt->max1bytePayload;
1463 return SQLITE_OK; 1718 return SQLITE_OK;
1464 } 1719 }
1465 1720
1466 /* 1721 /*
1467 ** Initialize the auxiliary information for a disk block. 1722 ** Initialize the auxiliary information for a disk block.
1468 ** 1723 **
1469 ** Return SQLITE_OK on success. If we see that the page does 1724 ** Return SQLITE_OK on success. If we see that the page does
1470 ** not contain a well-formed database page, then return 1725 ** not contain a well-formed database page, then return
1471 ** SQLITE_CORRUPT. Note that a return of SQLITE_OK does not 1726 ** SQLITE_CORRUPT. Note that a return of SQLITE_OK does not
1472 ** guarantee that the page is well-formed. It only shows that 1727 ** guarantee that the page is well-formed. It only shows that
1473 ** we failed to detect any corruption. 1728 ** we failed to detect any corruption.
1474 */ 1729 */
1475 static int btreeInitPage(MemPage *pPage){ 1730 static int btreeInitPage(MemPage *pPage){
1476 1731
1477 assert( pPage->pBt!=0 ); 1732 assert( pPage->pBt!=0 );
1733 assert( pPage->pBt->db!=0 );
1478 assert( sqlite3_mutex_held(pPage->pBt->mutex) ); 1734 assert( sqlite3_mutex_held(pPage->pBt->mutex) );
1479 assert( pPage->pgno==sqlite3PagerPagenumber(pPage->pDbPage) ); 1735 assert( pPage->pgno==sqlite3PagerPagenumber(pPage->pDbPage) );
1480 assert( pPage == sqlite3PagerGetExtra(pPage->pDbPage) ); 1736 assert( pPage == sqlite3PagerGetExtra(pPage->pDbPage) );
1481 assert( pPage->aData == sqlite3PagerGetData(pPage->pDbPage) ); 1737 assert( pPage->aData == sqlite3PagerGetData(pPage->pDbPage) );
1482 1738
1483 if( !pPage->isInit ){ 1739 if( !pPage->isInit ){
1484 u16 pc; /* Address of a freeblock within pPage->aData[] */ 1740 u16 pc; /* Address of a freeblock within pPage->aData[] */
1485 u8 hdr; /* Offset to beginning of page header */ 1741 u8 hdr; /* Offset to beginning of page header */
1486 u8 *data; /* Equal to pPage->aData */ 1742 u8 *data; /* Equal to pPage->aData */
1487 BtShared *pBt; /* The main btree structure */ 1743 BtShared *pBt; /* The main btree structure */
1488 int usableSize; /* Amount of usable space on each page */ 1744 int usableSize; /* Amount of usable space on each page */
1489 u16 cellOffset; /* Offset from start of page to first cell pointer */ 1745 u16 cellOffset; /* Offset from start of page to first cell pointer */
1490 int nFree; /* Number of unused bytes on the page */ 1746 int nFree; /* Number of unused bytes on the page */
1491 int top; /* First byte of the cell content area */ 1747 int top; /* First byte of the cell content area */
1492 int iCellFirst; /* First allowable cell or freeblock offset */ 1748 int iCellFirst; /* First allowable cell or freeblock offset */
1493 int iCellLast; /* Last possible cell or freeblock offset */ 1749 int iCellLast; /* Last possible cell or freeblock offset */
1494 1750
1495 pBt = pPage->pBt; 1751 pBt = pPage->pBt;
1496 1752
1497 hdr = pPage->hdrOffset; 1753 hdr = pPage->hdrOffset;
1498 data = pPage->aData; 1754 data = pPage->aData;
1755 /* EVIDENCE-OF: R-28594-02890 The one-byte flag at offset 0 indicating
1756 ** the b-tree page type. */
1499 if( decodeFlags(pPage, data[hdr]) ) return SQLITE_CORRUPT_BKPT; 1757 if( decodeFlags(pPage, data[hdr]) ) return SQLITE_CORRUPT_BKPT;
1500 assert( pBt->pageSize>=512 && pBt->pageSize<=65536 ); 1758 assert( pBt->pageSize>=512 && pBt->pageSize<=65536 );
1501 pPage->maskPage = (u16)(pBt->pageSize - 1); 1759 pPage->maskPage = (u16)(pBt->pageSize - 1);
1502 pPage->nOverflow = 0; 1760 pPage->nOverflow = 0;
1503 usableSize = pBt->usableSize; 1761 usableSize = pBt->usableSize;
1504 pPage->cellOffset = cellOffset = hdr + 12 - 4*pPage->leaf; 1762 pPage->cellOffset = cellOffset = hdr + 8 + pPage->childPtrSize;
1505 pPage->aDataEnd = &data[usableSize]; 1763 pPage->aDataEnd = &data[usableSize];
1506 pPage->aCellIdx = &data[cellOffset]; 1764 pPage->aCellIdx = &data[cellOffset];
1765 pPage->aDataOfst = &data[pPage->childPtrSize];
1766 /* EVIDENCE-OF: R-58015-48175 The two-byte integer at offset 5 designates
1767 ** the start of the cell content area. A zero value for this integer is
1768 ** interpreted as 65536. */
1507 top = get2byteNotZero(&data[hdr+5]); 1769 top = get2byteNotZero(&data[hdr+5]);
1770 /* EVIDENCE-OF: R-37002-32774 The two-byte integer at offset 3 gives the
1771 ** number of cells on the page. */
1508 pPage->nCell = get2byte(&data[hdr+3]); 1772 pPage->nCell = get2byte(&data[hdr+3]);
1509 if( pPage->nCell>MX_CELL(pBt) ){ 1773 if( pPage->nCell>MX_CELL(pBt) ){
1510 /* Too many cells for a single page. The page must be corrupt */ 1774 /* Too many cells for a single page. The page must be corrupt */
1511 return SQLITE_CORRUPT_BKPT; 1775 return SQLITE_CORRUPT_BKPT;
1512 } 1776 }
1513 testcase( pPage->nCell==MX_CELL(pBt) ); 1777 testcase( pPage->nCell==MX_CELL(pBt) );
1778 /* EVIDENCE-OF: R-24089-57979 If a page contains no cells (which is only
1779 ** possible for a root page of a table that contains no rows) then the
1780 ** offset to the cell content area will equal the page size minus the
1781 ** bytes of reserved space. */
1782 assert( pPage->nCell>0 || top==usableSize || CORRUPT_DB );
1514 1783
1515 /* A malformed database page might cause us to read past the end 1784 /* A malformed database page might cause us to read past the end
1516 ** of page when parsing a cell. 1785 ** of page when parsing a cell.
1517 ** 1786 **
1518 ** The following block of code checks early to see if a cell extends 1787 ** The following block of code checks early to see if a cell extends
1519 ** past the end of a page boundary and causes SQLITE_CORRUPT to be 1788 ** past the end of a page boundary and causes SQLITE_CORRUPT to be
1520 ** returned if it does. 1789 ** returned if it does.
1521 */ 1790 */
1522 iCellFirst = cellOffset + 2*pPage->nCell; 1791 iCellFirst = cellOffset + 2*pPage->nCell;
1523 iCellLast = usableSize - 4; 1792 iCellLast = usableSize - 4;
1524 #if defined(SQLITE_ENABLE_OVERSIZE_CELL_CHECK) 1793 if( pBt->db->flags & SQLITE_CellSizeCk ){
1525 {
1526 int i; /* Index into the cell pointer array */ 1794 int i; /* Index into the cell pointer array */
1527 int sz; /* Size of a cell */ 1795 int sz; /* Size of a cell */
1528 1796
1529 if( !pPage->leaf ) iCellLast--; 1797 if( !pPage->leaf ) iCellLast--;
1530 for(i=0; i<pPage->nCell; i++){ 1798 for(i=0; i<pPage->nCell; i++){
1531 pc = get2byte(&data[cellOffset+i*2]); 1799 pc = get2byteAligned(&data[cellOffset+i*2]);
1532 testcase( pc==iCellFirst ); 1800 testcase( pc==iCellFirst );
1533 testcase( pc==iCellLast ); 1801 testcase( pc==iCellLast );
1534 if( pc<iCellFirst || pc>iCellLast ){ 1802 if( pc<iCellFirst || pc>iCellLast ){
1535 return SQLITE_CORRUPT_BKPT; 1803 return SQLITE_CORRUPT_BKPT;
1536 } 1804 }
1537 sz = cellSizePtr(pPage, &data[pc]); 1805 sz = pPage->xCellSize(pPage, &data[pc]);
1538 testcase( pc+sz==usableSize ); 1806 testcase( pc+sz==usableSize );
1539 if( pc+sz>usableSize ){ 1807 if( pc+sz>usableSize ){
1540 return SQLITE_CORRUPT_BKPT; 1808 return SQLITE_CORRUPT_BKPT;
1541 } 1809 }
1542 } 1810 }
1543 if( !pPage->leaf ) iCellLast++; 1811 if( !pPage->leaf ) iCellLast++;
1544 } 1812 }
1545 #endif
1546 1813
1547 /* Compute the total free space on the page */ 1814 /* Compute the total free space on the page
1815 ** EVIDENCE-OF: R-23588-34450 The two-byte integer at offset 1 gives the
1816 ** start of the first freeblock on the page, or is zero if there are no
1817 ** freeblocks. */
1548 pc = get2byte(&data[hdr+1]); 1818 pc = get2byte(&data[hdr+1]);
1549 nFree = data[hdr+7] + top; 1819 nFree = data[hdr+7] + top; /* Init nFree to non-freeblock free space */
1550 while( pc>0 ){ 1820 while( pc>0 ){
1551 u16 next, size; 1821 u16 next, size;
1552 if( pc<iCellFirst || pc>iCellLast ){ 1822 if( pc<iCellFirst || pc>iCellLast ){
1553 /* Start of free block is off the page */ 1823 /* EVIDENCE-OF: R-55530-52930 In a well-formed b-tree page, there will
1824 ** always be at least one cell before the first freeblock.
1825 **
1826 ** Or, the freeblock is off the end of the page
1827 */
1554 return SQLITE_CORRUPT_BKPT; 1828 return SQLITE_CORRUPT_BKPT;
1555 } 1829 }
1556 next = get2byte(&data[pc]); 1830 next = get2byte(&data[pc]);
1557 size = get2byte(&data[pc+2]); 1831 size = get2byte(&data[pc+2]);
1558 if( (next>0 && next<=pc+size+3) || pc+size>usableSize ){ 1832 if( (next>0 && next<=pc+size+3) || pc+size>usableSize ){
1559 /* Free blocks must be in ascending order. And the last byte of 1833 /* Free blocks must be in ascending order. And the last byte of
1560 ** the free-block must lie on the database page. */ 1834 ** the free-block must lie on the database page. */
1561 return SQLITE_CORRUPT_BKPT; 1835 return SQLITE_CORRUPT_BKPT;
1562 } 1836 }
1563 nFree = nFree + size; 1837 nFree = nFree + size;
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
1601 data[hdr] = (char)flags; 1875 data[hdr] = (char)flags;
1602 first = hdr + ((flags&PTF_LEAF)==0 ? 12 : 8); 1876 first = hdr + ((flags&PTF_LEAF)==0 ? 12 : 8);
1603 memset(&data[hdr+1], 0, 4); 1877 memset(&data[hdr+1], 0, 4);
1604 data[hdr+7] = 0; 1878 data[hdr+7] = 0;
1605 put2byte(&data[hdr+5], pBt->usableSize); 1879 put2byte(&data[hdr+5], pBt->usableSize);
1606 pPage->nFree = (u16)(pBt->usableSize - first); 1880 pPage->nFree = (u16)(pBt->usableSize - first);
1607 decodeFlags(pPage, flags); 1881 decodeFlags(pPage, flags);
1608 pPage->cellOffset = first; 1882 pPage->cellOffset = first;
1609 pPage->aDataEnd = &data[pBt->usableSize]; 1883 pPage->aDataEnd = &data[pBt->usableSize];
1610 pPage->aCellIdx = &data[first]; 1884 pPage->aCellIdx = &data[first];
1885 pPage->aDataOfst = &data[pPage->childPtrSize];
1611 pPage->nOverflow = 0; 1886 pPage->nOverflow = 0;
1612 assert( pBt->pageSize>=512 && pBt->pageSize<=65536 ); 1887 assert( pBt->pageSize>=512 && pBt->pageSize<=65536 );
1613 pPage->maskPage = (u16)(pBt->pageSize - 1); 1888 pPage->maskPage = (u16)(pBt->pageSize - 1);
1614 pPage->nCell = 0; 1889 pPage->nCell = 0;
1615 pPage->isInit = 1; 1890 pPage->isInit = 1;
1616 } 1891 }
1617 1892
1618 1893
1619 /* 1894 /*
1620 ** Convert a DbPage obtained from the pager into a MemPage used by 1895 ** Convert a DbPage obtained from the pager into a MemPage used by
1621 ** the btree layer. 1896 ** the btree layer.
1622 */ 1897 */
1623 static MemPage *btreePageFromDbPage(DbPage *pDbPage, Pgno pgno, BtShared *pBt){ 1898 static MemPage *btreePageFromDbPage(DbPage *pDbPage, Pgno pgno, BtShared *pBt){
1624 MemPage *pPage = (MemPage*)sqlite3PagerGetExtra(pDbPage); 1899 MemPage *pPage = (MemPage*)sqlite3PagerGetExtra(pDbPage);
1625 pPage->aData = sqlite3PagerGetData(pDbPage); 1900 if( pgno!=pPage->pgno ){
1626 pPage->pDbPage = pDbPage; 1901 pPage->aData = sqlite3PagerGetData(pDbPage);
1627 pPage->pBt = pBt; 1902 pPage->pDbPage = pDbPage;
1628 pPage->pgno = pgno; 1903 pPage->pBt = pBt;
1629 pPage->hdrOffset = pPage->pgno==1 ? 100 : 0; 1904 pPage->pgno = pgno;
1905 pPage->hdrOffset = pgno==1 ? 100 : 0;
1906 }
1907 assert( pPage->aData==sqlite3PagerGetData(pDbPage) );
1630 return pPage; 1908 return pPage;
1631 } 1909 }
1632 1910
1633 /* 1911 /*
1634 ** Get a page from the pager. Initialize the MemPage.pBt and 1912 ** Get a page from the pager. Initialize the MemPage.pBt and
1635 ** MemPage.aData elements if needed. 1913 ** MemPage.aData elements if needed. See also: btreeGetUnusedPage().
1636 ** 1914 **
1637 ** If the noContent flag is set, it means that we do not care about 1915 ** If the PAGER_GET_NOCONTENT flag is set, it means that we do not care
1638 ** the content of the page at this time. So do not go to the disk 1916 ** about the content of the page at this time. So do not go to the disk
1639 ** to fetch the content. Just fill in the content with zeros for now. 1917 ** to fetch the content. Just fill in the content with zeros for now.
1640 ** If in the future we call sqlite3PagerWrite() on this page, that 1918 ** If in the future we call sqlite3PagerWrite() on this page, that
1641 ** means we have started to be concerned about content and the disk 1919 ** means we have started to be concerned about content and the disk
1642 ** read should occur at that point. 1920 ** read should occur at that point.
1643 */ 1921 */
1644 static int btreeGetPage( 1922 static int btreeGetPage(
1645 BtShared *pBt, /* The btree */ 1923 BtShared *pBt, /* The btree */
1646 Pgno pgno, /* Number of the page to fetch */ 1924 Pgno pgno, /* Number of the page to fetch */
1647 MemPage **ppPage, /* Return the page in this parameter */ 1925 MemPage **ppPage, /* Return the page in this parameter */
1648 int flags /* PAGER_GET_NOCONTENT or PAGER_GET_READONLY */ 1926 int flags /* PAGER_GET_NOCONTENT or PAGER_GET_READONLY */
1649 ){ 1927 ){
1650 int rc; 1928 int rc;
1651 DbPage *pDbPage; 1929 DbPage *pDbPage;
1652 1930
1653 assert( flags==0 || flags==PAGER_GET_NOCONTENT || flags==PAGER_GET_READONLY ); 1931 assert( flags==0 || flags==PAGER_GET_NOCONTENT || flags==PAGER_GET_READONLY );
1654 assert( sqlite3_mutex_held(pBt->mutex) ); 1932 assert( sqlite3_mutex_held(pBt->mutex) );
1655 rc = sqlite3PagerAcquire(pBt->pPager, pgno, (DbPage**)&pDbPage, flags); 1933 rc = sqlite3PagerGet(pBt->pPager, pgno, (DbPage**)&pDbPage, flags);
1656 if( rc ) return rc; 1934 if( rc ) return rc;
1657 *ppPage = btreePageFromDbPage(pDbPage, pgno, pBt); 1935 *ppPage = btreePageFromDbPage(pDbPage, pgno, pBt);
1658 return SQLITE_OK; 1936 return SQLITE_OK;
1659 } 1937 }
1660 1938
1661 /* 1939 /*
1662 ** Retrieve a page from the pager cache. If the requested page is not 1940 ** Retrieve a page from the pager cache. If the requested page is not
1663 ** already in the pager cache return NULL. Initialize the MemPage.pBt and 1941 ** already in the pager cache return NULL. Initialize the MemPage.pBt and
1664 ** MemPage.aData elements if needed. 1942 ** MemPage.aData elements if needed.
1665 */ 1943 */
(...skipping 14 matching lines...) Expand all
1680 static Pgno btreePagecount(BtShared *pBt){ 1958 static Pgno btreePagecount(BtShared *pBt){
1681 return pBt->nPage; 1959 return pBt->nPage;
1682 } 1960 }
1683 u32 sqlite3BtreeLastPage(Btree *p){ 1961 u32 sqlite3BtreeLastPage(Btree *p){
1684 assert( sqlite3BtreeHoldsMutex(p) ); 1962 assert( sqlite3BtreeHoldsMutex(p) );
1685 assert( ((p->pBt->nPage)&0x8000000)==0 ); 1963 assert( ((p->pBt->nPage)&0x8000000)==0 );
1686 return btreePagecount(p->pBt); 1964 return btreePagecount(p->pBt);
1687 } 1965 }
1688 1966
1689 /* 1967 /*
1690 ** Get a page from the pager and initialize it. This routine is just a 1968 ** Get a page from the pager and initialize it.
1691 ** convenience wrapper around separate calls to btreeGetPage() and
1692 ** btreeInitPage().
1693 ** 1969 **
1694 ** If an error occurs, then the value *ppPage is set to is undefined. It 1970 ** If pCur!=0 then the page is being fetched as part of a moveToChild()
1971 ** call. Do additional sanity checking on the page in this case.
1972 ** And if the fetch fails, this routine must decrement pCur->iPage.
1973 **
1974 ** The page is fetched as read-write unless pCur is not NULL and is
1975 ** a read-only cursor.
1976 **
1977 ** If an error occurs, then *ppPage is undefined. It
1695 ** may remain unchanged, or it may be set to an invalid value. 1978 ** may remain unchanged, or it may be set to an invalid value.
1696 */ 1979 */
1697 static int getAndInitPage( 1980 static int getAndInitPage(
1698 BtShared *pBt, /* The database file */ 1981 BtShared *pBt, /* The database file */
1699 Pgno pgno, /* Number of the page to get */ 1982 Pgno pgno, /* Number of the page to get */
1700 MemPage **ppPage, /* Write the page pointer here */ 1983 MemPage **ppPage, /* Write the page pointer here */
1701 int bReadonly /* PAGER_GET_READONLY or 0 */ 1984 BtCursor *pCur, /* Cursor to receive the page, or NULL */
1985 int bReadOnly /* True for a read-only page */
1702 ){ 1986 ){
1703 int rc; 1987 int rc;
1988 DbPage *pDbPage;
1704 assert( sqlite3_mutex_held(pBt->mutex) ); 1989 assert( sqlite3_mutex_held(pBt->mutex) );
1705 assert( bReadonly==PAGER_GET_READONLY || bReadonly==0 ); 1990 assert( pCur==0 || ppPage==&pCur->apPage[pCur->iPage] );
1991 assert( pCur==0 || bReadOnly==pCur->curPagerFlags );
1992 assert( pCur==0 || pCur->iPage>0 );
1706 1993
1707 if( pgno>btreePagecount(pBt) ){ 1994 if( pgno>btreePagecount(pBt) ){
1708 rc = SQLITE_CORRUPT_BKPT; 1995 rc = SQLITE_CORRUPT_BKPT;
1709 }else{ 1996 goto getAndInitPage_error;
1710 rc = btreeGetPage(pBt, pgno, ppPage, bReadonly); 1997 }
1711 if( rc==SQLITE_OK && (*ppPage)->isInit==0 ){ 1998 rc = sqlite3PagerGet(pBt->pPager, pgno, (DbPage**)&pDbPage, bReadOnly);
1712 rc = btreeInitPage(*ppPage); 1999 if( rc ){
1713 if( rc!=SQLITE_OK ){ 2000 goto getAndInitPage_error;
1714 releasePage(*ppPage); 2001 }
1715 } 2002 *ppPage = (MemPage*)sqlite3PagerGetExtra(pDbPage);
2003 if( (*ppPage)->isInit==0 ){
2004 btreePageFromDbPage(pDbPage, pgno, pBt);
2005 rc = btreeInitPage(*ppPage);
2006 if( rc!=SQLITE_OK ){
2007 releasePage(*ppPage);
2008 goto getAndInitPage_error;
1716 } 2009 }
1717 } 2010 }
2011 assert( (*ppPage)->pgno==pgno );
2012 assert( (*ppPage)->aData==sqlite3PagerGetData(pDbPage) );
1718 2013
2014 /* If obtaining a child page for a cursor, we must verify that the page is
2015 ** compatible with the root page. */
2016 if( pCur && ((*ppPage)->nCell<1 || (*ppPage)->intKey!=pCur->curIntKey) ){
2017 rc = SQLITE_CORRUPT_BKPT;
2018 releasePage(*ppPage);
2019 goto getAndInitPage_error;
2020 }
2021 return SQLITE_OK;
2022
2023 getAndInitPage_error:
2024 if( pCur ) pCur->iPage--;
1719 testcase( pgno==0 ); 2025 testcase( pgno==0 );
1720 assert( pgno!=0 || rc==SQLITE_CORRUPT ); 2026 assert( pgno!=0 || rc==SQLITE_CORRUPT );
1721 return rc; 2027 return rc;
1722 } 2028 }
1723 2029
1724 /* 2030 /*
1725 ** Release a MemPage. This should be called once for each prior 2031 ** Release a MemPage. This should be called once for each prior
1726 ** call to btreeGetPage. 2032 ** call to btreeGetPage.
1727 */ 2033 */
2034 static void releasePageNotNull(MemPage *pPage){
2035 assert( pPage->aData );
2036 assert( pPage->pBt );
2037 assert( pPage->pDbPage!=0 );
2038 assert( sqlite3PagerGetExtra(pPage->pDbPage) == (void*)pPage );
2039 assert( sqlite3PagerGetData(pPage->pDbPage)==pPage->aData );
2040 assert( sqlite3_mutex_held(pPage->pBt->mutex) );
2041 sqlite3PagerUnrefNotNull(pPage->pDbPage);
2042 }
1728 static void releasePage(MemPage *pPage){ 2043 static void releasePage(MemPage *pPage){
1729 if( pPage ){ 2044 if( pPage ) releasePageNotNull(pPage);
1730 assert( pPage->aData ); 2045 }
1731 assert( pPage->pBt ); 2046
1732 assert( pPage->pDbPage!=0 ); 2047 /*
1733 assert( sqlite3PagerGetExtra(pPage->pDbPage) == (void*)pPage ); 2048 ** Get an unused page.
1734 assert( sqlite3PagerGetData(pPage->pDbPage)==pPage->aData ); 2049 **
1735 assert( sqlite3_mutex_held(pPage->pBt->mutex) ); 2050 ** This works just like btreeGetPage() with the addition:
1736 sqlite3PagerUnrefNotNull(pPage->pDbPage); 2051 **
2052 ** * If the page is already in use for some other purpose, immediately
2053 ** release it and return an SQLITE_CURRUPT error.
2054 ** * Make sure the isInit flag is clear
2055 */
2056 static int btreeGetUnusedPage(
2057 BtShared *pBt, /* The btree */
2058 Pgno pgno, /* Number of the page to fetch */
2059 MemPage **ppPage, /* Return the page in this parameter */
2060 int flags /* PAGER_GET_NOCONTENT or PAGER_GET_READONLY */
2061 ){
2062 int rc = btreeGetPage(pBt, pgno, ppPage, flags);
2063 if( rc==SQLITE_OK ){
2064 if( sqlite3PagerPageRefcount((*ppPage)->pDbPage)>1 ){
2065 releasePage(*ppPage);
2066 *ppPage = 0;
2067 return SQLITE_CORRUPT_BKPT;
2068 }
2069 (*ppPage)->isInit = 0;
2070 }else{
2071 *ppPage = 0;
1737 } 2072 }
2073 return rc;
1738 } 2074 }
1739 2075
2076
1740 /* 2077 /*
1741 ** During a rollback, when the pager reloads information into the cache 2078 ** During a rollback, when the pager reloads information into the cache
1742 ** so that the cache is restored to its original state at the start of 2079 ** so that the cache is restored to its original state at the start of
1743 ** the transaction, for each page restored this routine is called. 2080 ** the transaction, for each page restored this routine is called.
1744 ** 2081 **
1745 ** This routine needs to reset the extra data section at the end of the 2082 ** This routine needs to reset the extra data section at the end of the
1746 ** page to agree with the restored data. 2083 ** page to agree with the restored data.
1747 */ 2084 */
1748 static void pageReinit(DbPage *pData){ 2085 static void pageReinit(DbPage *pData){
1749 MemPage *pPage; 2086 MemPage *pPage;
(...skipping 102 matching lines...) Expand 10 before | Expand all | Expand 10 after
1852 p->lock.iTable = 1; 2189 p->lock.iTable = 1;
1853 #endif 2190 #endif
1854 2191
1855 #if !defined(SQLITE_OMIT_SHARED_CACHE) && !defined(SQLITE_OMIT_DISKIO) 2192 #if !defined(SQLITE_OMIT_SHARED_CACHE) && !defined(SQLITE_OMIT_DISKIO)
1856 /* 2193 /*
1857 ** If this Btree is a candidate for shared cache, try to find an 2194 ** If this Btree is a candidate for shared cache, try to find an
1858 ** existing BtShared object that we can share with 2195 ** existing BtShared object that we can share with
1859 */ 2196 */
1860 if( isTempDb==0 && (isMemdb==0 || (vfsFlags&SQLITE_OPEN_URI)!=0) ){ 2197 if( isTempDb==0 && (isMemdb==0 || (vfsFlags&SQLITE_OPEN_URI)!=0) ){
1861 if( vfsFlags & SQLITE_OPEN_SHAREDCACHE ){ 2198 if( vfsFlags & SQLITE_OPEN_SHAREDCACHE ){
2199 int nFilename = sqlite3Strlen30(zFilename)+1;
1862 int nFullPathname = pVfs->mxPathname+1; 2200 int nFullPathname = pVfs->mxPathname+1;
1863 char *zFullPathname = sqlite3Malloc(nFullPathname); 2201 char *zFullPathname = sqlite3Malloc(MAX(nFullPathname,nFilename));
1864 MUTEX_LOGIC( sqlite3_mutex *mutexShared; ) 2202 MUTEX_LOGIC( sqlite3_mutex *mutexShared; )
2203
1865 p->sharable = 1; 2204 p->sharable = 1;
1866 if( !zFullPathname ){ 2205 if( !zFullPathname ){
1867 sqlite3_free(p); 2206 sqlite3_free(p);
1868 return SQLITE_NOMEM; 2207 return SQLITE_NOMEM;
1869 } 2208 }
1870 if( isMemdb ){ 2209 if( isMemdb ){
1871 memcpy(zFullPathname, zFilename, sqlite3Strlen30(zFilename)+1); 2210 memcpy(zFullPathname, zFilename, nFilename);
1872 }else{ 2211 }else{
1873 rc = sqlite3OsFullPathname(pVfs, zFilename, 2212 rc = sqlite3OsFullPathname(pVfs, zFilename,
1874 nFullPathname, zFullPathname); 2213 nFullPathname, zFullPathname);
1875 if( rc ){ 2214 if( rc ){
1876 sqlite3_free(zFullPathname); 2215 sqlite3_free(zFullPathname);
1877 sqlite3_free(p); 2216 sqlite3_free(p);
1878 return rc; 2217 return rc;
1879 } 2218 }
1880 } 2219 }
1881 #if SQLITE_THREADSAFE 2220 #if SQLITE_THREADSAFE
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
1918 } 2257 }
1919 #endif 2258 #endif
1920 } 2259 }
1921 #endif 2260 #endif
1922 if( pBt==0 ){ 2261 if( pBt==0 ){
1923 /* 2262 /*
1924 ** The following asserts make sure that structures used by the btree are 2263 ** The following asserts make sure that structures used by the btree are
1925 ** the right size. This is to guard against size changes that result 2264 ** the right size. This is to guard against size changes that result
1926 ** when compiling on a different architecture. 2265 ** when compiling on a different architecture.
1927 */ 2266 */
1928 assert( sizeof(i64)==8 || sizeof(i64)==4 ); 2267 assert( sizeof(i64)==8 );
1929 assert( sizeof(u64)==8 || sizeof(u64)==4 ); 2268 assert( sizeof(u64)==8 );
1930 assert( sizeof(u32)==4 ); 2269 assert( sizeof(u32)==4 );
1931 assert( sizeof(u16)==2 ); 2270 assert( sizeof(u16)==2 );
1932 assert( sizeof(Pgno)==4 ); 2271 assert( sizeof(Pgno)==4 );
1933 2272
1934 pBt = sqlite3MallocZero( sizeof(*pBt) ); 2273 pBt = sqlite3MallocZero( sizeof(*pBt) );
1935 if( pBt==0 ){ 2274 if( pBt==0 ){
1936 rc = SQLITE_NOMEM; 2275 rc = SQLITE_NOMEM;
1937 goto btree_open_out; 2276 goto btree_open_out;
1938 } 2277 }
1939 rc = sqlite3PagerOpen(pVfs, &pBt->pPager, zFilename, 2278 rc = sqlite3PagerOpen(pVfs, &pBt->pPager, zFilename,
1940 EXTRA_SIZE, flags, vfsFlags, pageReinit); 2279 EXTRA_SIZE, flags, vfsFlags, pageReinit);
1941 if( rc==SQLITE_OK ){ 2280 if( rc==SQLITE_OK ){
1942 sqlite3PagerSetMmapLimit(pBt->pPager, db->szMmap); 2281 sqlite3PagerSetMmapLimit(pBt->pPager, db->szMmap);
1943 rc = sqlite3PagerReadFileheader(pBt->pPager,sizeof(zDbHeader),zDbHeader); 2282 rc = sqlite3PagerReadFileheader(pBt->pPager,sizeof(zDbHeader),zDbHeader);
1944 } 2283 }
1945 if( rc!=SQLITE_OK ){ 2284 if( rc!=SQLITE_OK ){
1946 goto btree_open_out; 2285 goto btree_open_out;
1947 } 2286 }
1948 pBt->openFlags = (u8)flags; 2287 pBt->openFlags = (u8)flags;
1949 pBt->db = db; 2288 pBt->db = db;
1950 sqlite3PagerSetBusyhandler(pBt->pPager, btreeInvokeBusyHandler, pBt); 2289 sqlite3PagerSetBusyhandler(pBt->pPager, btreeInvokeBusyHandler, pBt);
1951 p->pBt = pBt; 2290 p->pBt = pBt;
1952 2291
1953 pBt->pCursor = 0; 2292 pBt->pCursor = 0;
1954 pBt->pPage1 = 0; 2293 pBt->pPage1 = 0;
1955 if( sqlite3PagerIsreadonly(pBt->pPager) ) pBt->btsFlags |= BTS_READ_ONLY; 2294 if( sqlite3PagerIsreadonly(pBt->pPager) ) pBt->btsFlags |= BTS_READ_ONLY;
1956 #ifdef SQLITE_SECURE_DELETE 2295 #ifdef SQLITE_SECURE_DELETE
1957 pBt->btsFlags |= BTS_SECURE_DELETE; 2296 pBt->btsFlags |= BTS_SECURE_DELETE;
1958 #endif 2297 #endif
2298 /* EVIDENCE-OF: R-51873-39618 The page size for a database file is
2299 ** determined by the 2-byte integer located at an offset of 16 bytes from
2300 ** the beginning of the database file. */
1959 pBt->pageSize = (zDbHeader[16]<<8) | (zDbHeader[17]<<16); 2301 pBt->pageSize = (zDbHeader[16]<<8) | (zDbHeader[17]<<16);
1960 if( pBt->pageSize<512 || pBt->pageSize>SQLITE_MAX_PAGE_SIZE 2302 if( pBt->pageSize<512 || pBt->pageSize>SQLITE_MAX_PAGE_SIZE
1961 || ((pBt->pageSize-1)&pBt->pageSize)!=0 ){ 2303 || ((pBt->pageSize-1)&pBt->pageSize)!=0 ){
1962 pBt->pageSize = 0; 2304 pBt->pageSize = 0;
1963 #ifndef SQLITE_OMIT_AUTOVACUUM 2305 #ifndef SQLITE_OMIT_AUTOVACUUM
1964 /* If the magic name ":memory:" will create an in-memory database, then 2306 /* If the magic name ":memory:" will create an in-memory database, then
1965 ** leave the autoVacuum mode at 0 (do not auto-vacuum), even if 2307 ** leave the autoVacuum mode at 0 (do not auto-vacuum), even if
1966 ** SQLITE_DEFAULT_AUTOVACUUM is true. On the other hand, if 2308 ** SQLITE_DEFAULT_AUTOVACUUM is true. On the other hand, if
1967 ** SQLITE_OMIT_MEMORYDB has been defined, then ":memory:" is just a 2309 ** SQLITE_OMIT_MEMORYDB has been defined, then ":memory:" is just a
1968 ** regular file-name. In this case the auto-vacuum applies as per normal. 2310 ** regular file-name. In this case the auto-vacuum applies as per normal.
1969 */ 2311 */
1970 if( zFilename && !isMemdb ){ 2312 if( zFilename && !isMemdb ){
1971 pBt->autoVacuum = (SQLITE_DEFAULT_AUTOVACUUM ? 1 : 0); 2313 pBt->autoVacuum = (SQLITE_DEFAULT_AUTOVACUUM ? 1 : 0);
1972 pBt->incrVacuum = (SQLITE_DEFAULT_AUTOVACUUM==2 ? 1 : 0); 2314 pBt->incrVacuum = (SQLITE_DEFAULT_AUTOVACUUM==2 ? 1 : 0);
1973 } 2315 }
1974 #endif 2316 #endif
1975 nReserve = 0; 2317 nReserve = 0;
1976 }else{ 2318 }else{
2319 /* EVIDENCE-OF: R-37497-42412 The size of the reserved region is
2320 ** determined by the one-byte unsigned integer found at an offset of 20
2321 ** into the database file header. */
1977 nReserve = zDbHeader[20]; 2322 nReserve = zDbHeader[20];
1978 pBt->btsFlags |= BTS_PAGESIZE_FIXED; 2323 pBt->btsFlags |= BTS_PAGESIZE_FIXED;
1979 #ifndef SQLITE_OMIT_AUTOVACUUM 2324 #ifndef SQLITE_OMIT_AUTOVACUUM
1980 pBt->autoVacuum = (get4byte(&zDbHeader[36 + 4*4])?1:0); 2325 pBt->autoVacuum = (get4byte(&zDbHeader[36 + 4*4])?1:0);
1981 pBt->incrVacuum = (get4byte(&zDbHeader[36 + 7*4])?1:0); 2326 pBt->incrVacuum = (get4byte(&zDbHeader[36 + 7*4])?1:0);
1982 #endif 2327 #endif
1983 } 2328 }
1984 rc = sqlite3PagerSetPagesize(pBt->pPager, &pBt->pageSize, nReserve); 2329 rc = sqlite3PagerSetPagesize(pBt->pPager, &pBt->pageSize, nReserve);
1985 if( rc ) goto btree_open_out; 2330 if( rc ) goto btree_open_out;
1986 pBt->usableSize = pBt->pageSize - nReserve; 2331 pBt->usableSize = pBt->pageSize - nReserve;
(...skipping 213 matching lines...) Expand 10 before | Expand all | Expand 10 after
2200 assert( p->locked==0 ); 2545 assert( p->locked==0 );
2201 if( p->pPrev ) p->pPrev->pNext = p->pNext; 2546 if( p->pPrev ) p->pPrev->pNext = p->pNext;
2202 if( p->pNext ) p->pNext->pPrev = p->pPrev; 2547 if( p->pNext ) p->pNext->pPrev = p->pPrev;
2203 #endif 2548 #endif
2204 2549
2205 sqlite3_free(p); 2550 sqlite3_free(p);
2206 return SQLITE_OK; 2551 return SQLITE_OK;
2207 } 2552 }
2208 2553
2209 /* 2554 /*
2210 ** Change the limit on the number of pages allowed in the cache. 2555 ** Change the "soft" limit on the number of pages in the cache.
2211 ** 2556 ** Unused and unmodified pages will be recycled when the number of
2212 ** The maximum number of cache pages is set to the absolute 2557 ** pages in the cache exceeds this soft limit. But the size of the
2213 ** value of mxPage. If mxPage is negative, the pager will 2558 ** cache is allowed to grow larger than this limit if it contains
2214 ** operate asynchronously - it will not stop to do fsync()s 2559 ** dirty pages or pages still in active use.
2215 ** to insure data is written to the disk surface before
2216 ** continuing. Transactions still work if synchronous is off,
2217 ** and the database cannot be corrupted if this program
2218 ** crashes. But if the operating system crashes or there is
2219 ** an abrupt power failure when synchronous is off, the database
2220 ** could be left in an inconsistent and unrecoverable state.
2221 ** Synchronous is on by default so database corruption is not
2222 ** normally a worry.
2223 */ 2560 */
2224 int sqlite3BtreeSetCacheSize(Btree *p, int mxPage){ 2561 int sqlite3BtreeSetCacheSize(Btree *p, int mxPage){
2225 BtShared *pBt = p->pBt; 2562 BtShared *pBt = p->pBt;
2226 assert( sqlite3_mutex_held(p->db->mutex) ); 2563 assert( sqlite3_mutex_held(p->db->mutex) );
2227 sqlite3BtreeEnter(p); 2564 sqlite3BtreeEnter(p);
2228 sqlite3PagerSetCachesize(pBt->pPager, mxPage); 2565 sqlite3PagerSetCachesize(pBt->pPager, mxPage);
2229 sqlite3BtreeLeave(p); 2566 sqlite3BtreeLeave(p);
2230 return SQLITE_OK; 2567 return SQLITE_OK;
2231 } 2568 }
2232 2569
2570 /*
2571 ** Change the "spill" limit on the number of pages in the cache.
2572 ** If the number of pages exceeds this limit during a write transaction,
2573 ** the pager might attempt to "spill" pages to the journal early in
2574 ** order to free up memory.
2575 **
2576 ** The value returned is the current spill size. If zero is passed
2577 ** as an argument, no changes are made to the spill size setting, so
2578 ** using mxPage of 0 is a way to query the current spill size.
2579 */
2580 int sqlite3BtreeSetSpillSize(Btree *p, int mxPage){
2581 BtShared *pBt = p->pBt;
2582 int res;
2583 assert( sqlite3_mutex_held(p->db->mutex) );
2584 sqlite3BtreeEnter(p);
2585 res = sqlite3PagerSetSpillsize(pBt->pPager, mxPage);
2586 sqlite3BtreeLeave(p);
2587 return res;
2588 }
2589
2233 #if SQLITE_MAX_MMAP_SIZE>0 2590 #if SQLITE_MAX_MMAP_SIZE>0
2234 /* 2591 /*
2235 ** Change the limit on the amount of the database file that may be 2592 ** Change the limit on the amount of the database file that may be
2236 ** memory mapped. 2593 ** memory mapped.
2237 */ 2594 */
2238 int sqlite3BtreeSetMmapLimit(Btree *p, sqlite3_int64 szMmap){ 2595 int sqlite3BtreeSetMmapLimit(Btree *p, sqlite3_int64 szMmap){
2239 BtShared *pBt = p->pBt; 2596 BtShared *pBt = p->pBt;
2240 assert( sqlite3_mutex_held(p->db->mutex) ); 2597 assert( sqlite3_mutex_held(p->db->mutex) );
2241 sqlite3BtreeEnter(p); 2598 sqlite3BtreeEnter(p);
2242 sqlite3PagerSetMmapLimit(pBt->pPager, szMmap); 2599 sqlite3PagerSetMmapLimit(pBt->pPager, szMmap);
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after
2300 ** bytes per page is left unchanged. 2657 ** bytes per page is left unchanged.
2301 ** 2658 **
2302 ** If iFix!=0 then the BTS_PAGESIZE_FIXED flag is set so that the page size 2659 ** If iFix!=0 then the BTS_PAGESIZE_FIXED flag is set so that the page size
2303 ** and autovacuum mode can no longer be changed. 2660 ** and autovacuum mode can no longer be changed.
2304 */ 2661 */
2305 int sqlite3BtreeSetPageSize(Btree *p, int pageSize, int nReserve, int iFix){ 2662 int sqlite3BtreeSetPageSize(Btree *p, int pageSize, int nReserve, int iFix){
2306 int rc = SQLITE_OK; 2663 int rc = SQLITE_OK;
2307 BtShared *pBt = p->pBt; 2664 BtShared *pBt = p->pBt;
2308 assert( nReserve>=-1 && nReserve<=255 ); 2665 assert( nReserve>=-1 && nReserve<=255 );
2309 sqlite3BtreeEnter(p); 2666 sqlite3BtreeEnter(p);
2667 #if SQLITE_HAS_CODEC
2668 if( nReserve>pBt->optimalReserve ) pBt->optimalReserve = (u8)nReserve;
2669 #endif
2310 if( pBt->btsFlags & BTS_PAGESIZE_FIXED ){ 2670 if( pBt->btsFlags & BTS_PAGESIZE_FIXED ){
2311 sqlite3BtreeLeave(p); 2671 sqlite3BtreeLeave(p);
2312 return SQLITE_READONLY; 2672 return SQLITE_READONLY;
2313 } 2673 }
2314 if( nReserve<0 ){ 2674 if( nReserve<0 ){
2315 nReserve = pBt->pageSize - pBt->usableSize; 2675 nReserve = pBt->pageSize - pBt->usableSize;
2316 } 2676 }
2317 assert( nReserve>=0 && nReserve<=255 ); 2677 assert( nReserve>=0 && nReserve<=255 );
2318 if( pageSize>=512 && pageSize<=SQLITE_MAX_PAGE_SIZE && 2678 if( pageSize>=512 && pageSize<=SQLITE_MAX_PAGE_SIZE &&
2319 ((pageSize-1)&pageSize)==0 ){ 2679 ((pageSize-1)&pageSize)==0 ){
2320 assert( (pageSize & 7)==0 ); 2680 assert( (pageSize & 7)==0 );
2321 assert( !pBt->pPage1 && !pBt->pCursor ); 2681 assert( !pBt->pCursor );
2322 pBt->pageSize = (u32)pageSize; 2682 pBt->pageSize = (u32)pageSize;
2323 freeTempSpace(pBt); 2683 freeTempSpace(pBt);
2324 } 2684 }
2325 rc = sqlite3PagerSetPagesize(pBt->pPager, &pBt->pageSize, nReserve); 2685 rc = sqlite3PagerSetPagesize(pBt->pPager, &pBt->pageSize, nReserve);
2326 pBt->usableSize = pBt->pageSize - (u16)nReserve; 2686 pBt->usableSize = pBt->pageSize - (u16)nReserve;
2327 if( iFix ) pBt->btsFlags |= BTS_PAGESIZE_FIXED; 2687 if( iFix ) pBt->btsFlags |= BTS_PAGESIZE_FIXED;
2328 sqlite3BtreeLeave(p); 2688 sqlite3BtreeLeave(p);
2329 return rc; 2689 return rc;
2330 } 2690 }
2331 2691
2332 /* 2692 /*
2333 ** Return the currently defined page size 2693 ** Return the currently defined page size
2334 */ 2694 */
2335 int sqlite3BtreeGetPageSize(Btree *p){ 2695 int sqlite3BtreeGetPageSize(Btree *p){
2336 return p->pBt->pageSize; 2696 return p->pBt->pageSize;
2337 } 2697 }
2338 2698
2339 #if defined(SQLITE_HAS_CODEC) || defined(SQLITE_DEBUG)
2340 /* 2699 /*
2341 ** This function is similar to sqlite3BtreeGetReserve(), except that it 2700 ** This function is similar to sqlite3BtreeGetReserve(), except that it
2342 ** may only be called if it is guaranteed that the b-tree mutex is already 2701 ** may only be called if it is guaranteed that the b-tree mutex is already
2343 ** held. 2702 ** held.
2344 ** 2703 **
2345 ** This is useful in one special case in the backup API code where it is 2704 ** This is useful in one special case in the backup API code where it is
2346 ** known that the shared b-tree mutex is held, but the mutex on the 2705 ** known that the shared b-tree mutex is held, but the mutex on the
2347 ** database handle that owns *p is not. In this case if sqlite3BtreeEnter() 2706 ** database handle that owns *p is not. In this case if sqlite3BtreeEnter()
2348 ** were to be called, it might collide with some other operation on the 2707 ** were to be called, it might collide with some other operation on the
2349 ** database handle that owns *p, causing undefined behavior. 2708 ** database handle that owns *p, causing undefined behavior.
2350 */ 2709 */
2351 int sqlite3BtreeGetReserveNoMutex(Btree *p){ 2710 int sqlite3BtreeGetReserveNoMutex(Btree *p){
2711 int n;
2352 assert( sqlite3_mutex_held(p->pBt->mutex) ); 2712 assert( sqlite3_mutex_held(p->pBt->mutex) );
2353 return p->pBt->pageSize - p->pBt->usableSize; 2713 n = p->pBt->pageSize - p->pBt->usableSize;
2714 return n;
2354 } 2715 }
2355 #endif /* SQLITE_HAS_CODEC || SQLITE_DEBUG */
2356 2716
2357 #if !defined(SQLITE_OMIT_PAGER_PRAGMAS) || !defined(SQLITE_OMIT_VACUUM)
2358 /* 2717 /*
2359 ** Return the number of bytes of space at the end of every page that 2718 ** Return the number of bytes of space at the end of every page that
2360 ** are intentionally left unused. This is the "reserved" space that is 2719 ** are intentionally left unused. This is the "reserved" space that is
2361 ** sometimes used by extensions. 2720 ** sometimes used by extensions.
2721 **
2722 ** If SQLITE_HAS_CODEC is defined then the number returned is the
2723 ** greater of the current reserved space and the maximum requested
2724 ** reserve space.
2362 */ 2725 */
2363 int sqlite3BtreeGetReserve(Btree *p){ 2726 int sqlite3BtreeGetOptimalReserve(Btree *p){
2364 int n; 2727 int n;
2365 sqlite3BtreeEnter(p); 2728 sqlite3BtreeEnter(p);
2366 n = p->pBt->pageSize - p->pBt->usableSize; 2729 n = sqlite3BtreeGetReserveNoMutex(p);
2730 #ifdef SQLITE_HAS_CODEC
2731 if( n<p->pBt->optimalReserve ) n = p->pBt->optimalReserve;
2732 #endif
2367 sqlite3BtreeLeave(p); 2733 sqlite3BtreeLeave(p);
2368 return n; 2734 return n;
2369 } 2735 }
2370 2736
2737
2371 /* 2738 /*
2372 ** Set the maximum page count for a database if mxPage is positive. 2739 ** Set the maximum page count for a database if mxPage is positive.
2373 ** No changes are made if mxPage is 0 or negative. 2740 ** No changes are made if mxPage is 0 or negative.
2374 ** Regardless of the value of mxPage, return the maximum page count. 2741 ** Regardless of the value of mxPage, return the maximum page count.
2375 */ 2742 */
2376 int sqlite3BtreeMaxPageCount(Btree *p, int mxPage){ 2743 int sqlite3BtreeMaxPageCount(Btree *p, int mxPage){
2377 int n; 2744 int n;
2378 sqlite3BtreeEnter(p); 2745 sqlite3BtreeEnter(p);
2379 n = sqlite3PagerMaxPageCount(p->pBt->pPager, mxPage); 2746 n = sqlite3PagerMaxPageCount(p->pBt->pPager, mxPage);
2380 sqlite3BtreeLeave(p); 2747 sqlite3BtreeLeave(p);
(...skipping 10 matching lines...) Expand all
2391 if( p==0 ) return 0; 2758 if( p==0 ) return 0;
2392 sqlite3BtreeEnter(p); 2759 sqlite3BtreeEnter(p);
2393 if( newFlag>=0 ){ 2760 if( newFlag>=0 ){
2394 p->pBt->btsFlags &= ~BTS_SECURE_DELETE; 2761 p->pBt->btsFlags &= ~BTS_SECURE_DELETE;
2395 if( newFlag ) p->pBt->btsFlags |= BTS_SECURE_DELETE; 2762 if( newFlag ) p->pBt->btsFlags |= BTS_SECURE_DELETE;
2396 } 2763 }
2397 b = (p->pBt->btsFlags & BTS_SECURE_DELETE)!=0; 2764 b = (p->pBt->btsFlags & BTS_SECURE_DELETE)!=0;
2398 sqlite3BtreeLeave(p); 2765 sqlite3BtreeLeave(p);
2399 return b; 2766 return b;
2400 } 2767 }
2401 #endif /* !defined(SQLITE_OMIT_PAGER_PRAGMAS) || !defined(SQLITE_OMIT_VACUUM) */
2402 2768
2403 /* 2769 /*
2404 ** Change the 'auto-vacuum' property of the database. If the 'autoVacuum' 2770 ** Change the 'auto-vacuum' property of the database. If the 'autoVacuum'
2405 ** parameter is non-zero, then auto-vacuum mode is enabled. If zero, it 2771 ** parameter is non-zero, then auto-vacuum mode is enabled. If zero, it
2406 ** is disabled. The default value for the auto-vacuum property is 2772 ** is disabled. The default value for the auto-vacuum property is
2407 ** determined by the SQLITE_DEFAULT_AUTOVACUUM macro. 2773 ** determined by the SQLITE_DEFAULT_AUTOVACUUM macro.
2408 */ 2774 */
2409 int sqlite3BtreeSetAutoVacuum(Btree *p, int autoVacuum){ 2775 int sqlite3BtreeSetAutoVacuum(Btree *p, int autoVacuum){
2410 #ifdef SQLITE_OMIT_AUTOVACUUM 2776 #ifdef SQLITE_OMIT_AUTOVACUUM
2411 return SQLITE_READONLY; 2777 return SQLITE_READONLY;
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after
2476 nPage = nPageHeader = get4byte(28+(u8*)pPage1->aData); 2842 nPage = nPageHeader = get4byte(28+(u8*)pPage1->aData);
2477 sqlite3PagerPagecount(pBt->pPager, &nPageFile); 2843 sqlite3PagerPagecount(pBt->pPager, &nPageFile);
2478 if( nPage==0 || memcmp(24+(u8*)pPage1->aData, 92+(u8*)pPage1->aData,4)!=0 ){ 2844 if( nPage==0 || memcmp(24+(u8*)pPage1->aData, 92+(u8*)pPage1->aData,4)!=0 ){
2479 nPage = nPageFile; 2845 nPage = nPageFile;
2480 } 2846 }
2481 if( nPage>0 ){ 2847 if( nPage>0 ){
2482 u32 pageSize; 2848 u32 pageSize;
2483 u32 usableSize; 2849 u32 usableSize;
2484 u8 *page1 = pPage1->aData; 2850 u8 *page1 = pPage1->aData;
2485 rc = SQLITE_NOTADB; 2851 rc = SQLITE_NOTADB;
2852 /* EVIDENCE-OF: R-43737-39999 Every valid SQLite database file begins
2853 ** with the following 16 bytes (in hex): 53 51 4c 69 74 65 20 66 6f 72 6d
2854 ** 61 74 20 33 00. */
2486 if( memcmp(page1, zMagicHeader, 16)!=0 ){ 2855 if( memcmp(page1, zMagicHeader, 16)!=0 ){
2487 goto page1_init_failed; 2856 goto page1_init_failed;
2488 } 2857 }
2489 2858
2490 #ifdef SQLITE_OMIT_WAL 2859 #ifdef SQLITE_OMIT_WAL
2491 if( page1[18]>1 ){ 2860 if( page1[18]>1 ){
2492 pBt->btsFlags |= BTS_READ_ONLY; 2861 pBt->btsFlags |= BTS_READ_ONLY;
2493 } 2862 }
2494 if( page1[19]>1 ){ 2863 if( page1[19]>1 ){
2495 goto page1_init_failed; 2864 goto page1_init_failed;
(...skipping 20 matching lines...) Expand all
2516 if( rc!=SQLITE_OK ){ 2885 if( rc!=SQLITE_OK ){
2517 goto page1_init_failed; 2886 goto page1_init_failed;
2518 }else if( isOpen==0 ){ 2887 }else if( isOpen==0 ){
2519 releasePage(pPage1); 2888 releasePage(pPage1);
2520 return SQLITE_OK; 2889 return SQLITE_OK;
2521 } 2890 }
2522 rc = SQLITE_NOTADB; 2891 rc = SQLITE_NOTADB;
2523 } 2892 }
2524 #endif 2893 #endif
2525 2894
2526 /* The maximum embedded fraction must be exactly 25%. And the minimum 2895 /* EVIDENCE-OF: R-15465-20813 The maximum and minimum embedded payload
2527 ** embedded fraction must be 12.5% for both leaf-data and non-leaf-data. 2896 ** fractions and the leaf payload fraction values must be 64, 32, and 32.
2897 **
2528 ** The original design allowed these amounts to vary, but as of 2898 ** The original design allowed these amounts to vary, but as of
2529 ** version 3.6.0, we require them to be fixed. 2899 ** version 3.6.0, we require them to be fixed.
2530 */ 2900 */
2531 if( memcmp(&page1[21], "\100\040\040",3)!=0 ){ 2901 if( memcmp(&page1[21], "\100\040\040",3)!=0 ){
2532 goto page1_init_failed; 2902 goto page1_init_failed;
2533 } 2903 }
2904 /* EVIDENCE-OF: R-51873-39618 The page size for a database file is
2905 ** determined by the 2-byte integer located at an offset of 16 bytes from
2906 ** the beginning of the database file. */
2534 pageSize = (page1[16]<<8) | (page1[17]<<16); 2907 pageSize = (page1[16]<<8) | (page1[17]<<16);
2908 /* EVIDENCE-OF: R-25008-21688 The size of a page is a power of two
2909 ** between 512 and 65536 inclusive. */
2535 if( ((pageSize-1)&pageSize)!=0 2910 if( ((pageSize-1)&pageSize)!=0
2536 || pageSize>SQLITE_MAX_PAGE_SIZE 2911 || pageSize>SQLITE_MAX_PAGE_SIZE
2537 || pageSize<=256 2912 || pageSize<=256
2538 ){ 2913 ){
2539 goto page1_init_failed; 2914 goto page1_init_failed;
2540 } 2915 }
2541 assert( (pageSize & 7)==0 ); 2916 assert( (pageSize & 7)==0 );
2917 /* EVIDENCE-OF: R-59310-51205 The "reserved space" size in the 1-byte
2918 ** integer at offset 20 is the number of bytes of space at the end of
2919 ** each page to reserve for extensions.
2920 **
2921 ** EVIDENCE-OF: R-37497-42412 The size of the reserved region is
2922 ** determined by the one-byte unsigned integer found at an offset of 20
2923 ** into the database file header. */
2542 usableSize = pageSize - page1[20]; 2924 usableSize = pageSize - page1[20];
2543 if( (u32)pageSize!=pBt->pageSize ){ 2925 if( (u32)pageSize!=pBt->pageSize ){
2544 /* After reading the first page of the database assuming a page size 2926 /* After reading the first page of the database assuming a page size
2545 ** of BtShared.pageSize, we have discovered that the page-size is 2927 ** of BtShared.pageSize, we have discovered that the page-size is
2546 ** actually pageSize. Unlock the database, leave pBt->pPage1 at 2928 ** actually pageSize. Unlock the database, leave pBt->pPage1 at
2547 ** zero and return SQLITE_OK. The caller will call this function 2929 ** zero and return SQLITE_OK. The caller will call this function
2548 ** again with the correct page-size. 2930 ** again with the correct page-size.
2549 */ 2931 */
2550 releasePage(pPage1); 2932 releasePage(pPage1);
2551 pBt->usableSize = usableSize; 2933 pBt->usableSize = usableSize;
2552 pBt->pageSize = pageSize; 2934 pBt->pageSize = pageSize;
2553 freeTempSpace(pBt); 2935 freeTempSpace(pBt);
2554 rc = sqlite3PagerSetPagesize(pBt->pPager, &pBt->pageSize, 2936 rc = sqlite3PagerSetPagesize(pBt->pPager, &pBt->pageSize,
2555 pageSize-usableSize); 2937 pageSize-usableSize);
2556 return rc; 2938 return rc;
2557 } 2939 }
2558 if( (pBt->db->flags & SQLITE_RecoveryMode)==0 && nPage>nPageFile ){ 2940 if( (pBt->db->flags & SQLITE_RecoveryMode)==0 && nPage>nPageFile ){
2559 rc = SQLITE_CORRUPT_BKPT; 2941 rc = SQLITE_CORRUPT_BKPT;
2560 goto page1_init_failed; 2942 goto page1_init_failed;
2561 } 2943 }
2944 /* EVIDENCE-OF: R-28312-64704 However, the usable size is not allowed to
2945 ** be less than 480. In other words, if the page size is 512, then the
2946 ** reserved space size cannot exceed 32. */
2562 if( usableSize<480 ){ 2947 if( usableSize<480 ){
2563 goto page1_init_failed; 2948 goto page1_init_failed;
2564 } 2949 }
2565 pBt->pageSize = pageSize; 2950 pBt->pageSize = pageSize;
2566 pBt->usableSize = usableSize; 2951 pBt->usableSize = usableSize;
2567 #ifndef SQLITE_OMIT_AUTOVACUUM 2952 #ifndef SQLITE_OMIT_AUTOVACUUM
2568 pBt->autoVacuum = (get4byte(&page1[36 + 4*4])?1:0); 2953 pBt->autoVacuum = (get4byte(&page1[36 + 4*4])?1:0);
2569 pBt->incrVacuum = (get4byte(&page1[36 + 7*4])?1:0); 2954 pBt->incrVacuum = (get4byte(&page1[36 + 7*4])?1:0);
2570 #endif 2955 #endif
2571 } 2956 }
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after
2636 ** If there is a transaction in progress, this routine is a no-op. 3021 ** If there is a transaction in progress, this routine is a no-op.
2637 */ 3022 */
2638 static void unlockBtreeIfUnused(BtShared *pBt){ 3023 static void unlockBtreeIfUnused(BtShared *pBt){
2639 assert( sqlite3_mutex_held(pBt->mutex) ); 3024 assert( sqlite3_mutex_held(pBt->mutex) );
2640 assert( countValidCursors(pBt,0)==0 || pBt->inTransaction>TRANS_NONE ); 3025 assert( countValidCursors(pBt,0)==0 || pBt->inTransaction>TRANS_NONE );
2641 if( pBt->inTransaction==TRANS_NONE && pBt->pPage1!=0 ){ 3026 if( pBt->inTransaction==TRANS_NONE && pBt->pPage1!=0 ){
2642 MemPage *pPage1 = pBt->pPage1; 3027 MemPage *pPage1 = pBt->pPage1;
2643 assert( pPage1->aData ); 3028 assert( pPage1->aData );
2644 assert( sqlite3PagerRefcount(pBt->pPager)==1 ); 3029 assert( sqlite3PagerRefcount(pBt->pPager)==1 );
2645 pBt->pPage1 = 0; 3030 pBt->pPage1 = 0;
2646 releasePage(pPage1); 3031 releasePageNotNull(pPage1);
2647 } 3032 }
2648 } 3033 }
2649 3034
2650 /* 3035 /*
2651 ** If pBt points to an empty file then convert that empty file 3036 ** If pBt points to an empty file then convert that empty file
2652 ** into a new empty database by initializing the first page of 3037 ** into a new empty database by initializing the first page of
2653 ** the database. 3038 ** the database.
2654 */ 3039 */
2655 static int newDatabase(BtShared *pBt){ 3040 static int newDatabase(BtShared *pBt){
2656 MemPage *pP1; 3041 MemPage *pP1;
(...skipping 284 matching lines...) Expand 10 before | Expand all | Expand 10 after
2941 if( eType==PTRMAP_OVERFLOW2 ){ 3326 if( eType==PTRMAP_OVERFLOW2 ){
2942 /* The pointer is always the first 4 bytes of the page in this case. */ 3327 /* The pointer is always the first 4 bytes of the page in this case. */
2943 if( get4byte(pPage->aData)!=iFrom ){ 3328 if( get4byte(pPage->aData)!=iFrom ){
2944 return SQLITE_CORRUPT_BKPT; 3329 return SQLITE_CORRUPT_BKPT;
2945 } 3330 }
2946 put4byte(pPage->aData, iTo); 3331 put4byte(pPage->aData, iTo);
2947 }else{ 3332 }else{
2948 u8 isInitOrig = pPage->isInit; 3333 u8 isInitOrig = pPage->isInit;
2949 int i; 3334 int i;
2950 int nCell; 3335 int nCell;
3336 int rc;
2951 3337
2952 btreeInitPage(pPage); 3338 rc = btreeInitPage(pPage);
3339 if( rc ) return rc;
2953 nCell = pPage->nCell; 3340 nCell = pPage->nCell;
2954 3341
2955 for(i=0; i<nCell; i++){ 3342 for(i=0; i<nCell; i++){
2956 u8 *pCell = findCell(pPage, i); 3343 u8 *pCell = findCell(pPage, i);
2957 if( eType==PTRMAP_OVERFLOW1 ){ 3344 if( eType==PTRMAP_OVERFLOW1 ){
2958 CellInfo info; 3345 CellInfo info;
2959 btreeParseCellPtr(pPage, pCell, &info); 3346 pPage->xParseCell(pPage, pCell, &info);
2960 if( info.iOverflow 3347 if( info.nLocal<info.nPayload
2961 && pCell+info.iOverflow+3<=pPage->aData+pPage->maskPage 3348 && pCell+info.nSize-1<=pPage->aData+pPage->maskPage
2962 && iFrom==get4byte(&pCell[info.iOverflow]) 3349 && iFrom==get4byte(pCell+info.nSize-4)
2963 ){ 3350 ){
2964 put4byte(&pCell[info.iOverflow], iTo); 3351 put4byte(pCell+info.nSize-4, iTo);
2965 break; 3352 break;
2966 } 3353 }
2967 }else{ 3354 }else{
2968 if( get4byte(pCell)==iFrom ){ 3355 if( get4byte(pCell)==iFrom ){
2969 put4byte(pCell, iTo); 3356 put4byte(pCell, iTo);
2970 break; 3357 break;
2971 } 3358 }
2972 } 3359 }
2973 } 3360 }
2974 3361
(...skipping 273 matching lines...) Expand 10 before | Expand all | Expand 10 after
3248 ** is committed for an auto-vacuum database. 3635 ** is committed for an auto-vacuum database.
3249 ** 3636 **
3250 ** If SQLITE_OK is returned, then *pnTrunc is set to the number of pages 3637 ** If SQLITE_OK is returned, then *pnTrunc is set to the number of pages
3251 ** the database file should be truncated to during the commit process. 3638 ** the database file should be truncated to during the commit process.
3252 ** i.e. the database has been reorganized so that only the first *pnTrunc 3639 ** i.e. the database has been reorganized so that only the first *pnTrunc
3253 ** pages are in use. 3640 ** pages are in use.
3254 */ 3641 */
3255 static int autoVacuumCommit(BtShared *pBt){ 3642 static int autoVacuumCommit(BtShared *pBt){
3256 int rc = SQLITE_OK; 3643 int rc = SQLITE_OK;
3257 Pager *pPager = pBt->pPager; 3644 Pager *pPager = pBt->pPager;
3258 VVA_ONLY( int nRef = sqlite3PagerRefcount(pPager) ); 3645 VVA_ONLY( int nRef = sqlite3PagerRefcount(pPager); )
3259 3646
3260 assert( sqlite3_mutex_held(pBt->mutex) ); 3647 assert( sqlite3_mutex_held(pBt->mutex) );
3261 invalidateAllOverflowCache(pBt); 3648 invalidateAllOverflowCache(pBt);
3262 assert(pBt->autoVacuum); 3649 assert(pBt->autoVacuum);
3263 if( !pBt->incrVacuum ){ 3650 if( !pBt->incrVacuum ){
3264 Pgno nFin; /* Number of pages in database after autovacuuming */ 3651 Pgno nFin; /* Number of pages in database after autovacuuming */
3265 Pgno nFree; /* Number of pages on the freelist initially */ 3652 Pgno nFree; /* Number of pages on the freelist initially */
3266 Pgno iFree; /* The next page to be freed */ 3653 Pgno iFree; /* The next page to be freed */
3267 Pgno nOrig; /* Database size before freeing */ 3654 Pgno nOrig; /* Database size before freeing */
3268 3655
(...skipping 163 matching lines...) Expand 10 before | Expand all | Expand 10 after
3432 if( p->inTrans==TRANS_WRITE ){ 3819 if( p->inTrans==TRANS_WRITE ){
3433 int rc; 3820 int rc;
3434 BtShared *pBt = p->pBt; 3821 BtShared *pBt = p->pBt;
3435 assert( pBt->inTransaction==TRANS_WRITE ); 3822 assert( pBt->inTransaction==TRANS_WRITE );
3436 assert( pBt->nTransaction>0 ); 3823 assert( pBt->nTransaction>0 );
3437 rc = sqlite3PagerCommitPhaseTwo(pBt->pPager); 3824 rc = sqlite3PagerCommitPhaseTwo(pBt->pPager);
3438 if( rc!=SQLITE_OK && bCleanup==0 ){ 3825 if( rc!=SQLITE_OK && bCleanup==0 ){
3439 sqlite3BtreeLeave(p); 3826 sqlite3BtreeLeave(p);
3440 return rc; 3827 return rc;
3441 } 3828 }
3829 p->iDataVersion--; /* Compensate for pPager->iDataVersion++; */
3442 pBt->inTransaction = TRANS_READ; 3830 pBt->inTransaction = TRANS_READ;
3443 btreeClearHasContent(pBt); 3831 btreeClearHasContent(pBt);
3444 } 3832 }
3445 3833
3446 btreeEndTransaction(p); 3834 btreeEndTransaction(p);
3447 sqlite3BtreeLeave(p); 3835 sqlite3BtreeLeave(p);
3448 return SQLITE_OK; 3836 return SQLITE_OK;
3449 } 3837 }
3450 3838
3451 /* 3839 /*
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after
3491 int sqlite3BtreeTripAllCursors(Btree *pBtree, int errCode, int writeOnly){ 3879 int sqlite3BtreeTripAllCursors(Btree *pBtree, int errCode, int writeOnly){
3492 BtCursor *p; 3880 BtCursor *p;
3493 int rc = SQLITE_OK; 3881 int rc = SQLITE_OK;
3494 3882
3495 assert( (writeOnly==0 || writeOnly==1) && BTCF_WriteFlag==1 ); 3883 assert( (writeOnly==0 || writeOnly==1) && BTCF_WriteFlag==1 );
3496 if( pBtree ){ 3884 if( pBtree ){
3497 sqlite3BtreeEnter(pBtree); 3885 sqlite3BtreeEnter(pBtree);
3498 for(p=pBtree->pBt->pCursor; p; p=p->pNext){ 3886 for(p=pBtree->pBt->pCursor; p; p=p->pNext){
3499 int i; 3887 int i;
3500 if( writeOnly && (p->curFlags & BTCF_WriteFlag)==0 ){ 3888 if( writeOnly && (p->curFlags & BTCF_WriteFlag)==0 ){
3501 if( p->eState==CURSOR_VALID ){ 3889 if( p->eState==CURSOR_VALID || p->eState==CURSOR_SKIPNEXT ){
3502 rc = saveCursorPosition(p); 3890 rc = saveCursorPosition(p);
3503 if( rc!=SQLITE_OK ){ 3891 if( rc!=SQLITE_OK ){
3504 (void)sqlite3BtreeTripAllCursors(pBtree, rc, 0); 3892 (void)sqlite3BtreeTripAllCursors(pBtree, rc, 0);
3505 break; 3893 break;
3506 } 3894 }
3507 } 3895 }
3508 }else{ 3896 }else{
3509 sqlite3BtreeClearCursor(p); 3897 sqlite3BtreeClearCursor(p);
3510 p->eState = CURSOR_FAULT; 3898 p->eState = CURSOR_FAULT;
3511 p->skipNext = errCode; 3899 p->skipNext = errCode;
(...skipping 177 matching lines...) Expand 10 before | Expand all | Expand 10 after
3689 ** on pCur to initialize the memory space prior to invoking this routine. 4077 ** on pCur to initialize the memory space prior to invoking this routine.
3690 */ 4078 */
3691 static int btreeCursor( 4079 static int btreeCursor(
3692 Btree *p, /* The btree */ 4080 Btree *p, /* The btree */
3693 int iTable, /* Root page of table to open */ 4081 int iTable, /* Root page of table to open */
3694 int wrFlag, /* 1 to write. 0 read-only */ 4082 int wrFlag, /* 1 to write. 0 read-only */
3695 struct KeyInfo *pKeyInfo, /* First arg to comparison function */ 4083 struct KeyInfo *pKeyInfo, /* First arg to comparison function */
3696 BtCursor *pCur /* Space for new cursor */ 4084 BtCursor *pCur /* Space for new cursor */
3697 ){ 4085 ){
3698 BtShared *pBt = p->pBt; /* Shared b-tree handle */ 4086 BtShared *pBt = p->pBt; /* Shared b-tree handle */
4087 BtCursor *pX; /* Looping over all other cursors */
3699 4088
3700 assert( sqlite3BtreeHoldsMutex(p) ); 4089 assert( sqlite3BtreeHoldsMutex(p) );
3701 assert( wrFlag==0 || wrFlag==1 ); 4090 assert( wrFlag==0
4091 || wrFlag==BTREE_WRCSR
4092 || wrFlag==(BTREE_WRCSR|BTREE_FORDELETE)
4093 );
3702 4094
3703 /* The following assert statements verify that if this is a sharable 4095 /* The following assert statements verify that if this is a sharable
3704 ** b-tree database, the connection is holding the required table locks, 4096 ** b-tree database, the connection is holding the required table locks,
3705 ** and that no other connection has any open cursor that conflicts with 4097 ** and that no other connection has any open cursor that conflicts with
3706 ** this lock. */ 4098 ** this lock. */
3707 assert( hasSharedCacheTableLock(p, iTable, pKeyInfo!=0, wrFlag+1) ); 4099 assert( hasSharedCacheTableLock(p, iTable, pKeyInfo!=0, (wrFlag?2:1)) );
3708 assert( wrFlag==0 || !hasReadConflicts(p, iTable) ); 4100 assert( wrFlag==0 || !hasReadConflicts(p, iTable) );
3709 4101
3710 /* Assert that the caller has opened the required transaction. */ 4102 /* Assert that the caller has opened the required transaction. */
3711 assert( p->inTrans>TRANS_NONE ); 4103 assert( p->inTrans>TRANS_NONE );
3712 assert( wrFlag==0 || p->inTrans==TRANS_WRITE ); 4104 assert( wrFlag==0 || p->inTrans==TRANS_WRITE );
3713 assert( pBt->pPage1 && pBt->pPage1->aData ); 4105 assert( pBt->pPage1 && pBt->pPage1->aData );
4106 assert( wrFlag==0 || (pBt->btsFlags & BTS_READ_ONLY)==0 );
3714 4107
3715 if( NEVER(wrFlag && (pBt->btsFlags & BTS_READ_ONLY)!=0) ){
3716 return SQLITE_READONLY;
3717 }
3718 if( wrFlag ){ 4108 if( wrFlag ){
3719 allocateTempSpace(pBt); 4109 allocateTempSpace(pBt);
3720 if( pBt->pTmpSpace==0 ) return SQLITE_NOMEM; 4110 if( pBt->pTmpSpace==0 ) return SQLITE_NOMEM;
3721 } 4111 }
3722 if( iTable==1 && btreePagecount(pBt)==0 ){ 4112 if( iTable==1 && btreePagecount(pBt)==0 ){
3723 assert( wrFlag==0 ); 4113 assert( wrFlag==0 );
3724 iTable = 0; 4114 iTable = 0;
3725 } 4115 }
3726 4116
3727 /* Now that no other errors can occur, finish filling in the BtCursor 4117 /* Now that no other errors can occur, finish filling in the BtCursor
3728 ** variables and link the cursor into the BtShared list. */ 4118 ** variables and link the cursor into the BtShared list. */
3729 pCur->pgnoRoot = (Pgno)iTable; 4119 pCur->pgnoRoot = (Pgno)iTable;
3730 pCur->iPage = -1; 4120 pCur->iPage = -1;
3731 pCur->pKeyInfo = pKeyInfo; 4121 pCur->pKeyInfo = pKeyInfo;
3732 pCur->pBtree = p; 4122 pCur->pBtree = p;
3733 pCur->pBt = pBt; 4123 pCur->pBt = pBt;
3734 assert( wrFlag==0 || wrFlag==BTCF_WriteFlag ); 4124 pCur->curFlags = wrFlag ? BTCF_WriteFlag : 0;
3735 pCur->curFlags = wrFlag; 4125 pCur->curPagerFlags = wrFlag ? 0 : PAGER_GET_READONLY;
4126 /* If there are two or more cursors on the same btree, then all such
4127 ** cursors *must* have the BTCF_Multiple flag set. */
4128 for(pX=pBt->pCursor; pX; pX=pX->pNext){
4129 if( pX->pgnoRoot==(Pgno)iTable ){
4130 pX->curFlags |= BTCF_Multiple;
4131 pCur->curFlags |= BTCF_Multiple;
4132 }
4133 }
3736 pCur->pNext = pBt->pCursor; 4134 pCur->pNext = pBt->pCursor;
3737 if( pCur->pNext ){
3738 pCur->pNext->pPrev = pCur;
3739 }
3740 pBt->pCursor = pCur; 4135 pBt->pCursor = pCur;
3741 pCur->eState = CURSOR_INVALID; 4136 pCur->eState = CURSOR_INVALID;
3742 return SQLITE_OK; 4137 return SQLITE_OK;
3743 } 4138 }
3744 int sqlite3BtreeCursor( 4139 int sqlite3BtreeCursor(
3745 Btree *p, /* The btree */ 4140 Btree *p, /* The btree */
3746 int iTable, /* Root page of table to open */ 4141 int iTable, /* Root page of table to open */
3747 int wrFlag, /* 1 to write. 0 read-only */ 4142 int wrFlag, /* 1 to write. 0 read-only */
3748 struct KeyInfo *pKeyInfo, /* First arg to xCompare() */ 4143 struct KeyInfo *pKeyInfo, /* First arg to xCompare() */
3749 BtCursor *pCur /* Write new cursor here */ 4144 BtCursor *pCur /* Write new cursor here */
3750 ){ 4145 ){
3751 int rc; 4146 int rc;
3752 sqlite3BtreeEnter(p); 4147 if( iTable<1 ){
3753 rc = btreeCursor(p, iTable, wrFlag, pKeyInfo, pCur); 4148 rc = SQLITE_CORRUPT_BKPT;
3754 sqlite3BtreeLeave(p); 4149 }else{
4150 sqlite3BtreeEnter(p);
4151 rc = btreeCursor(p, iTable, wrFlag, pKeyInfo, pCur);
4152 sqlite3BtreeLeave(p);
4153 }
3755 return rc; 4154 return rc;
3756 } 4155 }
3757 4156
3758 /* 4157 /*
3759 ** Return the size of a BtCursor object in bytes. 4158 ** Return the size of a BtCursor object in bytes.
3760 ** 4159 **
3761 ** This interface is needed so that users of cursors can preallocate 4160 ** This interface is needed so that users of cursors can preallocate
3762 ** sufficient storage to hold a cursor. The BtCursor object is opaque 4161 ** sufficient storage to hold a cursor. The BtCursor object is opaque
3763 ** to users so they cannot do the sizeof() themselves - they must call 4162 ** to users so they cannot do the sizeof() themselves - they must call
3764 ** this routine. 4163 ** this routine.
(...skipping 18 matching lines...) Expand all
3783 ** Close a cursor. The read lock on the database file is released 4182 ** Close a cursor. The read lock on the database file is released
3784 ** when the last cursor is closed. 4183 ** when the last cursor is closed.
3785 */ 4184 */
3786 int sqlite3BtreeCloseCursor(BtCursor *pCur){ 4185 int sqlite3BtreeCloseCursor(BtCursor *pCur){
3787 Btree *pBtree = pCur->pBtree; 4186 Btree *pBtree = pCur->pBtree;
3788 if( pBtree ){ 4187 if( pBtree ){
3789 int i; 4188 int i;
3790 BtShared *pBt = pCur->pBt; 4189 BtShared *pBt = pCur->pBt;
3791 sqlite3BtreeEnter(pBtree); 4190 sqlite3BtreeEnter(pBtree);
3792 sqlite3BtreeClearCursor(pCur); 4191 sqlite3BtreeClearCursor(pCur);
3793 if( pCur->pPrev ){ 4192 assert( pBt->pCursor!=0 );
3794 pCur->pPrev->pNext = pCur->pNext; 4193 if( pBt->pCursor==pCur ){
4194 pBt->pCursor = pCur->pNext;
3795 }else{ 4195 }else{
3796 pBt->pCursor = pCur->pNext; 4196 BtCursor *pPrev = pBt->pCursor;
3797 } 4197 do{
3798 if( pCur->pNext ){ 4198 if( pPrev->pNext==pCur ){
3799 pCur->pNext->pPrev = pCur->pPrev; 4199 pPrev->pNext = pCur->pNext;
4200 break;
4201 }
4202 pPrev = pPrev->pNext;
4203 }while( ALWAYS(pPrev) );
3800 } 4204 }
3801 for(i=0; i<=pCur->iPage; i++){ 4205 for(i=0; i<=pCur->iPage; i++){
3802 releasePage(pCur->apPage[i]); 4206 releasePage(pCur->apPage[i]);
3803 } 4207 }
3804 unlockBtreeIfUnused(pBt); 4208 unlockBtreeIfUnused(pBt);
3805 sqlite3DbFree(pBtree->db, pCur->aOverflow); 4209 sqlite3_free(pCur->aOverflow);
3806 /* sqlite3_free(pCur); */ 4210 /* sqlite3_free(pCur); */
3807 sqlite3BtreeLeave(pBtree); 4211 sqlite3BtreeLeave(pBtree);
3808 } 4212 }
3809 return SQLITE_OK; 4213 return SQLITE_OK;
3810 } 4214 }
3811 4215
3812 /* 4216 /*
3813 ** Make sure the BtCursor* given in the argument has a valid 4217 ** Make sure the BtCursor* given in the argument has a valid
3814 ** BtCursor.info structure. If it is not already valid, call 4218 ** BtCursor.info structure. If it is not already valid, call
3815 ** btreeParseCell() to fill it in. 4219 ** btreeParseCell() to fill it in.
3816 ** 4220 **
3817 ** BtCursor.info is a cache of the information in the current cell. 4221 ** BtCursor.info is a cache of the information in the current cell.
3818 ** Using this cache reduces the number of calls to btreeParseCell(). 4222 ** Using this cache reduces the number of calls to btreeParseCell().
3819 **
3820 ** 2007-06-25: There is a bug in some versions of MSVC that cause the
3821 ** compiler to crash when getCellInfo() is implemented as a macro.
3822 ** But there is a measureable speed advantage to using the macro on gcc
3823 ** (when less compiler optimizations like -Os or -O0 are used and the
3824 ** compiler is not doing aggressive inlining.) So we use a real function
3825 ** for MSVC and a macro for everything else. Ticket #2457.
3826 */ 4223 */
3827 #ifndef NDEBUG 4224 #ifndef NDEBUG
3828 static void assertCellInfo(BtCursor *pCur){ 4225 static void assertCellInfo(BtCursor *pCur){
3829 CellInfo info; 4226 CellInfo info;
3830 int iPage = pCur->iPage; 4227 int iPage = pCur->iPage;
3831 memset(&info, 0, sizeof(info)); 4228 memset(&info, 0, sizeof(info));
3832 btreeParseCell(pCur->apPage[iPage], pCur->aiIdx[iPage], &info); 4229 btreeParseCell(pCur->apPage[iPage], pCur->aiIdx[iPage], &info);
3833 assert( CORRUPT_DB || memcmp(&info, &pCur->info, sizeof(info))==0 ); 4230 assert( CORRUPT_DB || memcmp(&info, &pCur->info, sizeof(info))==0 );
3834 } 4231 }
3835 #else 4232 #else
3836 #define assertCellInfo(x) 4233 #define assertCellInfo(x)
3837 #endif 4234 #endif
3838 #ifdef _MSC_VER 4235 static SQLITE_NOINLINE void getCellInfo(BtCursor *pCur){
3839 /* Use a real function in MSVC to work around bugs in that compiler. */ 4236 if( pCur->info.nSize==0 ){
3840 static void getCellInfo(BtCursor *pCur){ 4237 int iPage = pCur->iPage;
3841 if( pCur->info.nSize==0 ){ 4238 pCur->curFlags |= BTCF_ValidNKey;
3842 int iPage = pCur->iPage; 4239 btreeParseCell(pCur->apPage[iPage],pCur->aiIdx[iPage],&pCur->info);
3843 btreeParseCell(pCur->apPage[iPage],pCur->aiIdx[iPage],&pCur->info); 4240 }else{
3844 pCur->curFlags |= BTCF_ValidNKey; 4241 assertCellInfo(pCur);
3845 }else{
3846 assertCellInfo(pCur);
3847 }
3848 } 4242 }
3849 #else /* if not _MSC_VER */ 4243 }
3850 /* Use a macro in all other compilers so that the function is inlined */
3851 #define getCellInfo(pCur) \
3852 if( pCur->info.nSize==0 ){ \
3853 int iPage = pCur->iPage; \
3854 btreeParseCell(pCur->apPage[iPage],pCur->aiIdx[iPage],&pCur->info); \
3855 pCur->curFlags |= BTCF_ValidNKey; \
3856 }else{ \
3857 assertCellInfo(pCur); \
3858 }
3859 #endif /* _MSC_VER */
3860 4244
3861 #ifndef NDEBUG /* The next routine used only within assert() statements */ 4245 #ifndef NDEBUG /* The next routine used only within assert() statements */
3862 /* 4246 /*
3863 ** Return true if the given BtCursor is valid. A valid cursor is one 4247 ** Return true if the given BtCursor is valid. A valid cursor is one
3864 ** that is currently pointing to a row in a (non-empty) table. 4248 ** that is currently pointing to a row in a (non-empty) table.
3865 ** This is a verification routine used only within assert() statements. 4249 ** This is a verification routine used only within assert() statements.
3866 */ 4250 */
3867 int sqlite3BtreeCursorIsValid(BtCursor *pCur){ 4251 int sqlite3BtreeCursorIsValid(BtCursor *pCur){
3868 return pCur && pCur->eState==CURSOR_VALID; 4252 return pCur && pCur->eState==CURSOR_VALID;
3869 } 4253 }
(...skipping 27 matching lines...) Expand all
3897 ** valid entry. In other words, the calling procedure must guarantee 4281 ** valid entry. In other words, the calling procedure must guarantee
3898 ** that the cursor has Cursor.eState==CURSOR_VALID. 4282 ** that the cursor has Cursor.eState==CURSOR_VALID.
3899 ** 4283 **
3900 ** Failure is not possible. This function always returns SQLITE_OK. 4284 ** Failure is not possible. This function always returns SQLITE_OK.
3901 ** It might just as well be a procedure (returning void) but we continue 4285 ** It might just as well be a procedure (returning void) but we continue
3902 ** to return an integer result code for historical reasons. 4286 ** to return an integer result code for historical reasons.
3903 */ 4287 */
3904 int sqlite3BtreeDataSize(BtCursor *pCur, u32 *pSize){ 4288 int sqlite3BtreeDataSize(BtCursor *pCur, u32 *pSize){
3905 assert( cursorHoldsMutex(pCur) ); 4289 assert( cursorHoldsMutex(pCur) );
3906 assert( pCur->eState==CURSOR_VALID ); 4290 assert( pCur->eState==CURSOR_VALID );
4291 assert( pCur->iPage>=0 );
4292 assert( pCur->iPage<BTCURSOR_MAX_DEPTH );
3907 assert( pCur->apPage[pCur->iPage]->intKeyLeaf==1 ); 4293 assert( pCur->apPage[pCur->iPage]->intKeyLeaf==1 );
3908 getCellInfo(pCur); 4294 getCellInfo(pCur);
3909 *pSize = pCur->info.nPayload; 4295 *pSize = pCur->info.nPayload;
3910 return SQLITE_OK; 4296 return SQLITE_OK;
3911 } 4297 }
3912 4298
3913 /* 4299 /*
3914 ** Given the page number of an overflow page in the database (parameter 4300 ** Given the page number of an overflow page in the database (parameter
3915 ** ovfl), this function finds the page number of the next page in the 4301 ** ovfl), this function finds the page number of the next page in the
3916 ** linked list of overflow pages. If possible, it uses the auto-vacuum 4302 ** linked list of overflow pages. If possible, it uses the auto-vacuum
(...skipping 173 matching lines...) Expand 10 before | Expand all | Expand 10 after
4090 a = pCur->info.nLocal - offset; 4476 a = pCur->info.nLocal - offset;
4091 } 4477 }
4092 rc = copyPayload(&aPayload[offset], pBuf, a, (eOp & 0x01), pPage->pDbPage); 4478 rc = copyPayload(&aPayload[offset], pBuf, a, (eOp & 0x01), pPage->pDbPage);
4093 offset = 0; 4479 offset = 0;
4094 pBuf += a; 4480 pBuf += a;
4095 amt -= a; 4481 amt -= a;
4096 }else{ 4482 }else{
4097 offset -= pCur->info.nLocal; 4483 offset -= pCur->info.nLocal;
4098 } 4484 }
4099 4485
4486
4100 if( rc==SQLITE_OK && amt>0 ){ 4487 if( rc==SQLITE_OK && amt>0 ){
4101 const u32 ovflSize = pBt->usableSize - 4; /* Bytes content per ovfl page */ 4488 const u32 ovflSize = pBt->usableSize - 4; /* Bytes content per ovfl page */
4102 Pgno nextPage; 4489 Pgno nextPage;
4103 4490
4104 nextPage = get4byte(&aPayload[pCur->info.nLocal]); 4491 nextPage = get4byte(&aPayload[pCur->info.nLocal]);
4105 4492
4106 /* If the BtCursor.aOverflow[] has not been allocated, allocate it now. 4493 /* If the BtCursor.aOverflow[] has not been allocated, allocate it now.
4107 ** Except, do not allocate aOverflow[] for eOp==2. 4494 ** Except, do not allocate aOverflow[] for eOp==2.
4108 ** 4495 **
4109 ** The aOverflow[] array is sized at one entry for each overflow page 4496 ** The aOverflow[] array is sized at one entry for each overflow page
4110 ** in the overflow chain. The page number of the first overflow page is 4497 ** in the overflow chain. The page number of the first overflow page is
4111 ** stored in aOverflow[0], etc. A value of 0 in the aOverflow[] array 4498 ** stored in aOverflow[0], etc. A value of 0 in the aOverflow[] array
4112 ** means "not yet known" (the cache is lazily populated). 4499 ** means "not yet known" (the cache is lazily populated).
4113 */ 4500 */
4114 if( eOp!=2 && (pCur->curFlags & BTCF_ValidOvfl)==0 ){ 4501 if( eOp!=2 && (pCur->curFlags & BTCF_ValidOvfl)==0 ){
4115 int nOvfl = (pCur->info.nPayload-pCur->info.nLocal+ovflSize-1)/ovflSize; 4502 int nOvfl = (pCur->info.nPayload-pCur->info.nLocal+ovflSize-1)/ovflSize;
4116 if( nOvfl>pCur->nOvflAlloc ){ 4503 if( nOvfl>pCur->nOvflAlloc ){
4117 Pgno *aNew = (Pgno*)sqlite3DbRealloc( 4504 Pgno *aNew = (Pgno*)sqlite3Realloc(
4118 pCur->pBtree->db, pCur->aOverflow, nOvfl*2*sizeof(Pgno) 4505 pCur->aOverflow, nOvfl*2*sizeof(Pgno)
4119 ); 4506 );
4120 if( aNew==0 ){ 4507 if( aNew==0 ){
4121 rc = SQLITE_NOMEM; 4508 rc = SQLITE_NOMEM;
4122 }else{ 4509 }else{
4123 pCur->nOvflAlloc = nOvfl*2; 4510 pCur->nOvflAlloc = nOvfl*2;
4124 pCur->aOverflow = aNew; 4511 pCur->aOverflow = aNew;
4125 } 4512 }
4126 } 4513 }
4127 if( rc==SQLITE_OK ){ 4514 if( rc==SQLITE_OK ){
4128 memset(pCur->aOverflow, 0, nOvfl*sizeof(Pgno)); 4515 memset(pCur->aOverflow, 0, nOvfl*sizeof(Pgno));
(...skipping 10 matching lines...) Expand all
4139 ){ 4526 ){
4140 iIdx = (offset/ovflSize); 4527 iIdx = (offset/ovflSize);
4141 nextPage = pCur->aOverflow[iIdx]; 4528 nextPage = pCur->aOverflow[iIdx];
4142 offset = (offset%ovflSize); 4529 offset = (offset%ovflSize);
4143 } 4530 }
4144 4531
4145 for( ; rc==SQLITE_OK && amt>0 && nextPage; iIdx++){ 4532 for( ; rc==SQLITE_OK && amt>0 && nextPage; iIdx++){
4146 4533
4147 /* If required, populate the overflow page-list cache. */ 4534 /* If required, populate the overflow page-list cache. */
4148 if( (pCur->curFlags & BTCF_ValidOvfl)!=0 ){ 4535 if( (pCur->curFlags & BTCF_ValidOvfl)!=0 ){
4149 assert(!pCur->aOverflow[iIdx] || pCur->aOverflow[iIdx]==nextPage); 4536 assert( pCur->aOverflow[iIdx]==0
4537 || pCur->aOverflow[iIdx]==nextPage
4538 || CORRUPT_DB );
4150 pCur->aOverflow[iIdx] = nextPage; 4539 pCur->aOverflow[iIdx] = nextPage;
4151 } 4540 }
4152 4541
4153 if( offset>=ovflSize ){ 4542 if( offset>=ovflSize ){
4154 /* The only reason to read this page is to obtain the page 4543 /* The only reason to read this page is to obtain the page
4155 ** number for the next page in the overflow chain. The page 4544 ** number for the next page in the overflow chain. The page
4156 ** data is not required. So first try to lookup the overflow 4545 ** data is not required. So first try to lookup the overflow
4157 ** page-list cache, if any, then fall back to the getOverflowPage() 4546 ** page-list cache, if any, then fall back to the getOverflowPage()
4158 ** function. 4547 ** function.
4159 ** 4548 **
4160 ** Note that the aOverflow[] array must be allocated because eOp!=2 4549 ** Note that the aOverflow[] array must be allocated because eOp!=2
4161 ** here. If eOp==2, then offset==0 and this branch is never taken. 4550 ** here. If eOp==2, then offset==0 and this branch is never taken.
4162 */ 4551 */
4163 assert( eOp!=2 ); 4552 assert( eOp!=2 );
4164 assert( pCur->curFlags & BTCF_ValidOvfl ); 4553 assert( pCur->curFlags & BTCF_ValidOvfl );
4554 assert( pCur->pBtree->db==pBt->db );
4165 if( pCur->aOverflow[iIdx+1] ){ 4555 if( pCur->aOverflow[iIdx+1] ){
4166 nextPage = pCur->aOverflow[iIdx+1]; 4556 nextPage = pCur->aOverflow[iIdx+1];
4167 }else{ 4557 }else{
4168 rc = getOverflowPage(pBt, nextPage, 0, &nextPage); 4558 rc = getOverflowPage(pBt, nextPage, 0, &nextPage);
4169 } 4559 }
4170 offset -= ovflSize; 4560 offset -= ovflSize;
4171 }else{ 4561 }else{
4172 /* Need to read this page properly. It contains some of the 4562 /* Need to read this page properly. It contains some of the
4173 ** range of data that is being read (eOp==0) or written (eOp!=0). 4563 ** range of data that is being read (eOp==0) or written (eOp!=0).
4174 */ 4564 */
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
4208 assert( aWrite>=pBufStart ); /* hence (7) */ 4598 assert( aWrite>=pBufStart ); /* hence (7) */
4209 memcpy(aSave, aWrite, 4); 4599 memcpy(aSave, aWrite, 4);
4210 rc = sqlite3OsRead(fd, aWrite, a+4, (i64)pBt->pageSize*(nextPage-1)); 4600 rc = sqlite3OsRead(fd, aWrite, a+4, (i64)pBt->pageSize*(nextPage-1));
4211 nextPage = get4byte(aWrite); 4601 nextPage = get4byte(aWrite);
4212 memcpy(aWrite, aSave, 4); 4602 memcpy(aWrite, aSave, 4);
4213 }else 4603 }else
4214 #endif 4604 #endif
4215 4605
4216 { 4606 {
4217 DbPage *pDbPage; 4607 DbPage *pDbPage;
4218 rc = sqlite3PagerAcquire(pBt->pPager, nextPage, &pDbPage, 4608 rc = sqlite3PagerGet(pBt->pPager, nextPage, &pDbPage,
4219 ((eOp&0x01)==0 ? PAGER_GET_READONLY : 0) 4609 ((eOp&0x01)==0 ? PAGER_GET_READONLY : 0)
4220 ); 4610 );
4221 if( rc==SQLITE_OK ){ 4611 if( rc==SQLITE_OK ){
4222 aPayload = sqlite3PagerGetData(pDbPage); 4612 aPayload = sqlite3PagerGetData(pDbPage);
4223 nextPage = get4byte(aPayload); 4613 nextPage = get4byte(aPayload);
4224 rc = copyPayload(&aPayload[offset+4], pBuf, a, (eOp&0x01), pDbPage); 4614 rc = copyPayload(&aPayload[offset+4], pBuf, a, (eOp&0x01), pDbPage);
4225 sqlite3PagerUnref(pDbPage); 4615 sqlite3PagerUnref(pDbPage);
4226 offset = 0; 4616 offset = 0;
4227 } 4617 }
4228 } 4618 }
(...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after
4303 ** the key/data and copy it into a preallocated buffer. 4693 ** the key/data and copy it into a preallocated buffer.
4304 ** 4694 **
4305 ** The pointer returned by this routine looks directly into the cached 4695 ** The pointer returned by this routine looks directly into the cached
4306 ** page of the database. The data might change or move the next time 4696 ** page of the database. The data might change or move the next time
4307 ** any btree routine is called. 4697 ** any btree routine is called.
4308 */ 4698 */
4309 static const void *fetchPayload( 4699 static const void *fetchPayload(
4310 BtCursor *pCur, /* Cursor pointing to entry to read from */ 4700 BtCursor *pCur, /* Cursor pointing to entry to read from */
4311 u32 *pAmt /* Write the number of available bytes here */ 4701 u32 *pAmt /* Write the number of available bytes here */
4312 ){ 4702 ){
4703 u32 amt;
4313 assert( pCur!=0 && pCur->iPage>=0 && pCur->apPage[pCur->iPage]); 4704 assert( pCur!=0 && pCur->iPage>=0 && pCur->apPage[pCur->iPage]);
4314 assert( pCur->eState==CURSOR_VALID ); 4705 assert( pCur->eState==CURSOR_VALID );
4315 assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) ); 4706 assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) );
4316 assert( cursorHoldsMutex(pCur) ); 4707 assert( cursorHoldsMutex(pCur) );
4317 assert( pCur->aiIdx[pCur->iPage]<pCur->apPage[pCur->iPage]->nCell ); 4708 assert( pCur->aiIdx[pCur->iPage]<pCur->apPage[pCur->iPage]->nCell );
4318 assert( pCur->info.nSize>0 ); 4709 assert( pCur->info.nSize>0 );
4319 *pAmt = pCur->info.nLocal; 4710 assert( pCur->info.pPayload>pCur->apPage[pCur->iPage]->aData || CORRUPT_DB );
4711 assert( pCur->info.pPayload<pCur->apPage[pCur->iPage]->aDataEnd ||CORRUPT_DB);
4712 amt = (int)(pCur->apPage[pCur->iPage]->aDataEnd - pCur->info.pPayload);
4713 if( pCur->info.nLocal<amt ) amt = pCur->info.nLocal;
4714 *pAmt = amt;
4320 return (void*)pCur->info.pPayload; 4715 return (void*)pCur->info.pPayload;
4321 } 4716 }
4322 4717
4323 4718
4324 /* 4719 /*
4325 ** For the entry that cursor pCur is pointing to, return as 4720 ** For the entry that cursor pCur is pointing to, return as
4326 ** many bytes of the key or data as are available on the local 4721 ** many bytes of the key or data as are available on the local
4327 ** b-tree page. Write the number of available bytes into *pAmt. 4722 ** b-tree page. Write the number of available bytes into *pAmt.
4328 ** 4723 **
4329 ** The pointer returned is ephemeral. The key/data may move 4724 ** The pointer returned is ephemeral. The key/data may move
(...skipping 16 matching lines...) Expand all
4346 /* 4741 /*
4347 ** Move the cursor down to a new child page. The newPgno argument is the 4742 ** Move the cursor down to a new child page. The newPgno argument is the
4348 ** page number of the child page to move to. 4743 ** page number of the child page to move to.
4349 ** 4744 **
4350 ** This function returns SQLITE_CORRUPT if the page-header flags field of 4745 ** This function returns SQLITE_CORRUPT if the page-header flags field of
4351 ** the new child page does not match the flags field of the parent (i.e. 4746 ** the new child page does not match the flags field of the parent (i.e.
4352 ** if an intkey page appears to be the parent of a non-intkey page, or 4747 ** if an intkey page appears to be the parent of a non-intkey page, or
4353 ** vice-versa). 4748 ** vice-versa).
4354 */ 4749 */
4355 static int moveToChild(BtCursor *pCur, u32 newPgno){ 4750 static int moveToChild(BtCursor *pCur, u32 newPgno){
4356 int rc;
4357 int i = pCur->iPage;
4358 MemPage *pNewPage;
4359 BtShared *pBt = pCur->pBt; 4751 BtShared *pBt = pCur->pBt;
4360 4752
4361 assert( cursorHoldsMutex(pCur) ); 4753 assert( cursorHoldsMutex(pCur) );
4362 assert( pCur->eState==CURSOR_VALID ); 4754 assert( pCur->eState==CURSOR_VALID );
4363 assert( pCur->iPage<BTCURSOR_MAX_DEPTH ); 4755 assert( pCur->iPage<BTCURSOR_MAX_DEPTH );
4364 assert( pCur->iPage>=0 ); 4756 assert( pCur->iPage>=0 );
4365 if( pCur->iPage>=(BTCURSOR_MAX_DEPTH-1) ){ 4757 if( pCur->iPage>=(BTCURSOR_MAX_DEPTH-1) ){
4366 return SQLITE_CORRUPT_BKPT; 4758 return SQLITE_CORRUPT_BKPT;
4367 } 4759 }
4368 rc = getAndInitPage(pBt, newPgno, &pNewPage,
4369 (pCur->curFlags & BTCF_WriteFlag)==0 ? PAGER_GET_READONLY : 0);
4370 if( rc ) return rc;
4371 pCur->apPage[i+1] = pNewPage;
4372 pCur->aiIdx[i+1] = 0;
4373 pCur->iPage++;
4374
4375 pCur->info.nSize = 0; 4760 pCur->info.nSize = 0;
4376 pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl); 4761 pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl);
4377 if( pNewPage->nCell<1 || pNewPage->intKey!=pCur->apPage[i]->intKey ){ 4762 pCur->iPage++;
4378 return SQLITE_CORRUPT_BKPT; 4763 pCur->aiIdx[pCur->iPage] = 0;
4379 } 4764 return getAndInitPage(pBt, newPgno, &pCur->apPage[pCur->iPage],
4380 return SQLITE_OK; 4765 pCur, pCur->curPagerFlags);
4381 } 4766 }
4382 4767
4383 #if 0 4768 #if SQLITE_DEBUG
4384 /* 4769 /*
4385 ** Page pParent is an internal (non-leaf) tree page. This function 4770 ** Page pParent is an internal (non-leaf) tree page. This function
4386 ** asserts that page number iChild is the left-child of the iIdx'th 4771 ** asserts that page number iChild is the left-child of the iIdx'th
4387 ** cell in page pParent. Or, if iIdx is equal to the total number of 4772 ** cell in page pParent. Or, if iIdx is equal to the total number of
4388 ** cells in pParent, that page number iChild is the right-child of 4773 ** cells in pParent, that page number iChild is the right-child of
4389 ** the page. 4774 ** the page.
4390 */ 4775 */
4391 static void assertParentIndex(MemPage *pParent, int iIdx, Pgno iChild){ 4776 static void assertParentIndex(MemPage *pParent, int iIdx, Pgno iChild){
4777 if( CORRUPT_DB ) return; /* The conditions tested below might not be true
4778 ** in a corrupt database */
4392 assert( iIdx<=pParent->nCell ); 4779 assert( iIdx<=pParent->nCell );
4393 if( iIdx==pParent->nCell ){ 4780 if( iIdx==pParent->nCell ){
4394 assert( get4byte(&pParent->aData[pParent->hdrOffset+8])==iChild ); 4781 assert( get4byte(&pParent->aData[pParent->hdrOffset+8])==iChild );
4395 }else{ 4782 }else{
4396 assert( get4byte(findCell(pParent, iIdx))==iChild ); 4783 assert( get4byte(findCell(pParent, iIdx))==iChild );
4397 } 4784 }
4398 } 4785 }
4399 #else 4786 #else
4400 # define assertParentIndex(x,y,z) 4787 # define assertParentIndex(x,y,z)
4401 #endif 4788 #endif
4402 4789
4403 /* 4790 /*
4404 ** Move the cursor up to the parent page. 4791 ** Move the cursor up to the parent page.
4405 ** 4792 **
4406 ** pCur->idx is set to the cell index that contains the pointer 4793 ** pCur->idx is set to the cell index that contains the pointer
4407 ** to the page we are coming from. If we are coming from the 4794 ** to the page we are coming from. If we are coming from the
4408 ** right-most child page then pCur->idx is set to one more than 4795 ** right-most child page then pCur->idx is set to one more than
4409 ** the largest cell index. 4796 ** the largest cell index.
4410 */ 4797 */
4411 static void moveToParent(BtCursor *pCur){ 4798 static void moveToParent(BtCursor *pCur){
4412 assert( cursorHoldsMutex(pCur) ); 4799 assert( cursorHoldsMutex(pCur) );
4413 assert( pCur->eState==CURSOR_VALID ); 4800 assert( pCur->eState==CURSOR_VALID );
4414 assert( pCur->iPage>0 ); 4801 assert( pCur->iPage>0 );
4415 assert( pCur->apPage[pCur->iPage] ); 4802 assert( pCur->apPage[pCur->iPage] );
4416
4417 /* UPDATE: It is actually possible for the condition tested by the assert
4418 ** below to be untrue if the database file is corrupt. This can occur if
4419 ** one cursor has modified page pParent while a reference to it is held
4420 ** by a second cursor. Which can only happen if a single page is linked
4421 ** into more than one b-tree structure in a corrupt database. */
4422 #if 0
4423 assertParentIndex( 4803 assertParentIndex(
4424 pCur->apPage[pCur->iPage-1], 4804 pCur->apPage[pCur->iPage-1],
4425 pCur->aiIdx[pCur->iPage-1], 4805 pCur->aiIdx[pCur->iPage-1],
4426 pCur->apPage[pCur->iPage]->pgno 4806 pCur->apPage[pCur->iPage]->pgno
4427 ); 4807 );
4428 #endif
4429 testcase( pCur->aiIdx[pCur->iPage-1] > pCur->apPage[pCur->iPage-1]->nCell ); 4808 testcase( pCur->aiIdx[pCur->iPage-1] > pCur->apPage[pCur->iPage-1]->nCell );
4430
4431 releasePage(pCur->apPage[pCur->iPage]);
4432 pCur->iPage--;
4433 pCur->info.nSize = 0; 4809 pCur->info.nSize = 0;
4434 pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl); 4810 pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl);
4811 releasePageNotNull(pCur->apPage[pCur->iPage--]);
4435 } 4812 }
4436 4813
4437 /* 4814 /*
4438 ** Move the cursor to point to the root page of its b-tree structure. 4815 ** Move the cursor to point to the root page of its b-tree structure.
4439 ** 4816 **
4440 ** If the table has a virtual root page, then the cursor is moved to point 4817 ** If the table has a virtual root page, then the cursor is moved to point
4441 ** to the virtual root page instead of the actual root page. A table has a 4818 ** to the virtual root page instead of the actual root page. A table has a
4442 ** virtual root page when the actual root page contains no cells and a 4819 ** virtual root page when the actual root page contains no cells and a
4443 ** single child page. This can only happen with the table rooted at page 1. 4820 ** single child page. This can only happen with the table rooted at page 1.
4444 ** 4821 **
(...skipping 20 matching lines...) Expand all
4465 assert( CURSOR_FAULT > CURSOR_REQUIRESEEK ); 4842 assert( CURSOR_FAULT > CURSOR_REQUIRESEEK );
4466 if( pCur->eState>=CURSOR_REQUIRESEEK ){ 4843 if( pCur->eState>=CURSOR_REQUIRESEEK ){
4467 if( pCur->eState==CURSOR_FAULT ){ 4844 if( pCur->eState==CURSOR_FAULT ){
4468 assert( pCur->skipNext!=SQLITE_OK ); 4845 assert( pCur->skipNext!=SQLITE_OK );
4469 return pCur->skipNext; 4846 return pCur->skipNext;
4470 } 4847 }
4471 sqlite3BtreeClearCursor(pCur); 4848 sqlite3BtreeClearCursor(pCur);
4472 } 4849 }
4473 4850
4474 if( pCur->iPage>=0 ){ 4851 if( pCur->iPage>=0 ){
4475 while( pCur->iPage ) releasePage(pCur->apPage[pCur->iPage--]); 4852 while( pCur->iPage ){
4853 assert( pCur->apPage[pCur->iPage]!=0 );
4854 releasePageNotNull(pCur->apPage[pCur->iPage--]);
4855 }
4476 }else if( pCur->pgnoRoot==0 ){ 4856 }else if( pCur->pgnoRoot==0 ){
4477 pCur->eState = CURSOR_INVALID; 4857 pCur->eState = CURSOR_INVALID;
4478 return SQLITE_OK; 4858 return SQLITE_OK;
4479 }else{ 4859 }else{
4860 assert( pCur->iPage==(-1) );
4480 rc = getAndInitPage(pCur->pBtree->pBt, pCur->pgnoRoot, &pCur->apPage[0], 4861 rc = getAndInitPage(pCur->pBtree->pBt, pCur->pgnoRoot, &pCur->apPage[0],
4481 (pCur->curFlags & BTCF_WriteFlag)==0 ? PAGER_GET_READONLY : 0); 4862 0, pCur->curPagerFlags);
4482 if( rc!=SQLITE_OK ){ 4863 if( rc!=SQLITE_OK ){
4483 pCur->eState = CURSOR_INVALID; 4864 pCur->eState = CURSOR_INVALID;
4484 return rc; 4865 return rc;
4485 } 4866 }
4486 pCur->iPage = 0; 4867 pCur->iPage = 0;
4868 pCur->curIntKey = pCur->apPage[0]->intKey;
4487 } 4869 }
4488 pRoot = pCur->apPage[0]; 4870 pRoot = pCur->apPage[0];
4489 assert( pRoot->pgno==pCur->pgnoRoot ); 4871 assert( pRoot->pgno==pCur->pgnoRoot );
4490 4872
4491 /* If pCur->pKeyInfo is not NULL, then the caller that opened this cursor 4873 /* If pCur->pKeyInfo is not NULL, then the caller that opened this cursor
4492 ** expected to open it on an index b-tree. Otherwise, if pKeyInfo is 4874 ** expected to open it on an index b-tree. Otherwise, if pKeyInfo is
4493 ** NULL, the caller expects a table b-tree. If this is not the case, 4875 ** NULL, the caller expects a table b-tree. If this is not the case,
4494 ** return an SQLITE_CORRUPT error. 4876 ** return an SQLITE_CORRUPT error.
4495 ** 4877 **
4496 ** Earlier versions of SQLite assumed that this test could not fail 4878 ** Earlier versions of SQLite assumed that this test could not fail
(...skipping 163 matching lines...) Expand 10 before | Expand all | Expand 10 after
4660 ** *pRes<0 The cursor is left pointing at an entry that 5042 ** *pRes<0 The cursor is left pointing at an entry that
4661 ** is smaller than intKey/pIdxKey or if the table is empty 5043 ** is smaller than intKey/pIdxKey or if the table is empty
4662 ** and the cursor is therefore left pointing to nothing. 5044 ** and the cursor is therefore left pointing to nothing.
4663 ** 5045 **
4664 ** *pRes==0 The cursor is left pointing at an entry that 5046 ** *pRes==0 The cursor is left pointing at an entry that
4665 ** exactly matches intKey/pIdxKey. 5047 ** exactly matches intKey/pIdxKey.
4666 ** 5048 **
4667 ** *pRes>0 The cursor is left pointing at an entry that 5049 ** *pRes>0 The cursor is left pointing at an entry that
4668 ** is larger than intKey/pIdxKey. 5050 ** is larger than intKey/pIdxKey.
4669 ** 5051 **
5052 ** For index tables, the pIdxKey->eqSeen field is set to 1 if there
5053 ** exists an entry in the table that exactly matches pIdxKey.
4670 */ 5054 */
4671 int sqlite3BtreeMovetoUnpacked( 5055 int sqlite3BtreeMovetoUnpacked(
4672 BtCursor *pCur, /* The cursor to be moved */ 5056 BtCursor *pCur, /* The cursor to be moved */
4673 UnpackedRecord *pIdxKey, /* Unpacked index key */ 5057 UnpackedRecord *pIdxKey, /* Unpacked index key */
4674 i64 intKey, /* The table key */ 5058 i64 intKey, /* The table key */
4675 int biasRight, /* If true, bias the search to the high end */ 5059 int biasRight, /* If true, bias the search to the high end */
4676 int *pRes /* Write search results here */ 5060 int *pRes /* Write search results here */
4677 ){ 5061 ){
4678 int rc; 5062 int rc;
4679 RecordCompare xRecordCompare; 5063 RecordCompare xRecordCompare;
4680 5064
4681 assert( cursorHoldsMutex(pCur) ); 5065 assert( cursorHoldsMutex(pCur) );
4682 assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) ); 5066 assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) );
4683 assert( pRes ); 5067 assert( pRes );
4684 assert( (pIdxKey==0)==(pCur->pKeyInfo==0) ); 5068 assert( (pIdxKey==0)==(pCur->pKeyInfo==0) );
4685 5069
4686 /* If the cursor is already positioned at the point we are trying 5070 /* If the cursor is already positioned at the point we are trying
4687 ** to move to, then just return without doing any work */ 5071 ** to move to, then just return without doing any work */
4688 if( pCur->eState==CURSOR_VALID && (pCur->curFlags & BTCF_ValidNKey)!=0 5072 if( pCur->eState==CURSOR_VALID && (pCur->curFlags & BTCF_ValidNKey)!=0
4689 && pCur->apPage[0]->intKey 5073 && pCur->curIntKey
4690 ){ 5074 ){
4691 if( pCur->info.nKey==intKey ){ 5075 if( pCur->info.nKey==intKey ){
4692 *pRes = 0; 5076 *pRes = 0;
4693 return SQLITE_OK; 5077 return SQLITE_OK;
4694 } 5078 }
4695 if( (pCur->curFlags & BTCF_AtLast)!=0 && pCur->info.nKey<intKey ){ 5079 if( (pCur->curFlags & BTCF_AtLast)!=0 && pCur->info.nKey<intKey ){
4696 *pRes = -1; 5080 *pRes = -1;
4697 return SQLITE_OK; 5081 return SQLITE_OK;
4698 } 5082 }
4699 } 5083 }
(...skipping 14 matching lines...) Expand all
4714 return rc; 5098 return rc;
4715 } 5099 }
4716 assert( pCur->pgnoRoot==0 || pCur->apPage[pCur->iPage] ); 5100 assert( pCur->pgnoRoot==0 || pCur->apPage[pCur->iPage] );
4717 assert( pCur->pgnoRoot==0 || pCur->apPage[pCur->iPage]->isInit ); 5101 assert( pCur->pgnoRoot==0 || pCur->apPage[pCur->iPage]->isInit );
4718 assert( pCur->eState==CURSOR_INVALID || pCur->apPage[pCur->iPage]->nCell>0 ); 5102 assert( pCur->eState==CURSOR_INVALID || pCur->apPage[pCur->iPage]->nCell>0 );
4719 if( pCur->eState==CURSOR_INVALID ){ 5103 if( pCur->eState==CURSOR_INVALID ){
4720 *pRes = -1; 5104 *pRes = -1;
4721 assert( pCur->pgnoRoot==0 || pCur->apPage[pCur->iPage]->nCell==0 ); 5105 assert( pCur->pgnoRoot==0 || pCur->apPage[pCur->iPage]->nCell==0 );
4722 return SQLITE_OK; 5106 return SQLITE_OK;
4723 } 5107 }
4724 assert( pCur->apPage[0]->intKey || pIdxKey ); 5108 assert( pCur->apPage[0]->intKey==pCur->curIntKey );
5109 assert( pCur->curIntKey || pIdxKey );
4725 for(;;){ 5110 for(;;){
4726 int lwr, upr, idx, c; 5111 int lwr, upr, idx, c;
4727 Pgno chldPg; 5112 Pgno chldPg;
4728 MemPage *pPage = pCur->apPage[pCur->iPage]; 5113 MemPage *pPage = pCur->apPage[pCur->iPage];
4729 u8 *pCell; /* Pointer to current cell in pPage */ 5114 u8 *pCell; /* Pointer to current cell in pPage */
4730 5115
4731 /* pPage->nCell must be greater than zero. If this is the root-page 5116 /* pPage->nCell must be greater than zero. If this is the root-page
4732 ** the cursor would have been INVALID above and this for(;;) loop 5117 ** the cursor would have been INVALID above and this for(;;) loop
4733 ** not run. If this is not the root-page, then the moveToChild() routine 5118 ** not run. If this is not the root-page, then the moveToChild() routine
4734 ** would have already detected db corruption. Similarly, pPage must 5119 ** would have already detected db corruption. Similarly, pPage must
4735 ** be the right kind (index or table) of b-tree page. Otherwise 5120 ** be the right kind (index or table) of b-tree page. Otherwise
4736 ** a moveToChild() or moveToRoot() call would have detected corruption. */ 5121 ** a moveToChild() or moveToRoot() call would have detected corruption. */
4737 assert( pPage->nCell>0 ); 5122 assert( pPage->nCell>0 );
4738 assert( pPage->intKey==(pIdxKey==0) ); 5123 assert( pPage->intKey==(pIdxKey==0) );
4739 lwr = 0; 5124 lwr = 0;
4740 upr = pPage->nCell-1; 5125 upr = pPage->nCell-1;
4741 assert( biasRight==0 || biasRight==1 ); 5126 assert( biasRight==0 || biasRight==1 );
4742 idx = upr>>(1-biasRight); /* idx = biasRight ? upr : (lwr+upr)/2; */ 5127 idx = upr>>(1-biasRight); /* idx = biasRight ? upr : (lwr+upr)/2; */
4743 pCur->aiIdx[pCur->iPage] = (u16)idx; 5128 pCur->aiIdx[pCur->iPage] = (u16)idx;
4744 if( xRecordCompare==0 ){ 5129 if( xRecordCompare==0 ){
4745 for(;;){ 5130 for(;;){
4746 i64 nCellKey; 5131 i64 nCellKey;
4747 pCell = findCell(pPage, idx) + pPage->childPtrSize; 5132 pCell = findCellPastPtr(pPage, idx);
4748 if( pPage->intKeyLeaf ){ 5133 if( pPage->intKeyLeaf ){
4749 while( 0x80 <= *(pCell++) ){ 5134 while( 0x80 <= *(pCell++) ){
4750 if( pCell>=pPage->aDataEnd ) return SQLITE_CORRUPT_BKPT; 5135 if( pCell>=pPage->aDataEnd ) return SQLITE_CORRUPT_BKPT;
4751 } 5136 }
4752 } 5137 }
4753 getVarint(pCell, (u64*)&nCellKey); 5138 getVarint(pCell, (u64*)&nCellKey);
4754 if( nCellKey<intKey ){ 5139 if( nCellKey<intKey ){
4755 lwr = idx+1; 5140 lwr = idx+1;
4756 if( lwr>upr ){ c = -1; break; } 5141 if( lwr>upr ){ c = -1; break; }
4757 }else if( nCellKey>intKey ){ 5142 }else if( nCellKey>intKey ){
(...skipping 11 matching lines...) Expand all
4769 *pRes = 0; 5154 *pRes = 0;
4770 rc = SQLITE_OK; 5155 rc = SQLITE_OK;
4771 goto moveto_finish; 5156 goto moveto_finish;
4772 } 5157 }
4773 } 5158 }
4774 assert( lwr+upr>=0 ); 5159 assert( lwr+upr>=0 );
4775 idx = (lwr+upr)>>1; /* idx = (lwr+upr)/2; */ 5160 idx = (lwr+upr)>>1; /* idx = (lwr+upr)/2; */
4776 } 5161 }
4777 }else{ 5162 }else{
4778 for(;;){ 5163 for(;;){
4779 int nCell; 5164 int nCell; /* Size of the pCell cell in bytes */
4780 pCell = findCell(pPage, idx) + pPage->childPtrSize; 5165 pCell = findCellPastPtr(pPage, idx);
4781 5166
4782 /* The maximum supported page-size is 65536 bytes. This means that 5167 /* The maximum supported page-size is 65536 bytes. This means that
4783 ** the maximum number of record bytes stored on an index B-Tree 5168 ** the maximum number of record bytes stored on an index B-Tree
4784 ** page is less than 16384 bytes and may be stored as a 2-byte 5169 ** page is less than 16384 bytes and may be stored as a 2-byte
4785 ** varint. This information is used to attempt to avoid parsing 5170 ** varint. This information is used to attempt to avoid parsing
4786 ** the entire cell by checking for the cases where the record is 5171 ** the entire cell by checking for the cases where the record is
4787 ** stored entirely within the b-tree page by inspecting the first 5172 ** stored entirely within the b-tree page by inspecting the first
4788 ** 2 bytes of the cell. 5173 ** 2 bytes of the cell.
4789 */ 5174 */
4790 nCell = pCell[0]; 5175 nCell = pCell[0];
4791 if( nCell<=pPage->max1bytePayload ){ 5176 if( nCell<=pPage->max1bytePayload ){
4792 /* This branch runs if the record-size field of the cell is a 5177 /* This branch runs if the record-size field of the cell is a
4793 ** single byte varint and the record fits entirely on the main 5178 ** single byte varint and the record fits entirely on the main
4794 ** b-tree page. */ 5179 ** b-tree page. */
4795 testcase( pCell+nCell+1==pPage->aDataEnd ); 5180 testcase( pCell+nCell+1==pPage->aDataEnd );
4796 c = xRecordCompare(nCell, (void*)&pCell[1], pIdxKey); 5181 c = xRecordCompare(nCell, (void*)&pCell[1], pIdxKey);
4797 }else if( !(pCell[1] & 0x80) 5182 }else if( !(pCell[1] & 0x80)
4798 && (nCell = ((nCell&0x7f)<<7) + pCell[1])<=pPage->maxLocal 5183 && (nCell = ((nCell&0x7f)<<7) + pCell[1])<=pPage->maxLocal
4799 ){ 5184 ){
4800 /* The record-size field is a 2 byte varint and the record 5185 /* The record-size field is a 2 byte varint and the record
4801 ** fits entirely on the main b-tree page. */ 5186 ** fits entirely on the main b-tree page. */
4802 testcase( pCell+nCell+2==pPage->aDataEnd ); 5187 testcase( pCell+nCell+2==pPage->aDataEnd );
4803 c = xRecordCompare(nCell, (void*)&pCell[2], pIdxKey); 5188 c = xRecordCompare(nCell, (void*)&pCell[2], pIdxKey);
4804 }else{ 5189 }else{
4805 /* The record flows over onto one or more overflow pages. In 5190 /* The record flows over onto one or more overflow pages. In
4806 ** this case the whole cell needs to be parsed, a buffer allocated 5191 ** this case the whole cell needs to be parsed, a buffer allocated
4807 ** and accessPayload() used to retrieve the record into the 5192 ** and accessPayload() used to retrieve the record into the
4808 ** buffer before VdbeRecordCompare() can be called. */ 5193 ** buffer before VdbeRecordCompare() can be called.
5194 **
5195 ** If the record is corrupt, the xRecordCompare routine may read
5196 ** up to two varints past the end of the buffer. An extra 18
5197 ** bytes of padding is allocated at the end of the buffer in
5198 ** case this happens. */
4809 void *pCellKey; 5199 void *pCellKey;
4810 u8 * const pCellBody = pCell - pPage->childPtrSize; 5200 u8 * const pCellBody = pCell - pPage->childPtrSize;
4811 btreeParseCellPtr(pPage, pCellBody, &pCur->info); 5201 pPage->xParseCell(pPage, pCellBody, &pCur->info);
4812 nCell = (int)pCur->info.nKey; 5202 nCell = (int)pCur->info.nKey;
4813 pCellKey = sqlite3Malloc( nCell ); 5203 testcase( nCell<0 ); /* True if key size is 2^32 or more */
5204 testcase( nCell==0 ); /* Invalid key size: 0x80 0x80 0x00 */
5205 testcase( nCell==1 ); /* Invalid key size: 0x80 0x80 0x01 */
5206 testcase( nCell==2 ); /* Minimum legal index key size */
5207 if( nCell<2 ){
5208 rc = SQLITE_CORRUPT_BKPT;
5209 goto moveto_finish;
5210 }
5211 pCellKey = sqlite3Malloc( nCell+18 );
4814 if( pCellKey==0 ){ 5212 if( pCellKey==0 ){
4815 rc = SQLITE_NOMEM; 5213 rc = SQLITE_NOMEM;
4816 goto moveto_finish; 5214 goto moveto_finish;
4817 } 5215 }
4818 pCur->aiIdx[pCur->iPage] = (u16)idx; 5216 pCur->aiIdx[pCur->iPage] = (u16)idx;
4819 rc = accessPayload(pCur, 0, nCell, (unsigned char*)pCellKey, 2); 5217 rc = accessPayload(pCur, 0, nCell, (unsigned char*)pCellKey, 2);
4820 if( rc ){ 5218 if( rc ){
4821 sqlite3_free(pCellKey); 5219 sqlite3_free(pCellKey);
4822 goto moveto_finish; 5220 goto moveto_finish;
4823 } 5221 }
(...skipping 272 matching lines...) Expand 10 before | Expand all | Expand 10 after
5096 5494
5097 /* 5495 /*
5098 ** Allocate a new page from the database file. 5496 ** Allocate a new page from the database file.
5099 ** 5497 **
5100 ** The new page is marked as dirty. (In other words, sqlite3PagerWrite() 5498 ** The new page is marked as dirty. (In other words, sqlite3PagerWrite()
5101 ** has already been called on the new page.) The new page has also 5499 ** has already been called on the new page.) The new page has also
5102 ** been referenced and the calling routine is responsible for calling 5500 ** been referenced and the calling routine is responsible for calling
5103 ** sqlite3PagerUnref() on the new page when it is done. 5501 ** sqlite3PagerUnref() on the new page when it is done.
5104 ** 5502 **
5105 ** SQLITE_OK is returned on success. Any other return value indicates 5503 ** SQLITE_OK is returned on success. Any other return value indicates
5106 ** an error. *ppPage and *pPgno are undefined in the event of an error. 5504 ** an error. *ppPage is set to NULL in the event of an error.
5107 ** Do not invoke sqlite3PagerUnref() on *ppPage if an error is returned.
5108 ** 5505 **
5109 ** If the "nearby" parameter is not 0, then an effort is made to 5506 ** If the "nearby" parameter is not 0, then an effort is made to
5110 ** locate a page close to the page number "nearby". This can be used in an 5507 ** locate a page close to the page number "nearby". This can be used in an
5111 ** attempt to keep related pages close to each other in the database file, 5508 ** attempt to keep related pages close to each other in the database file,
5112 ** which in turn can make database access faster. 5509 ** which in turn can make database access faster.
5113 ** 5510 **
5114 ** If the eMode parameter is BTALLOC_EXACT and the nearby page exists 5511 ** If the eMode parameter is BTALLOC_EXACT and the nearby page exists
5115 ** anywhere on the free-list, then it is guaranteed to be returned. If 5512 ** anywhere on the free-list, then it is guaranteed to be returned. If
5116 ** eMode is BTALLOC_LT then the page returned will be less than or equal 5513 ** eMode is BTALLOC_LT then the page returned will be less than or equal
5117 ** to nearby if any such page exists. If eMode is BTALLOC_ANY then there 5514 ** to nearby if any such page exists. If eMode is BTALLOC_ANY then there
(...skipping 11 matching lines...) Expand all
5129 u32 n; /* Number of pages on the freelist */ 5526 u32 n; /* Number of pages on the freelist */
5130 u32 k; /* Number of leaves on the trunk of the freelist */ 5527 u32 k; /* Number of leaves on the trunk of the freelist */
5131 MemPage *pTrunk = 0; 5528 MemPage *pTrunk = 0;
5132 MemPage *pPrevTrunk = 0; 5529 MemPage *pPrevTrunk = 0;
5133 Pgno mxPage; /* Total size of the database file */ 5530 Pgno mxPage; /* Total size of the database file */
5134 5531
5135 assert( sqlite3_mutex_held(pBt->mutex) ); 5532 assert( sqlite3_mutex_held(pBt->mutex) );
5136 assert( eMode==BTALLOC_ANY || (nearby>0 && IfNotOmitAV(pBt->autoVacuum)) ); 5533 assert( eMode==BTALLOC_ANY || (nearby>0 && IfNotOmitAV(pBt->autoVacuum)) );
5137 pPage1 = pBt->pPage1; 5534 pPage1 = pBt->pPage1;
5138 mxPage = btreePagecount(pBt); 5535 mxPage = btreePagecount(pBt);
5536 /* EVIDENCE-OF: R-05119-02637 The 4-byte big-endian integer at offset 36
5537 ** stores the total number of pages on the freelist. */
5139 n = get4byte(&pPage1->aData[36]); 5538 n = get4byte(&pPage1->aData[36]);
5140 testcase( n==mxPage-1 ); 5539 testcase( n==mxPage-1 );
5141 if( n>=mxPage ){ 5540 if( n>=mxPage ){
5142 return SQLITE_CORRUPT_BKPT; 5541 return SQLITE_CORRUPT_BKPT;
5143 } 5542 }
5144 if( n>0 ){ 5543 if( n>0 ){
5145 /* There are pages on the freelist. Reuse one of those pages. */ 5544 /* There are pages on the freelist. Reuse one of those pages. */
5146 Pgno iTrunk; 5545 Pgno iTrunk;
5147 u8 searchList = 0; /* If the free-list must be searched for 'nearby' */ 5546 u8 searchList = 0; /* If the free-list must be searched for 'nearby' */
5547 u32 nSearch = 0; /* Count of the number of search attempts */
5148 5548
5149 /* If eMode==BTALLOC_EXACT and a query of the pointer-map 5549 /* If eMode==BTALLOC_EXACT and a query of the pointer-map
5150 ** shows that the page 'nearby' is somewhere on the free-list, then 5550 ** shows that the page 'nearby' is somewhere on the free-list, then
5151 ** the entire-list will be searched for that page. 5551 ** the entire-list will be searched for that page.
5152 */ 5552 */
5153 #ifndef SQLITE_OMIT_AUTOVACUUM 5553 #ifndef SQLITE_OMIT_AUTOVACUUM
5154 if( eMode==BTALLOC_EXACT ){ 5554 if( eMode==BTALLOC_EXACT ){
5155 if( nearby<=mxPage ){ 5555 if( nearby<=mxPage ){
5156 u8 eType; 5556 u8 eType;
5157 assert( nearby>0 ); 5557 assert( nearby>0 );
(...skipping 17 matching lines...) Expand all
5175 put4byte(&pPage1->aData[36], n-1); 5575 put4byte(&pPage1->aData[36], n-1);
5176 5576
5177 /* The code within this loop is run only once if the 'searchList' variable 5577 /* The code within this loop is run only once if the 'searchList' variable
5178 ** is not true. Otherwise, it runs once for each trunk-page on the 5578 ** is not true. Otherwise, it runs once for each trunk-page on the
5179 ** free-list until the page 'nearby' is located (eMode==BTALLOC_EXACT) 5579 ** free-list until the page 'nearby' is located (eMode==BTALLOC_EXACT)
5180 ** or until a page less than 'nearby' is located (eMode==BTALLOC_LT) 5580 ** or until a page less than 'nearby' is located (eMode==BTALLOC_LT)
5181 */ 5581 */
5182 do { 5582 do {
5183 pPrevTrunk = pTrunk; 5583 pPrevTrunk = pTrunk;
5184 if( pPrevTrunk ){ 5584 if( pPrevTrunk ){
5585 /* EVIDENCE-OF: R-01506-11053 The first integer on a freelist trunk page
5586 ** is the page number of the next freelist trunk page in the list or
5587 ** zero if this is the last freelist trunk page. */
5185 iTrunk = get4byte(&pPrevTrunk->aData[0]); 5588 iTrunk = get4byte(&pPrevTrunk->aData[0]);
5186 }else{ 5589 }else{
5590 /* EVIDENCE-OF: R-59841-13798 The 4-byte big-endian integer at offset 32
5591 ** stores the page number of the first page of the freelist, or zero if
5592 ** the freelist is empty. */
5187 iTrunk = get4byte(&pPage1->aData[32]); 5593 iTrunk = get4byte(&pPage1->aData[32]);
5188 } 5594 }
5189 testcase( iTrunk==mxPage ); 5595 testcase( iTrunk==mxPage );
5190 if( iTrunk>mxPage ){ 5596 if( iTrunk>mxPage || nSearch++ > n ){
5191 rc = SQLITE_CORRUPT_BKPT; 5597 rc = SQLITE_CORRUPT_BKPT;
5192 }else{ 5598 }else{
5193 rc = btreeGetPage(pBt, iTrunk, &pTrunk, 0); 5599 rc = btreeGetUnusedPage(pBt, iTrunk, &pTrunk, 0);
5194 } 5600 }
5195 if( rc ){ 5601 if( rc ){
5196 pTrunk = 0; 5602 pTrunk = 0;
5197 goto end_allocate_page; 5603 goto end_allocate_page;
5198 } 5604 }
5199 assert( pTrunk!=0 ); 5605 assert( pTrunk!=0 );
5200 assert( pTrunk->aData!=0 ); 5606 assert( pTrunk->aData!=0 );
5201 5607 /* EVIDENCE-OF: R-13523-04394 The second integer on a freelist trunk page
5202 k = get4byte(&pTrunk->aData[4]); /* # of leaves on this trunk page */ 5608 ** is the number of leaf page pointers to follow. */
5609 k = get4byte(&pTrunk->aData[4]);
5203 if( k==0 && !searchList ){ 5610 if( k==0 && !searchList ){
5204 /* The trunk has no leaves and the list is not being searched. 5611 /* The trunk has no leaves and the list is not being searched.
5205 ** So extract the trunk page itself and use it as the newly 5612 ** So extract the trunk page itself and use it as the newly
5206 ** allocated page */ 5613 ** allocated page */
5207 assert( pPrevTrunk==0 ); 5614 assert( pPrevTrunk==0 );
5208 rc = sqlite3PagerWrite(pTrunk->pDbPage); 5615 rc = sqlite3PagerWrite(pTrunk->pDbPage);
5209 if( rc ){ 5616 if( rc ){
5210 goto end_allocate_page; 5617 goto end_allocate_page;
5211 } 5618 }
5212 *pPgno = iTrunk; 5619 *pPgno = iTrunk;
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
5247 ** pointers to free-list leaves. The first leaf becomes a trunk 5654 ** pointers to free-list leaves. The first leaf becomes a trunk
5248 ** page in this case. 5655 ** page in this case.
5249 */ 5656 */
5250 MemPage *pNewTrunk; 5657 MemPage *pNewTrunk;
5251 Pgno iNewTrunk = get4byte(&pTrunk->aData[8]); 5658 Pgno iNewTrunk = get4byte(&pTrunk->aData[8]);
5252 if( iNewTrunk>mxPage ){ 5659 if( iNewTrunk>mxPage ){
5253 rc = SQLITE_CORRUPT_BKPT; 5660 rc = SQLITE_CORRUPT_BKPT;
5254 goto end_allocate_page; 5661 goto end_allocate_page;
5255 } 5662 }
5256 testcase( iNewTrunk==mxPage ); 5663 testcase( iNewTrunk==mxPage );
5257 rc = btreeGetPage(pBt, iNewTrunk, &pNewTrunk, 0); 5664 rc = btreeGetUnusedPage(pBt, iNewTrunk, &pNewTrunk, 0);
5258 if( rc!=SQLITE_OK ){ 5665 if( rc!=SQLITE_OK ){
5259 goto end_allocate_page; 5666 goto end_allocate_page;
5260 } 5667 }
5261 rc = sqlite3PagerWrite(pNewTrunk->pDbPage); 5668 rc = sqlite3PagerWrite(pNewTrunk->pDbPage);
5262 if( rc!=SQLITE_OK ){ 5669 if( rc!=SQLITE_OK ){
5263 releasePage(pNewTrunk); 5670 releasePage(pNewTrunk);
5264 goto end_allocate_page; 5671 goto end_allocate_page;
5265 } 5672 }
5266 memcpy(&pNewTrunk->aData[0], &pTrunk->aData[0], 4); 5673 memcpy(&pNewTrunk->aData[0], &pTrunk->aData[0], 4);
5267 put4byte(&pNewTrunk->aData[4], k-1); 5674 put4byte(&pNewTrunk->aData[4], k-1);
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after
5327 TRACE(("ALLOCATE: %d was leaf %d of %d on trunk %d" 5734 TRACE(("ALLOCATE: %d was leaf %d of %d on trunk %d"
5328 ": %d more free pages\n", 5735 ": %d more free pages\n",
5329 *pPgno, closest+1, k, pTrunk->pgno, n-1)); 5736 *pPgno, closest+1, k, pTrunk->pgno, n-1));
5330 rc = sqlite3PagerWrite(pTrunk->pDbPage); 5737 rc = sqlite3PagerWrite(pTrunk->pDbPage);
5331 if( rc ) goto end_allocate_page; 5738 if( rc ) goto end_allocate_page;
5332 if( closest<k-1 ){ 5739 if( closest<k-1 ){
5333 memcpy(&aData[8+closest*4], &aData[4+k*4], 4); 5740 memcpy(&aData[8+closest*4], &aData[4+k*4], 4);
5334 } 5741 }
5335 put4byte(&aData[4], k-1); 5742 put4byte(&aData[4], k-1);
5336 noContent = !btreeGetHasContent(pBt, *pPgno)? PAGER_GET_NOCONTENT : 0; 5743 noContent = !btreeGetHasContent(pBt, *pPgno)? PAGER_GET_NOCONTENT : 0;
5337 rc = btreeGetPage(pBt, *pPgno, ppPage, noContent); 5744 rc = btreeGetUnusedPage(pBt, *pPgno, ppPage, noContent);
5338 if( rc==SQLITE_OK ){ 5745 if( rc==SQLITE_OK ){
5339 rc = sqlite3PagerWrite((*ppPage)->pDbPage); 5746 rc = sqlite3PagerWrite((*ppPage)->pDbPage);
5340 if( rc!=SQLITE_OK ){ 5747 if( rc!=SQLITE_OK ){
5341 releasePage(*ppPage); 5748 releasePage(*ppPage);
5749 *ppPage = 0;
5342 } 5750 }
5343 } 5751 }
5344 searchList = 0; 5752 searchList = 0;
5345 } 5753 }
5346 } 5754 }
5347 releasePage(pPrevTrunk); 5755 releasePage(pPrevTrunk);
5348 pPrevTrunk = 0; 5756 pPrevTrunk = 0;
5349 }while( searchList ); 5757 }while( searchList );
5350 }else{ 5758 }else{
5351 /* There are no pages on the freelist, so append a new page to the 5759 /* There are no pages on the freelist, so append a new page to the
(...skipping 23 matching lines...) Expand all
5375 5783
5376 #ifndef SQLITE_OMIT_AUTOVACUUM 5784 #ifndef SQLITE_OMIT_AUTOVACUUM
5377 if( pBt->autoVacuum && PTRMAP_ISPAGE(pBt, pBt->nPage) ){ 5785 if( pBt->autoVacuum && PTRMAP_ISPAGE(pBt, pBt->nPage) ){
5378 /* If *pPgno refers to a pointer-map page, allocate two new pages 5786 /* If *pPgno refers to a pointer-map page, allocate two new pages
5379 ** at the end of the file instead of one. The first allocated page 5787 ** at the end of the file instead of one. The first allocated page
5380 ** becomes a new pointer-map page, the second is used by the caller. 5788 ** becomes a new pointer-map page, the second is used by the caller.
5381 */ 5789 */
5382 MemPage *pPg = 0; 5790 MemPage *pPg = 0;
5383 TRACE(("ALLOCATE: %d from end of file (pointer-map page)\n", pBt->nPage)); 5791 TRACE(("ALLOCATE: %d from end of file (pointer-map page)\n", pBt->nPage));
5384 assert( pBt->nPage!=PENDING_BYTE_PAGE(pBt) ); 5792 assert( pBt->nPage!=PENDING_BYTE_PAGE(pBt) );
5385 rc = btreeGetPage(pBt, pBt->nPage, &pPg, bNoContent); 5793 rc = btreeGetUnusedPage(pBt, pBt->nPage, &pPg, bNoContent);
5386 if( rc==SQLITE_OK ){ 5794 if( rc==SQLITE_OK ){
5387 rc = sqlite3PagerWrite(pPg->pDbPage); 5795 rc = sqlite3PagerWrite(pPg->pDbPage);
5388 releasePage(pPg); 5796 releasePage(pPg);
5389 } 5797 }
5390 if( rc ) return rc; 5798 if( rc ) return rc;
5391 pBt->nPage++; 5799 pBt->nPage++;
5392 if( pBt->nPage==PENDING_BYTE_PAGE(pBt) ){ pBt->nPage++; } 5800 if( pBt->nPage==PENDING_BYTE_PAGE(pBt) ){ pBt->nPage++; }
5393 } 5801 }
5394 #endif 5802 #endif
5395 put4byte(28 + (u8*)pBt->pPage1->aData, pBt->nPage); 5803 put4byte(28 + (u8*)pBt->pPage1->aData, pBt->nPage);
5396 *pPgno = pBt->nPage; 5804 *pPgno = pBt->nPage;
5397 5805
5398 assert( *pPgno!=PENDING_BYTE_PAGE(pBt) ); 5806 assert( *pPgno!=PENDING_BYTE_PAGE(pBt) );
5399 rc = btreeGetPage(pBt, *pPgno, ppPage, bNoContent); 5807 rc = btreeGetUnusedPage(pBt, *pPgno, ppPage, bNoContent);
5400 if( rc ) return rc; 5808 if( rc ) return rc;
5401 rc = sqlite3PagerWrite((*ppPage)->pDbPage); 5809 rc = sqlite3PagerWrite((*ppPage)->pDbPage);
5402 if( rc!=SQLITE_OK ){ 5810 if( rc!=SQLITE_OK ){
5403 releasePage(*ppPage); 5811 releasePage(*ppPage);
5812 *ppPage = 0;
5404 } 5813 }
5405 TRACE(("ALLOCATE: %d from end of file\n", *pPgno)); 5814 TRACE(("ALLOCATE: %d from end of file\n", *pPgno));
5406 } 5815 }
5407 5816
5408 assert( *pPgno!=PENDING_BYTE_PAGE(pBt) ); 5817 assert( *pPgno!=PENDING_BYTE_PAGE(pBt) );
5409 5818
5410 end_allocate_page: 5819 end_allocate_page:
5411 releasePage(pTrunk); 5820 releasePage(pTrunk);
5412 releasePage(pPrevTrunk); 5821 releasePage(pPrevTrunk);
5413 if( rc==SQLITE_OK ){ 5822 assert( rc!=SQLITE_OK || sqlite3PagerPageRefcount((*ppPage)->pDbPage)<=1 );
5414 if( sqlite3PagerPageRefcount((*ppPage)->pDbPage)>1 ){ 5823 assert( rc!=SQLITE_OK || (*ppPage)->isInit==0 );
5415 releasePage(*ppPage);
5416 *ppPage = 0;
5417 return SQLITE_CORRUPT_BKPT;
5418 }
5419 (*ppPage)->isInit = 0;
5420 }else{
5421 *ppPage = 0;
5422 }
5423 assert( rc!=SQLITE_OK || sqlite3PagerIswriteable((*ppPage)->pDbPage) );
5424 return rc; 5824 return rc;
5425 } 5825 }
5426 5826
5427 /* 5827 /*
5428 ** This function is used to add page iPage to the database file free-list. 5828 ** This function is used to add page iPage to the database file free-list.
5429 ** It is assumed that the page is not already a part of the free-list. 5829 ** It is assumed that the page is not already a part of the free-list.
5430 ** 5830 **
5431 ** The value passed as the second argument to this function is optional. 5831 ** The value passed as the second argument to this function is optional.
5432 ** If the caller happens to have a pointer to the MemPage object 5832 ** If the caller happens to have a pointer to the MemPage object
5433 ** corresponding to page iPage handy, it may pass it as the second value. 5833 ** corresponding to page iPage handy, it may pass it as the second value.
5434 ** Otherwise, it may pass NULL. 5834 ** Otherwise, it may pass NULL.
5435 ** 5835 **
5436 ** If a pointer to a MemPage object is passed as the second argument, 5836 ** If a pointer to a MemPage object is passed as the second argument,
5437 ** its reference count is not altered by this function. 5837 ** its reference count is not altered by this function.
5438 */ 5838 */
5439 static int freePage2(BtShared *pBt, MemPage *pMemPage, Pgno iPage){ 5839 static int freePage2(BtShared *pBt, MemPage *pMemPage, Pgno iPage){
5440 MemPage *pTrunk = 0; /* Free-list trunk page */ 5840 MemPage *pTrunk = 0; /* Free-list trunk page */
5441 Pgno iTrunk = 0; /* Page number of free-list trunk page */ 5841 Pgno iTrunk = 0; /* Page number of free-list trunk page */
5442 MemPage *pPage1 = pBt->pPage1; /* Local reference to page 1 */ 5842 MemPage *pPage1 = pBt->pPage1; /* Local reference to page 1 */
5443 MemPage *pPage; /* Page being freed. May be NULL. */ 5843 MemPage *pPage; /* Page being freed. May be NULL. */
5444 int rc; /* Return Code */ 5844 int rc; /* Return Code */
5445 int nFree; /* Initial number of pages on free-list */ 5845 int nFree; /* Initial number of pages on free-list */
5446 5846
5447 assert( sqlite3_mutex_held(pBt->mutex) ); 5847 assert( sqlite3_mutex_held(pBt->mutex) );
5448 assert( iPage>1 ); 5848 assert( CORRUPT_DB || iPage>1 );
5449 assert( !pMemPage || pMemPage->pgno==iPage ); 5849 assert( !pMemPage || pMemPage->pgno==iPage );
5450 5850
5851 if( iPage<2 ) return SQLITE_CORRUPT_BKPT;
5451 if( pMemPage ){ 5852 if( pMemPage ){
5452 pPage = pMemPage; 5853 pPage = pMemPage;
5453 sqlite3PagerRef(pPage->pDbPage); 5854 sqlite3PagerRef(pPage->pDbPage);
5454 }else{ 5855 }else{
5455 pPage = btreePageLookup(pBt, iPage); 5856 pPage = btreePageLookup(pBt, iPage);
5456 } 5857 }
5457 5858
5458 /* Increment the free page count on pPage1 */ 5859 /* Increment the free page count on pPage1 */
5459 rc = sqlite3PagerWrite(pPage1->pDbPage); 5860 rc = sqlite3PagerWrite(pPage1->pDbPage);
5460 if( rc ) goto freepage_out; 5861 if( rc ) goto freepage_out;
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after
5510 ** Note that the trunk page is not really full until it contains 5911 ** Note that the trunk page is not really full until it contains
5511 ** usableSize/4 - 2 entries, not usableSize/4 - 8 entries as we have 5912 ** usableSize/4 - 2 entries, not usableSize/4 - 8 entries as we have
5512 ** coded. But due to a coding error in versions of SQLite prior to 5913 ** coded. But due to a coding error in versions of SQLite prior to
5513 ** 3.6.0, databases with freelist trunk pages holding more than 5914 ** 3.6.0, databases with freelist trunk pages holding more than
5514 ** usableSize/4 - 8 entries will be reported as corrupt. In order 5915 ** usableSize/4 - 8 entries will be reported as corrupt. In order
5515 ** to maintain backwards compatibility with older versions of SQLite, 5916 ** to maintain backwards compatibility with older versions of SQLite,
5516 ** we will continue to restrict the number of entries to usableSize/4 - 8 5917 ** we will continue to restrict the number of entries to usableSize/4 - 8
5517 ** for now. At some point in the future (once everyone has upgraded 5918 ** for now. At some point in the future (once everyone has upgraded
5518 ** to 3.6.0 or later) we should consider fixing the conditional above 5919 ** to 3.6.0 or later) we should consider fixing the conditional above
5519 ** to read "usableSize/4-2" instead of "usableSize/4-8". 5920 ** to read "usableSize/4-2" instead of "usableSize/4-8".
5921 **
5922 ** EVIDENCE-OF: R-19920-11576 However, newer versions of SQLite still
5923 ** avoid using the last six entries in the freelist trunk page array in
5924 ** order that database files created by newer versions of SQLite can be
5925 ** read by older versions of SQLite.
5520 */ 5926 */
5521 rc = sqlite3PagerWrite(pTrunk->pDbPage); 5927 rc = sqlite3PagerWrite(pTrunk->pDbPage);
5522 if( rc==SQLITE_OK ){ 5928 if( rc==SQLITE_OK ){
5523 put4byte(&pTrunk->aData[4], nLeaf+1); 5929 put4byte(&pTrunk->aData[4], nLeaf+1);
5524 put4byte(&pTrunk->aData[8+nLeaf*4], iPage); 5930 put4byte(&pTrunk->aData[8+nLeaf*4], iPage);
5525 if( pPage && (pBt->btsFlags & BTS_SECURE_DELETE)==0 ){ 5931 if( pPage && (pBt->btsFlags & BTS_SECURE_DELETE)==0 ){
5526 sqlite3PagerDontWrite(pPage->pDbPage); 5932 sqlite3PagerDontWrite(pPage->pDbPage);
5527 } 5933 }
5528 rc = btreeSetHasContent(pBt, iPage); 5934 rc = btreeSetHasContent(pBt, iPage);
5529 } 5935 }
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after
5575 u16 *pnSize /* Write the size of the Cell here */ 5981 u16 *pnSize /* Write the size of the Cell here */
5576 ){ 5982 ){
5577 BtShared *pBt = pPage->pBt; 5983 BtShared *pBt = pPage->pBt;
5578 CellInfo info; 5984 CellInfo info;
5579 Pgno ovflPgno; 5985 Pgno ovflPgno;
5580 int rc; 5986 int rc;
5581 int nOvfl; 5987 int nOvfl;
5582 u32 ovflPageSize; 5988 u32 ovflPageSize;
5583 5989
5584 assert( sqlite3_mutex_held(pPage->pBt->mutex) ); 5990 assert( sqlite3_mutex_held(pPage->pBt->mutex) );
5585 btreeParseCellPtr(pPage, pCell, &info); 5991 pPage->xParseCell(pPage, pCell, &info);
5586 *pnSize = info.nSize; 5992 *pnSize = info.nSize;
5587 if( info.iOverflow==0 ){ 5993 if( info.nLocal==info.nPayload ){
5588 return SQLITE_OK; /* No overflow pages. Return without doing anything */ 5994 return SQLITE_OK; /* No overflow pages. Return without doing anything */
5589 } 5995 }
5590 if( pCell+info.iOverflow+3 > pPage->aData+pPage->maskPage ){ 5996 if( pCell+info.nSize-1 > pPage->aData+pPage->maskPage ){
5591 return SQLITE_CORRUPT_BKPT; /* Cell extends past end of page */ 5997 return SQLITE_CORRUPT_BKPT; /* Cell extends past end of page */
5592 } 5998 }
5593 ovflPgno = get4byte(&pCell[info.iOverflow]); 5999 ovflPgno = get4byte(pCell + info.nSize - 4);
5594 assert( pBt->usableSize > 4 ); 6000 assert( pBt->usableSize > 4 );
5595 ovflPageSize = pBt->usableSize - 4; 6001 ovflPageSize = pBt->usableSize - 4;
5596 nOvfl = (info.nPayload - info.nLocal + ovflPageSize - 1)/ovflPageSize; 6002 nOvfl = (info.nPayload - info.nLocal + ovflPageSize - 1)/ovflPageSize;
5597 assert( ovflPgno==0 || nOvfl>0 ); 6003 assert( nOvfl>0 ||
6004 (CORRUPT_DB && (info.nPayload + ovflPageSize)<ovflPageSize)
6005 );
5598 while( nOvfl-- ){ 6006 while( nOvfl-- ){
5599 Pgno iNext = 0; 6007 Pgno iNext = 0;
5600 MemPage *pOvfl = 0; 6008 MemPage *pOvfl = 0;
5601 if( ovflPgno<2 || ovflPgno>btreePagecount(pBt) ){ 6009 if( ovflPgno<2 || ovflPgno>btreePagecount(pBt) ){
5602 /* 0 is not a legal page number and page 1 cannot be an 6010 /* 0 is not a legal page number and page 1 cannot be an
5603 ** overflow page. Therefore if ovflPgno<2 or past the end of the 6011 ** overflow page. Therefore if ovflPgno<2 or past the end of the
5604 ** file the database must be corrupt. */ 6012 ** file the database must be corrupt. */
5605 return SQLITE_CORRUPT_BKPT; 6013 return SQLITE_CORRUPT_BKPT;
5606 } 6014 }
5607 if( nOvfl ){ 6015 if( nOvfl ){
(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after
5685 assert( nZero==0 ); 6093 assert( nZero==0 );
5686 } 6094 }
5687 nHeader += putVarint(&pCell[nHeader], *(u64*)&nKey); 6095 nHeader += putVarint(&pCell[nHeader], *(u64*)&nKey);
5688 6096
5689 /* Fill in the payload size */ 6097 /* Fill in the payload size */
5690 if( pPage->intKey ){ 6098 if( pPage->intKey ){
5691 pSrc = pData; 6099 pSrc = pData;
5692 nSrc = nData; 6100 nSrc = nData;
5693 nData = 0; 6101 nData = 0;
5694 }else{ 6102 }else{
5695 if( NEVER(nKey>0x7fffffff || pKey==0) ){ 6103 assert( nKey<=0x7fffffff && pKey!=0 );
5696 return SQLITE_CORRUPT_BKPT;
5697 }
5698 nPayload = (int)nKey; 6104 nPayload = (int)nKey;
5699 pSrc = pKey; 6105 pSrc = pKey;
5700 nSrc = (int)nKey; 6106 nSrc = (int)nKey;
5701 } 6107 }
5702 if( nPayload<=pPage->maxLocal ){ 6108 if( nPayload<=pPage->maxLocal ){
5703 n = nHeader + nPayload; 6109 n = nHeader + nPayload;
5704 testcase( n==3 ); 6110 testcase( n==3 );
5705 testcase( n==4 ); 6111 testcase( n==4 );
5706 if( n<4 ) n = 4; 6112 if( n<4 ) n = 4;
5707 *pnSize = n; 6113 *pnSize = n;
(...skipping 19 matching lines...) Expand all
5727 ** that means content must spill into overflow pages. 6133 ** that means content must spill into overflow pages.
5728 ** *pnSize Size of the local cell (not counting overflow pages) 6134 ** *pnSize Size of the local cell (not counting overflow pages)
5729 ** pPrior Where to write the pgno of the first overflow page 6135 ** pPrior Where to write the pgno of the first overflow page
5730 ** 6136 **
5731 ** Use a call to btreeParseCellPtr() to verify that the values above 6137 ** Use a call to btreeParseCellPtr() to verify that the values above
5732 ** were computed correctly. 6138 ** were computed correctly.
5733 */ 6139 */
5734 #if SQLITE_DEBUG 6140 #if SQLITE_DEBUG
5735 { 6141 {
5736 CellInfo info; 6142 CellInfo info;
5737 btreeParseCellPtr(pPage, pCell, &info); 6143 pPage->xParseCell(pPage, pCell, &info);
5738 assert( nHeader=(int)(info.pPayload - pCell) ); 6144 assert( nHeader=(int)(info.pPayload - pCell) );
5739 assert( info.nKey==nKey ); 6145 assert( info.nKey==nKey );
5740 assert( *pnSize == info.nSize ); 6146 assert( *pnSize == info.nSize );
5741 assert( spaceLeft == info.nLocal ); 6147 assert( spaceLeft == info.nLocal );
5742 assert( pPrior == &pCell[info.iOverflow] );
5743 } 6148 }
5744 #endif 6149 #endif
5745 6150
5746 /* Write the payload into the local Cell and any extra into overflow pages */ 6151 /* Write the payload into the local Cell and any extra into overflow pages */
5747 while( nPayload>0 ){ 6152 while( nPayload>0 ){
5748 if( spaceLeft==0 ){ 6153 if( spaceLeft==0 ){
5749 #ifndef SQLITE_OMIT_AUTOVACUUM 6154 #ifndef SQLITE_OMIT_AUTOVACUUM
5750 Pgno pgnoPtrmap = pgnoOvfl; /* Overflow page pointer-map entry page */ 6155 Pgno pgnoPtrmap = pgnoOvfl; /* Overflow page pointer-map entry page */
5751 if( pBt->autoVacuum ){ 6156 if( pBt->autoVacuum ){
5752 do{ 6157 do{
(...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after
5842 static void dropCell(MemPage *pPage, int idx, int sz, int *pRC){ 6247 static void dropCell(MemPage *pPage, int idx, int sz, int *pRC){
5843 u32 pc; /* Offset to cell content of cell being deleted */ 6248 u32 pc; /* Offset to cell content of cell being deleted */
5844 u8 *data; /* pPage->aData */ 6249 u8 *data; /* pPage->aData */
5845 u8 *ptr; /* Used to move bytes around within data[] */ 6250 u8 *ptr; /* Used to move bytes around within data[] */
5846 int rc; /* The return code */ 6251 int rc; /* The return code */
5847 int hdr; /* Beginning of the header. 0 most pages. 100 page 1 */ 6252 int hdr; /* Beginning of the header. 0 most pages. 100 page 1 */
5848 6253
5849 if( *pRC ) return; 6254 if( *pRC ) return;
5850 6255
5851 assert( idx>=0 && idx<pPage->nCell ); 6256 assert( idx>=0 && idx<pPage->nCell );
5852 assert( sz==cellSize(pPage, idx) ); 6257 assert( CORRUPT_DB || sz==cellSize(pPage, idx) );
5853 assert( sqlite3PagerIswriteable(pPage->pDbPage) ); 6258 assert( sqlite3PagerIswriteable(pPage->pDbPage) );
5854 assert( sqlite3_mutex_held(pPage->pBt->mutex) ); 6259 assert( sqlite3_mutex_held(pPage->pBt->mutex) );
5855 data = pPage->aData; 6260 data = pPage->aData;
5856 ptr = &pPage->aCellIdx[2*idx]; 6261 ptr = &pPage->aCellIdx[2*idx];
5857 pc = get2byte(ptr); 6262 pc = get2byte(ptr);
5858 hdr = pPage->hdrOffset; 6263 hdr = pPage->hdrOffset;
5859 testcase( pc==get2byte(&data[hdr+5]) ); 6264 testcase( pc==get2byte(&data[hdr+5]) );
5860 testcase( pc+sz==pPage->pBt->usableSize ); 6265 testcase( pc+sz==pPage->pBt->usableSize );
5861 if( pc < (u32)get2byte(&data[hdr+5]) || pc+sz > pPage->pBt->usableSize ){ 6266 if( pc < (u32)get2byte(&data[hdr+5]) || pc+sz > pPage->pBt->usableSize ){
5862 *pRC = SQLITE_CORRUPT_BKPT; 6267 *pRC = SQLITE_CORRUPT_BKPT;
5863 return; 6268 return;
5864 } 6269 }
5865 rc = freeSpace(pPage, pc, sz); 6270 rc = freeSpace(pPage, pc, sz);
5866 if( rc ){ 6271 if( rc ){
5867 *pRC = rc; 6272 *pRC = rc;
5868 return; 6273 return;
5869 } 6274 }
5870 pPage->nCell--; 6275 pPage->nCell--;
5871 memmove(ptr, ptr+2, 2*(pPage->nCell - idx)); 6276 if( pPage->nCell==0 ){
5872 put2byte(&data[hdr+3], pPage->nCell); 6277 memset(&data[hdr+1], 0, 4);
5873 pPage->nFree += 2; 6278 data[hdr+7] = 0;
6279 put2byte(&data[hdr+5], pPage->pBt->usableSize);
6280 pPage->nFree = pPage->pBt->usableSize - pPage->hdrOffset
6281 - pPage->childPtrSize - 8;
6282 }else{
6283 memmove(ptr, ptr+2, 2*(pPage->nCell - idx));
6284 put2byte(&data[hdr+3], pPage->nCell);
6285 pPage->nFree += 2;
6286 }
5874 } 6287 }
5875 6288
5876 /* 6289 /*
5877 ** Insert a new cell on pPage at cell index "i". pCell points to the 6290 ** Insert a new cell on pPage at cell index "i". pCell points to the
5878 ** content of the cell. 6291 ** content of the cell.
5879 ** 6292 **
5880 ** If the cell content will fit on the page, then put it there. If it 6293 ** If the cell content will fit on the page, then put it there. If it
5881 ** will not fit, then make a copy of the cell content into pTemp if 6294 ** will not fit, then make a copy of the cell content into pTemp if
5882 ** pTemp is not null. Regardless of pTemp, allocate a new entry 6295 ** pTemp is not null. Regardless of pTemp, allocate a new entry
5883 ** in pPage->apOvfl[] and make it point to the cell content (either 6296 ** in pPage->apOvfl[] and make it point to the cell content (either
5884 ** in pTemp or the original pCell) and also record its index. 6297 ** in pTemp or the original pCell) and also record its index.
5885 ** Allocating a new entry in pPage->aCell[] implies that 6298 ** Allocating a new entry in pPage->aCell[] implies that
5886 ** pPage->nOverflow is incremented. 6299 ** pPage->nOverflow is incremented.
5887 */ 6300 */
5888 static void insertCell( 6301 static void insertCell(
5889 MemPage *pPage, /* Page into which we are copying */ 6302 MemPage *pPage, /* Page into which we are copying */
5890 int i, /* New cell becomes the i-th cell of the page */ 6303 int i, /* New cell becomes the i-th cell of the page */
5891 u8 *pCell, /* Content of the new cell */ 6304 u8 *pCell, /* Content of the new cell */
5892 int sz, /* Bytes of content in pCell */ 6305 int sz, /* Bytes of content in pCell */
5893 u8 *pTemp, /* Temp storage space for pCell, if needed */ 6306 u8 *pTemp, /* Temp storage space for pCell, if needed */
5894 Pgno iChild, /* If non-zero, replace first 4 bytes with this value */ 6307 Pgno iChild, /* If non-zero, replace first 4 bytes with this value */
5895 int *pRC /* Read and write return code from here */ 6308 int *pRC /* Read and write return code from here */
5896 ){ 6309 ){
5897 int idx = 0; /* Where to write new cell content in data[] */ 6310 int idx = 0; /* Where to write new cell content in data[] */
5898 int j; /* Loop counter */ 6311 int j; /* Loop counter */
5899 int end; /* First byte past the last cell pointer in data[] */
5900 int ins; /* Index in data[] where new cell pointer is inserted */
5901 int cellOffset; /* Address of first cell pointer in data[] */
5902 u8 *data; /* The content of the whole page */ 6312 u8 *data; /* The content of the whole page */
6313 u8 *pIns; /* The point in pPage->aCellIdx[] where new cell is inserted */
5903 6314
5904 if( *pRC ) return; 6315 if( *pRC ) return;
5905 6316
5906 assert( i>=0 && i<=pPage->nCell+pPage->nOverflow ); 6317 assert( i>=0 && i<=pPage->nCell+pPage->nOverflow );
5907 assert( MX_CELL(pPage->pBt)<=10921 ); 6318 assert( MX_CELL(pPage->pBt)<=10921 );
5908 assert( pPage->nCell<=MX_CELL(pPage->pBt) || CORRUPT_DB ); 6319 assert( pPage->nCell<=MX_CELL(pPage->pBt) || CORRUPT_DB );
5909 assert( pPage->nOverflow<=ArraySize(pPage->apOvfl) ); 6320 assert( pPage->nOverflow<=ArraySize(pPage->apOvfl) );
5910 assert( ArraySize(pPage->apOvfl)==ArraySize(pPage->aiOvfl) ); 6321 assert( ArraySize(pPage->apOvfl)==ArraySize(pPage->aiOvfl) );
5911 assert( sqlite3_mutex_held(pPage->pBt->mutex) ); 6322 assert( sqlite3_mutex_held(pPage->pBt->mutex) );
5912 /* The cell should normally be sized correctly. However, when moving a 6323 /* The cell should normally be sized correctly. However, when moving a
5913 ** malformed cell from a leaf page to an interior page, if the cell size 6324 ** malformed cell from a leaf page to an interior page, if the cell size
5914 ** wanted to be less than 4 but got rounded up to 4 on the leaf, then size 6325 ** wanted to be less than 4 but got rounded up to 4 on the leaf, then size
5915 ** might be less than 8 (leaf-size + pointer) on the interior node. Hence 6326 ** might be less than 8 (leaf-size + pointer) on the interior node. Hence
5916 ** the term after the || in the following assert(). */ 6327 ** the term after the || in the following assert(). */
5917 assert( sz==cellSizePtr(pPage, pCell) || (sz==8 && iChild>0) ); 6328 assert( sz==pPage->xCellSize(pPage, pCell) || (sz==8 && iChild>0) );
5918 if( pPage->nOverflow || sz+2>pPage->nFree ){ 6329 if( pPage->nOverflow || sz+2>pPage->nFree ){
5919 if( pTemp ){ 6330 if( pTemp ){
5920 memcpy(pTemp, pCell, sz); 6331 memcpy(pTemp, pCell, sz);
5921 pCell = pTemp; 6332 pCell = pTemp;
5922 } 6333 }
5923 if( iChild ){ 6334 if( iChild ){
5924 put4byte(pCell, iChild); 6335 put4byte(pCell, iChild);
5925 } 6336 }
5926 j = pPage->nOverflow++; 6337 j = pPage->nOverflow++;
5927 assert( j<(int)(sizeof(pPage->apOvfl)/sizeof(pPage->apOvfl[0])) ); 6338 assert( j<(int)(sizeof(pPage->apOvfl)/sizeof(pPage->apOvfl[0])) );
5928 pPage->apOvfl[j] = pCell; 6339 pPage->apOvfl[j] = pCell;
5929 pPage->aiOvfl[j] = (u16)i; 6340 pPage->aiOvfl[j] = (u16)i;
6341
6342 /* When multiple overflows occur, they are always sequential and in
6343 ** sorted order. These invariants arise because multiple overflows can
6344 ** only occur when inserting divider cells into the parent page during
6345 ** balancing, and the dividers are adjacent and sorted.
6346 */
6347 assert( j==0 || pPage->aiOvfl[j-1]<(u16)i ); /* Overflows in sorted order */
6348 assert( j==0 || i==pPage->aiOvfl[j-1]+1 ); /* Overflows are sequential */
5930 }else{ 6349 }else{
5931 int rc = sqlite3PagerWrite(pPage->pDbPage); 6350 int rc = sqlite3PagerWrite(pPage->pDbPage);
5932 if( rc!=SQLITE_OK ){ 6351 if( rc!=SQLITE_OK ){
5933 *pRC = rc; 6352 *pRC = rc;
5934 return; 6353 return;
5935 } 6354 }
5936 assert( sqlite3PagerIswriteable(pPage->pDbPage) ); 6355 assert( sqlite3PagerIswriteable(pPage->pDbPage) );
5937 data = pPage->aData; 6356 data = pPage->aData;
5938 cellOffset = pPage->cellOffset; 6357 assert( &data[pPage->cellOffset]==pPage->aCellIdx );
5939 end = cellOffset + 2*pPage->nCell;
5940 ins = cellOffset + 2*i;
5941 rc = allocateSpace(pPage, sz, &idx); 6358 rc = allocateSpace(pPage, sz, &idx);
5942 if( rc ){ *pRC = rc; return; } 6359 if( rc ){ *pRC = rc; return; }
5943 /* The allocateSpace() routine guarantees the following two properties 6360 /* The allocateSpace() routine guarantees the following properties
5944 ** if it returns success */ 6361 ** if it returns successfully */
5945 assert( idx >= end+2 ); 6362 assert( idx >= 0 );
6363 assert( idx >= pPage->cellOffset+2*pPage->nCell+2 || CORRUPT_DB );
5946 assert( idx+sz <= (int)pPage->pBt->usableSize ); 6364 assert( idx+sz <= (int)pPage->pBt->usableSize );
5947 pPage->nCell++;
5948 pPage->nFree -= (u16)(2 + sz); 6365 pPage->nFree -= (u16)(2 + sz);
5949 memcpy(&data[idx], pCell, sz); 6366 memcpy(&data[idx], pCell, sz);
5950 if( iChild ){ 6367 if( iChild ){
5951 put4byte(&data[idx], iChild); 6368 put4byte(&data[idx], iChild);
5952 } 6369 }
5953 memmove(&data[ins+2], &data[ins], end-ins); 6370 pIns = pPage->aCellIdx + i*2;
5954 put2byte(&data[ins], idx); 6371 memmove(pIns+2, pIns, 2*(pPage->nCell - i));
5955 put2byte(&data[pPage->hdrOffset+3], pPage->nCell); 6372 put2byte(pIns, idx);
6373 pPage->nCell++;
6374 /* increment the cell count */
6375 if( (++data[pPage->hdrOffset+4])==0 ) data[pPage->hdrOffset+3]++;
6376 assert( get2byte(&data[pPage->hdrOffset+3])==pPage->nCell );
5956 #ifndef SQLITE_OMIT_AUTOVACUUM 6377 #ifndef SQLITE_OMIT_AUTOVACUUM
5957 if( pPage->pBt->autoVacuum ){ 6378 if( pPage->pBt->autoVacuum ){
5958 /* The cell may contain a pointer to an overflow page. If so, write 6379 /* The cell may contain a pointer to an overflow page. If so, write
5959 ** the entry for the overflow page into the pointer map. 6380 ** the entry for the overflow page into the pointer map.
5960 */ 6381 */
5961 ptrmapPutOvflPtr(pPage, pCell, pRC); 6382 ptrmapPutOvflPtr(pPage, pCell, pRC);
5962 } 6383 }
5963 #endif 6384 #endif
5964 } 6385 }
5965 } 6386 }
5966 6387
5967 /* 6388 /*
5968 ** Add a list of cells to a page. The page should be initially empty. 6389 ** A CellArray object contains a cache of pointers and sizes for a
5969 ** The cells are guaranteed to fit on the page. 6390 ** consecutive sequence of cells that might be held on multiple pages.
5970 */ 6391 */
5971 static void assemblePage( 6392 typedef struct CellArray CellArray;
5972 MemPage *pPage, /* The page to be assembled */ 6393 struct CellArray {
5973 int nCell, /* The number of cells to add to this page */ 6394 int nCell; /* Number of cells in apCell[] */
5974 u8 **apCell, /* Pointers to cell bodies */ 6395 MemPage *pRef; /* Reference page */
5975 u16 *aSize /* Sizes of the cells */ 6396 u8 **apCell; /* All cells being balanced */
6397 u16 *szCell; /* Local size of all cells in apCell[] */
6398 };
6399
6400 /*
6401 ** Make sure the cell sizes at idx, idx+1, ..., idx+N-1 have been
6402 ** computed.
6403 */
6404 static void populateCellCache(CellArray *p, int idx, int N){
6405 assert( idx>=0 && idx+N<=p->nCell );
6406 while( N>0 ){
6407 assert( p->apCell[idx]!=0 );
6408 if( p->szCell[idx]==0 ){
6409 p->szCell[idx] = p->pRef->xCellSize(p->pRef, p->apCell[idx]);
6410 }else{
6411 assert( CORRUPT_DB ||
6412 p->szCell[idx]==p->pRef->xCellSize(p->pRef, p->apCell[idx]) );
6413 }
6414 idx++;
6415 N--;
6416 }
6417 }
6418
6419 /*
6420 ** Return the size of the Nth element of the cell array
6421 */
6422 static SQLITE_NOINLINE u16 computeCellSize(CellArray *p, int N){
6423 assert( N>=0 && N<p->nCell );
6424 assert( p->szCell[N]==0 );
6425 p->szCell[N] = p->pRef->xCellSize(p->pRef, p->apCell[N]);
6426 return p->szCell[N];
6427 }
6428 static u16 cachedCellSize(CellArray *p, int N){
6429 assert( N>=0 && N<p->nCell );
6430 if( p->szCell[N] ) return p->szCell[N];
6431 return computeCellSize(p, N);
6432 }
6433
6434 /*
6435 ** Array apCell[] contains pointers to nCell b-tree page cells. The
6436 ** szCell[] array contains the size in bytes of each cell. This function
6437 ** replaces the current contents of page pPg with the contents of the cell
6438 ** array.
6439 **
6440 ** Some of the cells in apCell[] may currently be stored in pPg. This
6441 ** function works around problems caused by this by making a copy of any
6442 ** such cells before overwriting the page data.
6443 **
6444 ** The MemPage.nFree field is invalidated by this function. It is the
6445 ** responsibility of the caller to set it correctly.
6446 */
6447 static int rebuildPage(
6448 MemPage *pPg, /* Edit this page */
6449 int nCell, /* Final number of cells on page */
6450 u8 **apCell, /* Array of cells */
6451 u16 *szCell /* Array of cell sizes */
5976 ){ 6452 ){
5977 int i; /* Loop counter */ 6453 const int hdr = pPg->hdrOffset; /* Offset of header on pPg */
5978 u8 *pCellptr; /* Address of next cell pointer */ 6454 u8 * const aData = pPg->aData; /* Pointer to data for pPg */
5979 int cellbody; /* Address of next cell body */ 6455 const int usableSize = pPg->pBt->usableSize;
5980 u8 * const data = pPage->aData; /* Pointer to data for pPage */ 6456 u8 * const pEnd = &aData[usableSize];
5981 const int hdr = pPage->hdrOffset; /* Offset of header on pPage */ 6457 int i;
5982 const int nUsable = pPage->pBt->usableSize; /* Usable size of page */ 6458 u8 *pCellptr = pPg->aCellIdx;
5983 6459 u8 *pTmp = sqlite3PagerTempSpace(pPg->pBt->pPager);
5984 assert( pPage->nOverflow==0 ); 6460 u8 *pData;
5985 assert( sqlite3_mutex_held(pPage->pBt->mutex) ); 6461
5986 assert( nCell>=0 && nCell<=(int)MX_CELL(pPage->pBt) 6462 i = get2byte(&aData[hdr+5]);
5987 && (int)MX_CELL(pPage->pBt)<=10921); 6463 memcpy(&pTmp[i], &aData[i], usableSize - i);
5988 assert( sqlite3PagerIswriteable(pPage->pDbPage) ); 6464
5989 6465 pData = pEnd;
5990 /* Check that the page has just been zeroed by zeroPage() */ 6466 for(i=0; i<nCell; i++){
5991 assert( pPage->nCell==0 ); 6467 u8 *pCell = apCell[i];
5992 assert( get2byteNotZero(&data[hdr+5])==nUsable ); 6468 if( SQLITE_WITHIN(pCell,aData,pEnd) ){
5993 6469 pCell = &pTmp[pCell - aData];
5994 pCellptr = &pPage->aCellIdx[nCell*2]; 6470 }
5995 cellbody = nUsable; 6471 pData -= szCell[i];
5996 for(i=nCell-1; i>=0; i--){ 6472 put2byte(pCellptr, (pData - aData));
5997 u16 sz = aSize[i]; 6473 pCellptr += 2;
5998 pCellptr -= 2; 6474 if( pData < pCellptr ) return SQLITE_CORRUPT_BKPT;
5999 cellbody -= sz; 6475 memcpy(pData, pCell, szCell[i]);
6000 put2byte(pCellptr, cellbody); 6476 assert( szCell[i]==pPg->xCellSize(pPg, pCell) || CORRUPT_DB );
6001 memcpy(&data[cellbody], apCell[i], sz); 6477 testcase( szCell[i]!=pPg->xCellSize(pPg,pCell) );
6002 } 6478 }
6003 put2byte(&data[hdr+3], nCell); 6479
6004 put2byte(&data[hdr+5], cellbody); 6480 /* The pPg->nFree field is now set incorrectly. The caller will fix it. */
6005 pPage->nFree -= (nCell*2 + nUsable - cellbody); 6481 pPg->nCell = nCell;
6006 pPage->nCell = (u16)nCell; 6482 pPg->nOverflow = 0;
6007 } 6483
6008 6484 put2byte(&aData[hdr+1], 0);
6009 /* 6485 put2byte(&aData[hdr+3], pPg->nCell);
6486 put2byte(&aData[hdr+5], pData - aData);
6487 aData[hdr+7] = 0x00;
6488 return SQLITE_OK;
6489 }
6490
6491 /*
6492 ** Array apCell[] contains nCell pointers to b-tree cells. Array szCell
6493 ** contains the size in bytes of each such cell. This function attempts to
6494 ** add the cells stored in the array to page pPg. If it cannot (because
6495 ** the page needs to be defragmented before the cells will fit), non-zero
6496 ** is returned. Otherwise, if the cells are added successfully, zero is
6497 ** returned.
6498 **
6499 ** Argument pCellptr points to the first entry in the cell-pointer array
6500 ** (part of page pPg) to populate. After cell apCell[0] is written to the
6501 ** page body, a 16-bit offset is written to pCellptr. And so on, for each
6502 ** cell in the array. It is the responsibility of the caller to ensure
6503 ** that it is safe to overwrite this part of the cell-pointer array.
6504 **
6505 ** When this function is called, *ppData points to the start of the
6506 ** content area on page pPg. If the size of the content area is extended,
6507 ** *ppData is updated to point to the new start of the content area
6508 ** before returning.
6509 **
6510 ** Finally, argument pBegin points to the byte immediately following the
6511 ** end of the space required by this page for the cell-pointer area (for
6512 ** all cells - not just those inserted by the current call). If the content
6513 ** area must be extended to before this point in order to accommodate all
6514 ** cells in apCell[], then the cells do not fit and non-zero is returned.
6515 */
6516 static int pageInsertArray(
6517 MemPage *pPg, /* Page to add cells to */
6518 u8 *pBegin, /* End of cell-pointer array */
6519 u8 **ppData, /* IN/OUT: Page content -area pointer */
6520 u8 *pCellptr, /* Pointer to cell-pointer area */
6521 int iFirst, /* Index of first cell to add */
6522 int nCell, /* Number of cells to add to pPg */
6523 CellArray *pCArray /* Array of cells */
6524 ){
6525 int i;
6526 u8 *aData = pPg->aData;
6527 u8 *pData = *ppData;
6528 int iEnd = iFirst + nCell;
6529 assert( CORRUPT_DB || pPg->hdrOffset==0 ); /* Never called on page 1 */
6530 for(i=iFirst; i<iEnd; i++){
6531 int sz, rc;
6532 u8 *pSlot;
6533 sz = cachedCellSize(pCArray, i);
6534 if( (aData[1]==0 && aData[2]==0) || (pSlot = pageFindSlot(pPg,sz,&rc))==0 ){
6535 pData -= sz;
6536 if( pData<pBegin ) return 1;
6537 pSlot = pData;
6538 }
6539 /* pSlot and pCArray->apCell[i] will never overlap on a well-formed
6540 ** database. But they might for a corrupt database. Hence use memmove()
6541 ** since memcpy() sends SIGABRT with overlapping buffers on OpenBSD */
6542 assert( (pSlot+sz)<=pCArray->apCell[i]
6543 || pSlot>=(pCArray->apCell[i]+sz)
6544 || CORRUPT_DB );
6545 memmove(pSlot, pCArray->apCell[i], sz);
6546 put2byte(pCellptr, (pSlot - aData));
6547 pCellptr += 2;
6548 }
6549 *ppData = pData;
6550 return 0;
6551 }
6552
6553 /*
6554 ** Array apCell[] contains nCell pointers to b-tree cells. Array szCell
6555 ** contains the size in bytes of each such cell. This function adds the
6556 ** space associated with each cell in the array that is currently stored
6557 ** within the body of pPg to the pPg free-list. The cell-pointers and other
6558 ** fields of the page are not updated.
6559 **
6560 ** This function returns the total number of cells added to the free-list.
6561 */
6562 static int pageFreeArray(
6563 MemPage *pPg, /* Page to edit */
6564 int iFirst, /* First cell to delete */
6565 int nCell, /* Cells to delete */
6566 CellArray *pCArray /* Array of cells */
6567 ){
6568 u8 * const aData = pPg->aData;
6569 u8 * const pEnd = &aData[pPg->pBt->usableSize];
6570 u8 * const pStart = &aData[pPg->hdrOffset + 8 + pPg->childPtrSize];
6571 int nRet = 0;
6572 int i;
6573 int iEnd = iFirst + nCell;
6574 u8 *pFree = 0;
6575 int szFree = 0;
6576
6577 for(i=iFirst; i<iEnd; i++){
6578 u8 *pCell = pCArray->apCell[i];
6579 if( SQLITE_WITHIN(pCell, pStart, pEnd) ){
6580 int sz;
6581 /* No need to use cachedCellSize() here. The sizes of all cells that
6582 ** are to be freed have already been computed while deciding which
6583 ** cells need freeing */
6584 sz = pCArray->szCell[i]; assert( sz>0 );
6585 if( pFree!=(pCell + sz) ){
6586 if( pFree ){
6587 assert( pFree>aData && (pFree - aData)<65536 );
6588 freeSpace(pPg, (u16)(pFree - aData), szFree);
6589 }
6590 pFree = pCell;
6591 szFree = sz;
6592 if( pFree+sz>pEnd ) return 0;
6593 }else{
6594 pFree = pCell;
6595 szFree += sz;
6596 }
6597 nRet++;
6598 }
6599 }
6600 if( pFree ){
6601 assert( pFree>aData && (pFree - aData)<65536 );
6602 freeSpace(pPg, (u16)(pFree - aData), szFree);
6603 }
6604 return nRet;
6605 }
6606
6607 /*
6608 ** apCell[] and szCell[] contain pointers to and sizes of all cells in the
6609 ** pages being balanced. The current page, pPg, has pPg->nCell cells starting
6610 ** with apCell[iOld]. After balancing, this page should hold nNew cells
6611 ** starting at apCell[iNew].
6612 **
6613 ** This routine makes the necessary adjustments to pPg so that it contains
6614 ** the correct cells after being balanced.
6615 **
6616 ** The pPg->nFree field is invalid when this function returns. It is the
6617 ** responsibility of the caller to set it correctly.
6618 */
6619 static int editPage(
6620 MemPage *pPg, /* Edit this page */
6621 int iOld, /* Index of first cell currently on page */
6622 int iNew, /* Index of new first cell on page */
6623 int nNew, /* Final number of cells on page */
6624 CellArray *pCArray /* Array of cells and sizes */
6625 ){
6626 u8 * const aData = pPg->aData;
6627 const int hdr = pPg->hdrOffset;
6628 u8 *pBegin = &pPg->aCellIdx[nNew * 2];
6629 int nCell = pPg->nCell; /* Cells stored on pPg */
6630 u8 *pData;
6631 u8 *pCellptr;
6632 int i;
6633 int iOldEnd = iOld + pPg->nCell + pPg->nOverflow;
6634 int iNewEnd = iNew + nNew;
6635
6636 #ifdef SQLITE_DEBUG
6637 u8 *pTmp = sqlite3PagerTempSpace(pPg->pBt->pPager);
6638 memcpy(pTmp, aData, pPg->pBt->usableSize);
6639 #endif
6640
6641 /* Remove cells from the start and end of the page */
6642 if( iOld<iNew ){
6643 int nShift = pageFreeArray(pPg, iOld, iNew-iOld, pCArray);
6644 memmove(pPg->aCellIdx, &pPg->aCellIdx[nShift*2], nCell*2);
6645 nCell -= nShift;
6646 }
6647 if( iNewEnd < iOldEnd ){
6648 nCell -= pageFreeArray(pPg, iNewEnd, iOldEnd - iNewEnd, pCArray);
6649 }
6650
6651 pData = &aData[get2byteNotZero(&aData[hdr+5])];
6652 if( pData<pBegin ) goto editpage_fail;
6653
6654 /* Add cells to the start of the page */
6655 if( iNew<iOld ){
6656 int nAdd = MIN(nNew,iOld-iNew);
6657 assert( (iOld-iNew)<nNew || nCell==0 || CORRUPT_DB );
6658 pCellptr = pPg->aCellIdx;
6659 memmove(&pCellptr[nAdd*2], pCellptr, nCell*2);
6660 if( pageInsertArray(
6661 pPg, pBegin, &pData, pCellptr,
6662 iNew, nAdd, pCArray
6663 ) ) goto editpage_fail;
6664 nCell += nAdd;
6665 }
6666
6667 /* Add any overflow cells */
6668 for(i=0; i<pPg->nOverflow; i++){
6669 int iCell = (iOld + pPg->aiOvfl[i]) - iNew;
6670 if( iCell>=0 && iCell<nNew ){
6671 pCellptr = &pPg->aCellIdx[iCell * 2];
6672 memmove(&pCellptr[2], pCellptr, (nCell - iCell) * 2);
6673 nCell++;
6674 if( pageInsertArray(
6675 pPg, pBegin, &pData, pCellptr,
6676 iCell+iNew, 1, pCArray
6677 ) ) goto editpage_fail;
6678 }
6679 }
6680
6681 /* Append cells to the end of the page */
6682 pCellptr = &pPg->aCellIdx[nCell*2];
6683 if( pageInsertArray(
6684 pPg, pBegin, &pData, pCellptr,
6685 iNew+nCell, nNew-nCell, pCArray
6686 ) ) goto editpage_fail;
6687
6688 pPg->nCell = nNew;
6689 pPg->nOverflow = 0;
6690
6691 put2byte(&aData[hdr+3], pPg->nCell);
6692 put2byte(&aData[hdr+5], pData - aData);
6693
6694 #ifdef SQLITE_DEBUG
6695 for(i=0; i<nNew && !CORRUPT_DB; i++){
6696 u8 *pCell = pCArray->apCell[i+iNew];
6697 int iOff = get2byteAligned(&pPg->aCellIdx[i*2]);
6698 if( pCell>=aData && pCell<&aData[pPg->pBt->usableSize] ){
6699 pCell = &pTmp[pCell - aData];
6700 }
6701 assert( 0==memcmp(pCell, &aData[iOff],
6702 pCArray->pRef->xCellSize(pCArray->pRef, pCArray->apCell[i+iNew])) );
6703 }
6704 #endif
6705
6706 return SQLITE_OK;
6707 editpage_fail:
6708 /* Unable to edit this page. Rebuild it from scratch instead. */
6709 populateCellCache(pCArray, iNew, nNew);
6710 return rebuildPage(pPg, nNew, &pCArray->apCell[iNew], &pCArray->szCell[iNew]);
6711 }
6712
6713 /*
6010 ** The following parameters determine how many adjacent pages get involved 6714 ** The following parameters determine how many adjacent pages get involved
6011 ** in a balancing operation. NN is the number of neighbors on either side 6715 ** in a balancing operation. NN is the number of neighbors on either side
6012 ** of the page that participate in the balancing operation. NB is the 6716 ** of the page that participate in the balancing operation. NB is the
6013 ** total number of pages that participate, including the target page and 6717 ** total number of pages that participate, including the target page and
6014 ** NN neighbors on either side. 6718 ** NN neighbors on either side.
6015 ** 6719 **
6016 ** The minimum value of NN is 1 (of course). Increasing NN above 1 6720 ** The minimum value of NN is 1 (of course). Increasing NN above 1
6017 ** (to 2 or 3) gives a modest improvement in SELECT and DELETE performance 6721 ** (to 2 or 3) gives a modest improvement in SELECT and DELETE performance
6018 ** in exchange for a larger degradation in INSERT and UPDATE performance. 6722 ** in exchange for a larger degradation in INSERT and UPDATE performance.
6019 ** The value of NN appears to give the best results overall. 6723 ** The value of NN appears to give the best results overall.
(...skipping 30 matching lines...) Expand all
6050 BtShared *const pBt = pPage->pBt; /* B-Tree Database */ 6754 BtShared *const pBt = pPage->pBt; /* B-Tree Database */
6051 MemPage *pNew; /* Newly allocated page */ 6755 MemPage *pNew; /* Newly allocated page */
6052 int rc; /* Return Code */ 6756 int rc; /* Return Code */
6053 Pgno pgnoNew; /* Page number of pNew */ 6757 Pgno pgnoNew; /* Page number of pNew */
6054 6758
6055 assert( sqlite3_mutex_held(pPage->pBt->mutex) ); 6759 assert( sqlite3_mutex_held(pPage->pBt->mutex) );
6056 assert( sqlite3PagerIswriteable(pParent->pDbPage) ); 6760 assert( sqlite3PagerIswriteable(pParent->pDbPage) );
6057 assert( pPage->nOverflow==1 ); 6761 assert( pPage->nOverflow==1 );
6058 6762
6059 /* This error condition is now caught prior to reaching this function */ 6763 /* This error condition is now caught prior to reaching this function */
6060 if( pPage->nCell==0 ) return SQLITE_CORRUPT_BKPT; 6764 if( NEVER(pPage->nCell==0) ) return SQLITE_CORRUPT_BKPT;
6061 6765
6062 /* Allocate a new page. This page will become the right-sibling of 6766 /* Allocate a new page. This page will become the right-sibling of
6063 ** pPage. Make the parent page writable, so that the new divider cell 6767 ** pPage. Make the parent page writable, so that the new divider cell
6064 ** may be inserted. If both these operations are successful, proceed. 6768 ** may be inserted. If both these operations are successful, proceed.
6065 */ 6769 */
6066 rc = allocateBtreePage(pBt, &pNew, &pgnoNew, 0, 0); 6770 rc = allocateBtreePage(pBt, &pNew, &pgnoNew, 0, 0);
6067 6771
6068 if( rc==SQLITE_OK ){ 6772 if( rc==SQLITE_OK ){
6069 6773
6070 u8 *pOut = &pSpace[4]; 6774 u8 *pOut = &pSpace[4];
6071 u8 *pCell = pPage->apOvfl[0]; 6775 u8 *pCell = pPage->apOvfl[0];
6072 u16 szCell = cellSizePtr(pPage, pCell); 6776 u16 szCell = pPage->xCellSize(pPage, pCell);
6073 u8 *pStop; 6777 u8 *pStop;
6074 6778
6075 assert( sqlite3PagerIswriteable(pNew->pDbPage) ); 6779 assert( sqlite3PagerIswriteable(pNew->pDbPage) );
6076 assert( pPage->aData[0]==(PTF_INTKEY|PTF_LEAFDATA|PTF_LEAF) ); 6780 assert( pPage->aData[0]==(PTF_INTKEY|PTF_LEAFDATA|PTF_LEAF) );
6077 zeroPage(pNew, PTF_INTKEY|PTF_LEAFDATA|PTF_LEAF); 6781 zeroPage(pNew, PTF_INTKEY|PTF_LEAFDATA|PTF_LEAF);
6078 assemblePage(pNew, 1, &pCell, &szCell); 6782 rc = rebuildPage(pNew, 1, &pCell, &szCell);
6783 if( NEVER(rc) ) return rc;
6784 pNew->nFree = pBt->usableSize - pNew->cellOffset - 2 - szCell;
6079 6785
6080 /* If this is an auto-vacuum database, update the pointer map 6786 /* If this is an auto-vacuum database, update the pointer map
6081 ** with entries for the new page, and any pointer from the 6787 ** with entries for the new page, and any pointer from the
6082 ** cell on the page to an overflow page. If either of these 6788 ** cell on the page to an overflow page. If either of these
6083 ** operations fails, the return code is set, but the contents 6789 ** operations fails, the return code is set, but the contents
6084 ** of the parent page are still manipulated by the code below. 6790 ** of the parent page are still manipulated by the code below.
6085 ** That is Ok, at this point the parent page is guaranteed to 6791 ** That is Ok, at this point the parent page is guaranteed to
6086 ** be marked as dirty. Returning an error code will cause a 6792 ** be marked as dirty. Returning an error code will cause a
6087 ** rollback, undoing any changes made to the parent page. 6793 ** rollback, undoing any changes made to the parent page.
6088 */ 6794 */
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after
6140 u8 e; 6846 u8 e;
6141 MemPage *pPage = apPage[i]; 6847 MemPage *pPage = apPage[i];
6142 BtShared *pBt = pPage->pBt; 6848 BtShared *pBt = pPage->pBt;
6143 assert( pPage->isInit ); 6849 assert( pPage->isInit );
6144 6850
6145 for(j=0; j<pPage->nCell; j++){ 6851 for(j=0; j<pPage->nCell; j++){
6146 CellInfo info; 6852 CellInfo info;
6147 u8 *z; 6853 u8 *z;
6148 6854
6149 z = findCell(pPage, j); 6855 z = findCell(pPage, j);
6150 btreeParseCellPtr(pPage, z, &info); 6856 pPage->xParseCell(pPage, z, &info);
6151 if( info.iOverflow ){ 6857 if( info.nLocal<info.nPayload ){
6152 Pgno ovfl = get4byte(&z[info.iOverflow]); 6858 Pgno ovfl = get4byte(&z[info.nSize-4]);
6153 ptrmapGet(pBt, ovfl, &e, &n); 6859 ptrmapGet(pBt, ovfl, &e, &n);
6154 assert( n==pPage->pgno && e==PTRMAP_OVERFLOW1 ); 6860 assert( n==pPage->pgno && e==PTRMAP_OVERFLOW1 );
6155 } 6861 }
6156 if( !pPage->leaf ){ 6862 if( !pPage->leaf ){
6157 Pgno child = get4byte(z); 6863 Pgno child = get4byte(z);
6158 ptrmapGet(pBt, child, &e, &n); 6864 ptrmapGet(pBt, child, &e, &n);
6159 assert( n==pPage->pgno && e==PTRMAP_BTREE ); 6865 assert( n==pPage->pgno && e==PTRMAP_BTREE );
6160 } 6866 }
6161 } 6867 }
6162 if( !pPage->leaf ){ 6868 if( !pPage->leaf ){
(...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after
6260 ** page (pParent) the parent page becomes overfull, this buffer is 6966 ** page (pParent) the parent page becomes overfull, this buffer is
6261 ** used to store the parent's overflow cells. Because this function inserts 6967 ** used to store the parent's overflow cells. Because this function inserts
6262 ** a maximum of four divider cells into the parent page, and the maximum 6968 ** a maximum of four divider cells into the parent page, and the maximum
6263 ** size of a cell stored within an internal node is always less than 1/4 6969 ** size of a cell stored within an internal node is always less than 1/4
6264 ** of the page-size, the aOvflSpace[] buffer is guaranteed to be large 6970 ** of the page-size, the aOvflSpace[] buffer is guaranteed to be large
6265 ** enough for all overflow cells. 6971 ** enough for all overflow cells.
6266 ** 6972 **
6267 ** If aOvflSpace is set to a null pointer, this function returns 6973 ** If aOvflSpace is set to a null pointer, this function returns
6268 ** SQLITE_NOMEM. 6974 ** SQLITE_NOMEM.
6269 */ 6975 */
6270 #if defined(_MSC_VER) && _MSC_VER >= 1700 && defined(_M_ARM)
6271 #pragma optimize("", off)
6272 #endif
6273 static int balance_nonroot( 6976 static int balance_nonroot(
6274 MemPage *pParent, /* Parent page of siblings being balanced */ 6977 MemPage *pParent, /* Parent page of siblings being balanced */
6275 int iParentIdx, /* Index of "the page" in pParent */ 6978 int iParentIdx, /* Index of "the page" in pParent */
6276 u8 *aOvflSpace, /* page-size bytes of space for parent ovfl */ 6979 u8 *aOvflSpace, /* page-size bytes of space for parent ovfl */
6277 int isRoot, /* True if pParent is a root-page */ 6980 int isRoot, /* True if pParent is a root-page */
6278 int bBulk /* True if this call is part of a bulk load */ 6981 int bBulk /* True if this call is part of a bulk load */
6279 ){ 6982 ){
6280 BtShared *pBt; /* The whole database */ 6983 BtShared *pBt; /* The whole database */
6281 int nCell = 0; /* Number of cells in apCell[] */
6282 int nMaxCells = 0; /* Allocated size of apCell, szCell, aFrom. */ 6984 int nMaxCells = 0; /* Allocated size of apCell, szCell, aFrom. */
6283 int nNew = 0; /* Number of pages in apNew[] */ 6985 int nNew = 0; /* Number of pages in apNew[] */
6284 int nOld; /* Number of pages in apOld[] */ 6986 int nOld; /* Number of pages in apOld[] */
6285 int i, j, k; /* Loop counters */ 6987 int i, j, k; /* Loop counters */
6286 int nxDiv; /* Next divider slot in pParent->aCell[] */ 6988 int nxDiv; /* Next divider slot in pParent->aCell[] */
6287 int rc = SQLITE_OK; /* The return code */ 6989 int rc = SQLITE_OK; /* The return code */
6288 u16 leafCorrection; /* 4 if pPage is a leaf. 0 if not */ 6990 u16 leafCorrection; /* 4 if pPage is a leaf. 0 if not */
6289 int leafData; /* True if pPage is a leaf of a LEAFDATA tree */ 6991 int leafData; /* True if pPage is a leaf of a LEAFDATA tree */
6290 int usableSpace; /* Bytes in pPage beyond the header */ 6992 int usableSpace; /* Bytes in pPage beyond the header */
6291 int pageFlags; /* Value of pPage->aData[0] */ 6993 int pageFlags; /* Value of pPage->aData[0] */
6292 int subtotal; /* Subtotal of bytes in cells on one page */
6293 int iSpace1 = 0; /* First unused byte of aSpace1[] */ 6994 int iSpace1 = 0; /* First unused byte of aSpace1[] */
6294 int iOvflSpace = 0; /* First unused byte of aOvflSpace[] */ 6995 int iOvflSpace = 0; /* First unused byte of aOvflSpace[] */
6295 int szScratch; /* Size of scratch memory requested */ 6996 int szScratch; /* Size of scratch memory requested */
6296 MemPage *apOld[NB]; /* pPage and up to two siblings */ 6997 MemPage *apOld[NB]; /* pPage and up to two siblings */
6297 MemPage *apCopy[NB]; /* Private copies of apOld[] pages */
6298 MemPage *apNew[NB+2]; /* pPage and up to NB siblings after balancing */ 6998 MemPage *apNew[NB+2]; /* pPage and up to NB siblings after balancing */
6299 u8 *pRight; /* Location in parent of right-sibling pointer */ 6999 u8 *pRight; /* Location in parent of right-sibling pointer */
6300 u8 *apDiv[NB-1]; /* Divider cells in pParent */ 7000 u8 *apDiv[NB-1]; /* Divider cells in pParent */
6301 int cntNew[NB+2]; /* Index in aCell[] of cell after i-th page */ 7001 int cntNew[NB+2]; /* Index in b.apCell[] of cell after i-th page */
6302 int szNew[NB+2]; /* Combined size of cells place on i-th page */ 7002 int cntOld[NB+2]; /* Old index in b.apCell[] */
6303 u8 **apCell = 0; /* All cells begin balanced */ 7003 int szNew[NB+2]; /* Combined size of cells placed on i-th page */
6304 u16 *szCell; /* Local size of all cells in apCell[] */
6305 u8 *aSpace1; /* Space for copies of divider cells */ 7004 u8 *aSpace1; /* Space for copies of divider cells */
6306 Pgno pgno; /* Temp var to store a page number in */ 7005 Pgno pgno; /* Temp var to store a page number in */
7006 u8 abDone[NB+2]; /* True after i'th new page is populated */
7007 Pgno aPgno[NB+2]; /* Page numbers of new pages before shuffling */
7008 Pgno aPgOrder[NB+2]; /* Copy of aPgno[] used for sorting pages */
7009 u16 aPgFlags[NB+2]; /* flags field of new pages before shuffling */
7010 CellArray b; /* Parsed information on cells being balanced */
6307 7011
7012 memset(abDone, 0, sizeof(abDone));
7013 b.nCell = 0;
7014 b.apCell = 0;
6308 pBt = pParent->pBt; 7015 pBt = pParent->pBt;
6309 assert( sqlite3_mutex_held(pBt->mutex) ); 7016 assert( sqlite3_mutex_held(pBt->mutex) );
6310 assert( sqlite3PagerIswriteable(pParent->pDbPage) ); 7017 assert( sqlite3PagerIswriteable(pParent->pDbPage) );
6311 7018
6312 #if 0 7019 #if 0
6313 TRACE(("BALANCE: begin page %d child of %d\n", pPage->pgno, pParent->pgno)); 7020 TRACE(("BALANCE: begin page %d child of %d\n", pPage->pgno, pParent->pgno));
6314 #endif 7021 #endif
6315 7022
6316 /* At this point pParent may have at most one overflow cell. And if 7023 /* At this point pParent may have at most one overflow cell. And if
6317 ** this overflow cell is present, it must be the cell with 7024 ** this overflow cell is present, it must be the cell with
(...skipping 21 matching lines...) Expand all
6339 i = pParent->nOverflow + pParent->nCell; 7046 i = pParent->nOverflow + pParent->nCell;
6340 if( i<2 ){ 7047 if( i<2 ){
6341 nxDiv = 0; 7048 nxDiv = 0;
6342 }else{ 7049 }else{
6343 assert( bBulk==0 || bBulk==1 ); 7050 assert( bBulk==0 || bBulk==1 );
6344 if( iParentIdx==0 ){ 7051 if( iParentIdx==0 ){
6345 nxDiv = 0; 7052 nxDiv = 0;
6346 }else if( iParentIdx==i ){ 7053 }else if( iParentIdx==i ){
6347 nxDiv = i-2+bBulk; 7054 nxDiv = i-2+bBulk;
6348 }else{ 7055 }else{
6349 assert( bBulk==0 );
6350 nxDiv = iParentIdx-1; 7056 nxDiv = iParentIdx-1;
6351 } 7057 }
6352 i = 2-bBulk; 7058 i = 2-bBulk;
6353 } 7059 }
6354 nOld = i+1; 7060 nOld = i+1;
6355 if( (i+nxDiv-pParent->nOverflow)==pParent->nCell ){ 7061 if( (i+nxDiv-pParent->nOverflow)==pParent->nCell ){
6356 pRight = &pParent->aData[pParent->hdrOffset+8]; 7062 pRight = &pParent->aData[pParent->hdrOffset+8];
6357 }else{ 7063 }else{
6358 pRight = findCell(pParent, i+nxDiv-pParent->nOverflow); 7064 pRight = findCell(pParent, i+nxDiv-pParent->nOverflow);
6359 } 7065 }
6360 pgno = get4byte(pRight); 7066 pgno = get4byte(pRight);
6361 while( 1 ){ 7067 while( 1 ){
6362 rc = getAndInitPage(pBt, pgno, &apOld[i], 0); 7068 rc = getAndInitPage(pBt, pgno, &apOld[i], 0, 0);
6363 if( rc ){ 7069 if( rc ){
6364 memset(apOld, 0, (i+1)*sizeof(MemPage*)); 7070 memset(apOld, 0, (i+1)*sizeof(MemPage*));
6365 goto balance_cleanup; 7071 goto balance_cleanup;
6366 } 7072 }
6367 nMaxCells += 1+apOld[i]->nCell+apOld[i]->nOverflow; 7073 nMaxCells += 1+apOld[i]->nCell+apOld[i]->nOverflow;
6368 if( (i--)==0 ) break; 7074 if( (i--)==0 ) break;
6369 7075
6370 if( i+nxDiv==pParent->aiOvfl[0] && pParent->nOverflow ){ 7076 if( i+nxDiv==pParent->aiOvfl[0] && pParent->nOverflow ){
6371 apDiv[i] = pParent->apOvfl[0]; 7077 apDiv[i] = pParent->apOvfl[0];
6372 pgno = get4byte(apDiv[i]); 7078 pgno = get4byte(apDiv[i]);
6373 szNew[i] = cellSizePtr(pParent, apDiv[i]); 7079 szNew[i] = pParent->xCellSize(pParent, apDiv[i]);
6374 pParent->nOverflow = 0; 7080 pParent->nOverflow = 0;
6375 }else{ 7081 }else{
6376 apDiv[i] = findCell(pParent, i+nxDiv-pParent->nOverflow); 7082 apDiv[i] = findCell(pParent, i+nxDiv-pParent->nOverflow);
6377 pgno = get4byte(apDiv[i]); 7083 pgno = get4byte(apDiv[i]);
6378 szNew[i] = cellSizePtr(pParent, apDiv[i]); 7084 szNew[i] = pParent->xCellSize(pParent, apDiv[i]);
6379 7085
6380 /* Drop the cell from the parent page. apDiv[i] still points to 7086 /* Drop the cell from the parent page. apDiv[i] still points to
6381 ** the cell within the parent, even though it has been dropped. 7087 ** the cell within the parent, even though it has been dropped.
6382 ** This is safe because dropping a cell only overwrites the first 7088 ** This is safe because dropping a cell only overwrites the first
6383 ** four bytes of it, and this function does not need the first 7089 ** four bytes of it, and this function does not need the first
6384 ** four bytes of the divider cell. So the pointer is safe to use 7090 ** four bytes of the divider cell. So the pointer is safe to use
6385 ** later on. 7091 ** later on.
6386 ** 7092 **
6387 ** But not if we are in secure-delete mode. In secure-delete mode, 7093 ** But not if we are in secure-delete mode. In secure-delete mode,
6388 ** the dropCell() routine will overwrite the entire cell with zeroes. 7094 ** the dropCell() routine will overwrite the entire cell with zeroes.
(...skipping 17 matching lines...) Expand all
6406 } 7112 }
6407 } 7113 }
6408 7114
6409 /* Make nMaxCells a multiple of 4 in order to preserve 8-byte 7115 /* Make nMaxCells a multiple of 4 in order to preserve 8-byte
6410 ** alignment */ 7116 ** alignment */
6411 nMaxCells = (nMaxCells + 3)&~3; 7117 nMaxCells = (nMaxCells + 3)&~3;
6412 7118
6413 /* 7119 /*
6414 ** Allocate space for memory structures 7120 ** Allocate space for memory structures
6415 */ 7121 */
6416 k = pBt->pageSize + ROUND8(sizeof(MemPage));
6417 szScratch = 7122 szScratch =
6418 nMaxCells*sizeof(u8*) /* apCell */ 7123 nMaxCells*sizeof(u8*) /* b.apCell */
6419 + nMaxCells*sizeof(u16) /* szCell */ 7124 + nMaxCells*sizeof(u16) /* b.szCell */
6420 + pBt->pageSize /* aSpace1 */ 7125 + pBt->pageSize; /* aSpace1 */
6421 + k*nOld; /* Page copies (apCopy) */ 7126
6422 apCell = sqlite3ScratchMalloc( szScratch ); 7127 /* EVIDENCE-OF: R-28375-38319 SQLite will never request a scratch buffer
6423 if( apCell==0 ){ 7128 ** that is more than 6 times the database page size. */
7129 assert( szScratch<=6*(int)pBt->pageSize );
7130 b.apCell = sqlite3ScratchMalloc( szScratch );
7131 if( b.apCell==0 ){
6424 rc = SQLITE_NOMEM; 7132 rc = SQLITE_NOMEM;
6425 goto balance_cleanup; 7133 goto balance_cleanup;
6426 } 7134 }
6427 szCell = (u16*)&apCell[nMaxCells]; 7135 b.szCell = (u16*)&b.apCell[nMaxCells];
6428 aSpace1 = (u8*)&szCell[nMaxCells]; 7136 aSpace1 = (u8*)&b.szCell[nMaxCells];
6429 assert( EIGHT_BYTE_ALIGNMENT(aSpace1) ); 7137 assert( EIGHT_BYTE_ALIGNMENT(aSpace1) );
6430 7138
6431 /* 7139 /*
6432 ** Load pointers to all cells on sibling pages and the divider cells 7140 ** Load pointers to all cells on sibling pages and the divider cells
6433 ** into the local apCell[] array. Make copies of the divider cells 7141 ** into the local b.apCell[] array. Make copies of the divider cells
6434 ** into space obtained from aSpace1[] and remove the divider cells 7142 ** into space obtained from aSpace1[]. The divider cells have already
6435 ** from pParent. 7143 ** been removed from pParent.
6436 ** 7144 **
6437 ** If the siblings are on leaf pages, then the child pointers of the 7145 ** If the siblings are on leaf pages, then the child pointers of the
6438 ** divider cells are stripped from the cells before they are copied 7146 ** divider cells are stripped from the cells before they are copied
6439 ** into aSpace1[]. In this way, all cells in apCell[] are without 7147 ** into aSpace1[]. In this way, all cells in b.apCell[] are without
6440 ** child pointers. If siblings are not leaves, then all cells in 7148 ** child pointers. If siblings are not leaves, then all cells in
6441 ** apCell[] include child pointers. Either way, all cells in apCell[] 7149 ** b.apCell[] include child pointers. Either way, all cells in b.apCell[]
6442 ** are alike. 7150 ** are alike.
6443 ** 7151 **
6444 ** leafCorrection: 4 if pPage is a leaf. 0 if pPage is not a leaf. 7152 ** leafCorrection: 4 if pPage is a leaf. 0 if pPage is not a leaf.
6445 ** leafData: 1 if pPage holds key+data and pParent holds only keys. 7153 ** leafData: 1 if pPage holds key+data and pParent holds only keys.
6446 */ 7154 */
6447 leafCorrection = apOld[0]->leaf*4; 7155 b.pRef = apOld[0];
6448 leafData = apOld[0]->intKeyLeaf; 7156 leafCorrection = b.pRef->leaf*4;
7157 leafData = b.pRef->intKeyLeaf;
6449 for(i=0; i<nOld; i++){ 7158 for(i=0; i<nOld; i++){
6450 int limit; 7159 MemPage *pOld = apOld[i];
6451 7160 int limit = pOld->nCell;
6452 /* Before doing anything else, take a copy of the i'th original sibling 7161 u8 *aData = pOld->aData;
6453 ** The rest of this function will use data from the copies rather 7162 u16 maskPage = pOld->maskPage;
6454 ** that the original pages since the original pages will be in the 7163 u8 *piCell = aData + pOld->cellOffset;
6455 ** process of being overwritten. */ 7164 u8 *piEnd;
6456 MemPage *pOld = apCopy[i] = (MemPage*)&aSpace1[pBt->pageSize + k*i]; 7165
6457 memcpy(pOld, apOld[i], sizeof(MemPage)); 7166 /* Verify that all sibling pages are of the same "type" (table-leaf,
6458 pOld->aData = (void*)&pOld[1]; 7167 ** table-interior, index-leaf, or index-interior).
6459 memcpy(pOld->aData, apOld[i]->aData, pBt->pageSize); 7168 */
6460 7169 if( pOld->aData[0]!=apOld[0]->aData[0] ){
6461 limit = pOld->nCell+pOld->nOverflow; 7170 rc = SQLITE_CORRUPT_BKPT;
7171 goto balance_cleanup;
7172 }
7173
7174 /* Load b.apCell[] with pointers to all cells in pOld. If pOld
7175 ** contains overflow cells, include them in the b.apCell[] array
7176 ** in the correct spot.
7177 **
7178 ** Note that when there are multiple overflow cells, it is always the
7179 ** case that they are sequential and adjacent. This invariant arises
7180 ** because multiple overflows can only occur when inserting divider
7181 ** cells into a parent on a prior balance, and divider cells are always
7182 ** adjacent and are inserted in order. There is an assert() tagged
7183 ** with "NOTE 1" in the overflow cell insertion loop to prove this
7184 ** invariant.
7185 **
7186 ** This must be done in advance. Once the balance starts, the cell
7187 ** offset section of the btree page will be overwritten and we will no
7188 ** longer be able to find the cells if a pointer to each cell is not saved
7189 ** first.
7190 */
7191 memset(&b.szCell[b.nCell], 0, sizeof(b.szCell[0])*limit);
6462 if( pOld->nOverflow>0 ){ 7192 if( pOld->nOverflow>0 ){
7193 memset(&b.szCell[b.nCell+limit], 0, sizeof(b.szCell[0])*pOld->nOverflow);
7194 limit = pOld->aiOvfl[0];
6463 for(j=0; j<limit; j++){ 7195 for(j=0; j<limit; j++){
6464 assert( nCell<nMaxCells ); 7196 b.apCell[b.nCell] = aData + (maskPage & get2byteAligned(piCell));
6465 apCell[nCell] = findOverflowCell(pOld, j); 7197 piCell += 2;
6466 szCell[nCell] = cellSizePtr(pOld, apCell[nCell]); 7198 b.nCell++;
6467 nCell++; 7199 }
6468 } 7200 for(k=0; k<pOld->nOverflow; k++){
6469 }else{ 7201 assert( k==0 || pOld->aiOvfl[k-1]+1==pOld->aiOvfl[k] );/* NOTE 1 */
6470 u8 *aData = pOld->aData; 7202 b.apCell[b.nCell] = pOld->apOvfl[k];
6471 u16 maskPage = pOld->maskPage; 7203 b.nCell++;
6472 u16 cellOffset = pOld->cellOffset; 7204 }
6473 for(j=0; j<limit; j++){ 7205 }
6474 assert( nCell<nMaxCells ); 7206 piEnd = aData + pOld->cellOffset + 2*pOld->nCell;
6475 apCell[nCell] = findCellv2(aData, maskPage, cellOffset, j); 7207 while( piCell<piEnd ){
6476 szCell[nCell] = cellSizePtr(pOld, apCell[nCell]); 7208 assert( b.nCell<nMaxCells );
6477 nCell++; 7209 b.apCell[b.nCell] = aData + (maskPage & get2byteAligned(piCell));
6478 } 7210 piCell += 2;
6479 } 7211 b.nCell++;
7212 }
7213
7214 cntOld[i] = b.nCell;
6480 if( i<nOld-1 && !leafData){ 7215 if( i<nOld-1 && !leafData){
6481 u16 sz = (u16)szNew[i]; 7216 u16 sz = (u16)szNew[i];
6482 u8 *pTemp; 7217 u8 *pTemp;
6483 assert( nCell<nMaxCells ); 7218 assert( b.nCell<nMaxCells );
6484 szCell[nCell] = sz; 7219 b.szCell[b.nCell] = sz;
6485 pTemp = &aSpace1[iSpace1]; 7220 pTemp = &aSpace1[iSpace1];
6486 iSpace1 += sz; 7221 iSpace1 += sz;
6487 assert( sz<=pBt->maxLocal+23 ); 7222 assert( sz<=pBt->maxLocal+23 );
6488 assert( iSpace1 <= (int)pBt->pageSize ); 7223 assert( iSpace1 <= (int)pBt->pageSize );
6489 memcpy(pTemp, apDiv[i], sz); 7224 memcpy(pTemp, apDiv[i], sz);
6490 apCell[nCell] = pTemp+leafCorrection; 7225 b.apCell[b.nCell] = pTemp+leafCorrection;
6491 assert( leafCorrection==0 || leafCorrection==4 ); 7226 assert( leafCorrection==0 || leafCorrection==4 );
6492 szCell[nCell] = szCell[nCell] - leafCorrection; 7227 b.szCell[b.nCell] = b.szCell[b.nCell] - leafCorrection;
6493 if( !pOld->leaf ){ 7228 if( !pOld->leaf ){
6494 assert( leafCorrection==0 ); 7229 assert( leafCorrection==0 );
6495 assert( pOld->hdrOffset==0 ); 7230 assert( pOld->hdrOffset==0 );
6496 /* The right pointer of the child page pOld becomes the left 7231 /* The right pointer of the child page pOld becomes the left
6497 ** pointer of the divider cell */ 7232 ** pointer of the divider cell */
6498 memcpy(apCell[nCell], &pOld->aData[8], 4); 7233 memcpy(b.apCell[b.nCell], &pOld->aData[8], 4);
6499 }else{ 7234 }else{
6500 assert( leafCorrection==4 ); 7235 assert( leafCorrection==4 );
6501 if( szCell[nCell]<4 ){ 7236 while( b.szCell[b.nCell]<4 ){
6502 /* Do not allow any cells smaller than 4 bytes. */ 7237 /* Do not allow any cells smaller than 4 bytes. If a smaller cell
6503 szCell[nCell] = 4; 7238 ** does exist, pad it with 0x00 bytes. */
7239 assert( b.szCell[b.nCell]==3 || CORRUPT_DB );
7240 assert( b.apCell[b.nCell]==&aSpace1[iSpace1-3] || CORRUPT_DB );
7241 aSpace1[iSpace1++] = 0x00;
7242 b.szCell[b.nCell]++;
6504 } 7243 }
6505 } 7244 }
6506 nCell++; 7245 b.nCell++;
6507 } 7246 }
6508 } 7247 }
6509 7248
6510 /* 7249 /*
6511 ** Figure out the number of pages needed to hold all nCell cells. 7250 ** Figure out the number of pages needed to hold all b.nCell cells.
6512 ** Store this number in "k". Also compute szNew[] which is the total 7251 ** Store this number in "k". Also compute szNew[] which is the total
6513 ** size of all cells on the i-th page and cntNew[] which is the index 7252 ** size of all cells on the i-th page and cntNew[] which is the index
6514 ** in apCell[] of the cell that divides page i from page i+1. 7253 ** in b.apCell[] of the cell that divides page i from page i+1.
6515 ** cntNew[k] should equal nCell. 7254 ** cntNew[k] should equal b.nCell.
6516 ** 7255 **
6517 ** Values computed by this block: 7256 ** Values computed by this block:
6518 ** 7257 **
6519 ** k: The total number of sibling pages 7258 ** k: The total number of sibling pages
6520 ** szNew[i]: Space used on the i-th sibling page. 7259 ** szNew[i]: Space used on the i-th sibling page.
6521 ** cntNew[i]: Index in apCell[] and szCell[] for the first cell to 7260 ** cntNew[i]: Index in b.apCell[] and b.szCell[] for the first cell to
6522 ** the right of the i-th sibling page. 7261 ** the right of the i-th sibling page.
6523 ** usableSpace: Number of bytes of space available on each sibling. 7262 ** usableSpace: Number of bytes of space available on each sibling.
6524 ** 7263 **
6525 */ 7264 */
6526 usableSpace = pBt->usableSize - 12 + leafCorrection; 7265 usableSpace = pBt->usableSize - 12 + leafCorrection;
6527 for(subtotal=k=i=0; i<nCell; i++){ 7266 for(i=0; i<nOld; i++){
6528 assert( i<nMaxCells ); 7267 MemPage *p = apOld[i];
6529 subtotal += szCell[i] + 2; 7268 szNew[i] = usableSpace - p->nFree;
6530 if( subtotal > usableSpace ){ 7269 if( szNew[i]<0 ){ rc = SQLITE_CORRUPT_BKPT; goto balance_cleanup; }
6531 szNew[k] = subtotal - szCell[i]; 7270 for(j=0; j<p->nOverflow; j++){
6532 cntNew[k] = i; 7271 szNew[i] += 2 + p->xCellSize(p, p->apOvfl[j]);
6533 if( leafData ){ i--; } 7272 }
6534 subtotal = 0; 7273 cntNew[i] = cntOld[i];
6535 k++; 7274 }
6536 if( k>NB+1 ){ rc = SQLITE_CORRUPT_BKPT; goto balance_cleanup; } 7275 k = nOld;
6537 } 7276 for(i=0; i<k; i++){
6538 } 7277 int sz;
6539 szNew[k] = subtotal; 7278 while( szNew[i]>usableSpace ){
6540 cntNew[k] = nCell; 7279 if( i+1>=k ){
6541 k++; 7280 k = i+2;
7281 if( k>NB+2 ){ rc = SQLITE_CORRUPT_BKPT; goto balance_cleanup; }
7282 szNew[k-1] = 0;
7283 cntNew[k-1] = b.nCell;
7284 }
7285 sz = 2 + cachedCellSize(&b, cntNew[i]-1);
7286 szNew[i] -= sz;
7287 if( !leafData ){
7288 if( cntNew[i]<b.nCell ){
7289 sz = 2 + cachedCellSize(&b, cntNew[i]);
7290 }else{
7291 sz = 0;
7292 }
7293 }
7294 szNew[i+1] += sz;
7295 cntNew[i]--;
7296 }
7297 while( cntNew[i]<b.nCell ){
7298 sz = 2 + cachedCellSize(&b, cntNew[i]);
7299 if( szNew[i]+sz>usableSpace ) break;
7300 szNew[i] += sz;
7301 cntNew[i]++;
7302 if( !leafData ){
7303 if( cntNew[i]<b.nCell ){
7304 sz = 2 + cachedCellSize(&b, cntNew[i]);
7305 }else{
7306 sz = 0;
7307 }
7308 }
7309 szNew[i+1] -= sz;
7310 }
7311 if( cntNew[i]>=b.nCell ){
7312 k = i+1;
7313 }else if( cntNew[i] <= (i>0 ? cntNew[i-1] : 0) ){
7314 rc = SQLITE_CORRUPT_BKPT;
7315 goto balance_cleanup;
7316 }
7317 }
6542 7318
6543 /* 7319 /*
6544 ** The packing computed by the previous block is biased toward the siblings 7320 ** The packing computed by the previous block is biased toward the siblings
6545 ** on the left side. The left siblings are always nearly full, while the 7321 ** on the left side (siblings with smaller keys). The left siblings are
6546 ** right-most sibling might be nearly empty. This block of code attempts 7322 ** always nearly full, while the right-most sibling might be nearly empty.
6547 ** to adjust the packing of siblings to get a better balance. 7323 ** The next block of code attempts to adjust the packing of siblings to
7324 ** get a better balance.
6548 ** 7325 **
6549 ** This adjustment is more than an optimization. The packing above might 7326 ** This adjustment is more than an optimization. The packing above might
6550 ** be so out of balance as to be illegal. For example, the right-most 7327 ** be so out of balance as to be illegal. For example, the right-most
6551 ** sibling might be completely empty. This adjustment is not optional. 7328 ** sibling might be completely empty. This adjustment is not optional.
6552 */ 7329 */
6553 for(i=k-1; i>0; i--){ 7330 for(i=k-1; i>0; i--){
6554 int szRight = szNew[i]; /* Size of sibling on the right */ 7331 int szRight = szNew[i]; /* Size of sibling on the right */
6555 int szLeft = szNew[i-1]; /* Size of sibling on the left */ 7332 int szLeft = szNew[i-1]; /* Size of sibling on the left */
6556 int r; /* Index of right-most cell in left sibling */ 7333 int r; /* Index of right-most cell in left sibling */
6557 int d; /* Index of first cell to the left of right sibling */ 7334 int d; /* Index of first cell to the left of right sibling */
6558 7335
6559 r = cntNew[i-1] - 1; 7336 r = cntNew[i-1] - 1;
6560 d = r + 1 - leafData; 7337 d = r + 1 - leafData;
6561 assert( d<nMaxCells ); 7338 (void)cachedCellSize(&b, d);
6562 assert( r<nMaxCells ); 7339 do{
6563 while( szRight==0 7340 assert( d<nMaxCells );
6564 || (!bBulk && szRight+szCell[d]+2<=szLeft-(szCell[r]+2)) 7341 assert( r<nMaxCells );
6565 ){ 7342 (void)cachedCellSize(&b, r);
6566 szRight += szCell[d] + 2; 7343 if( szRight!=0
6567 szLeft -= szCell[r] + 2; 7344 && (bBulk || szRight+b.szCell[d]+2 > szLeft-(b.szCell[r]+2)) ){
6568 cntNew[i-1]--; 7345 break;
6569 r = cntNew[i-1] - 1; 7346 }
6570 d = r + 1 - leafData; 7347 szRight += b.szCell[d] + 2;
6571 } 7348 szLeft -= b.szCell[r] + 2;
7349 cntNew[i-1] = r;
7350 r--;
7351 d--;
7352 }while( r>=0 );
6572 szNew[i] = szRight; 7353 szNew[i] = szRight;
6573 szNew[i-1] = szLeft; 7354 szNew[i-1] = szLeft;
7355 if( cntNew[i-1] <= (i>1 ? cntNew[i-2] : 0) ){
7356 rc = SQLITE_CORRUPT_BKPT;
7357 goto balance_cleanup;
7358 }
6574 } 7359 }
6575 7360
6576 /* Either we found one or more cells (cntnew[0])>0) or pPage is 7361 /* Sanity check: For a non-corrupt database file one of the following
6577 ** a virtual root page. A virtual root page is when the real root 7362 ** must be true:
6578 ** page is page 1 and we are the only child of that page. 7363 ** (1) We found one or more cells (cntNew[0])>0), or
6579 ** 7364 ** (2) pPage is a virtual root page. A virtual root page is when
6580 ** UPDATE: The assert() below is not necessarily true if the database 7365 ** the real root page is page 1 and we are the only child of
6581 ** file is corrupt. The corruption will be detected and reported later 7366 ** that page.
6582 ** in this procedure so there is no need to act upon it now.
6583 */ 7367 */
6584 #if 0 7368 assert( cntNew[0]>0 || (pParent->pgno==1 && pParent->nCell==0) || CORRUPT_DB);
6585 assert( cntNew[0]>0 || (pParent->pgno==1 && pParent->nCell==0) ); 7369 TRACE(("BALANCE: old: %d(nc=%d) %d(nc=%d) %d(nc=%d)\n",
6586 #endif 7370 apOld[0]->pgno, apOld[0]->nCell,
6587 7371 nOld>=2 ? apOld[1]->pgno : 0, nOld>=2 ? apOld[1]->nCell : 0,
6588 TRACE(("BALANCE: old: %d %d %d ", 7372 nOld>=3 ? apOld[2]->pgno : 0, nOld>=3 ? apOld[2]->nCell : 0
6589 apOld[0]->pgno,
6590 nOld>=2 ? apOld[1]->pgno : 0,
6591 nOld>=3 ? apOld[2]->pgno : 0
6592 )); 7373 ));
6593 7374
6594 /* 7375 /*
6595 ** Allocate k new pages. Reuse old pages where possible. 7376 ** Allocate k new pages. Reuse old pages where possible.
6596 */ 7377 */
6597 if( apOld[0]->pgno<=1 ){
6598 rc = SQLITE_CORRUPT_BKPT;
6599 goto balance_cleanup;
6600 }
6601 pageFlags = apOld[0]->aData[0]; 7378 pageFlags = apOld[0]->aData[0];
6602 for(i=0; i<k; i++){ 7379 for(i=0; i<k; i++){
6603 MemPage *pNew; 7380 MemPage *pNew;
6604 if( i<nOld ){ 7381 if( i<nOld ){
6605 pNew = apNew[i] = apOld[i]; 7382 pNew = apNew[i] = apOld[i];
6606 apOld[i] = 0; 7383 apOld[i] = 0;
6607 rc = sqlite3PagerWrite(pNew->pDbPage); 7384 rc = sqlite3PagerWrite(pNew->pDbPage);
6608 nNew++; 7385 nNew++;
6609 if( rc ) goto balance_cleanup; 7386 if( rc ) goto balance_cleanup;
6610 }else{ 7387 }else{
6611 assert( i>0 ); 7388 assert( i>0 );
6612 rc = allocateBtreePage(pBt, &pNew, &pgno, (bBulk ? 1 : pgno), 0); 7389 rc = allocateBtreePage(pBt, &pNew, &pgno, (bBulk ? 1 : pgno), 0);
6613 if( rc ) goto balance_cleanup; 7390 if( rc ) goto balance_cleanup;
7391 zeroPage(pNew, pageFlags);
6614 apNew[i] = pNew; 7392 apNew[i] = pNew;
6615 nNew++; 7393 nNew++;
7394 cntOld[i] = b.nCell;
6616 7395
6617 /* Set the pointer-map entry for the new sibling page. */ 7396 /* Set the pointer-map entry for the new sibling page. */
6618 if( ISAUTOVACUUM ){ 7397 if( ISAUTOVACUUM ){
6619 ptrmapPut(pBt, pNew->pgno, PTRMAP_BTREE, pParent->pgno, &rc); 7398 ptrmapPut(pBt, pNew->pgno, PTRMAP_BTREE, pParent->pgno, &rc);
6620 if( rc!=SQLITE_OK ){ 7399 if( rc!=SQLITE_OK ){
6621 goto balance_cleanup; 7400 goto balance_cleanup;
6622 } 7401 }
6623 } 7402 }
6624 } 7403 }
6625 } 7404 }
6626 7405
6627 /* Free any old pages that were not reused as new pages. 7406 /*
7407 ** Reassign page numbers so that the new pages are in ascending order.
7408 ** This helps to keep entries in the disk file in order so that a scan
7409 ** of the table is closer to a linear scan through the file. That in turn
7410 ** helps the operating system to deliver pages from the disk more rapidly.
7411 **
7412 ** An O(n^2) insertion sort algorithm is used, but since n is never more
7413 ** than (NB+2) (a small constant), that should not be a problem.
7414 **
7415 ** When NB==3, this one optimization makes the database about 25% faster
7416 ** for large insertions and deletions.
6628 */ 7417 */
6629 while( i<nOld ){ 7418 for(i=0; i<nNew; i++){
6630 freePage(apOld[i], &rc); 7419 aPgOrder[i] = aPgno[i] = apNew[i]->pgno;
6631 if( rc ) goto balance_cleanup; 7420 aPgFlags[i] = apNew[i]->pDbPage->flags;
6632 releasePage(apOld[i]); 7421 for(j=0; j<i; j++){
6633 apOld[i] = 0; 7422 if( aPgno[j]==aPgno[i] ){
6634 i++; 7423 /* This branch is taken if the set of sibling pages somehow contains
6635 } 7424 ** duplicate entries. This can happen if the database is corrupt.
6636 7425 ** It would be simpler to detect this as part of the loop below, but
6637 /* 7426 ** we do the detection here in order to avoid populating the pager
6638 ** Put the new pages in ascending order. This helps to 7427 ** cache with two separate objects associated with the same
6639 ** keep entries in the disk file in order so that a scan 7428 ** page number. */
6640 ** of the table is a linear scan through the file. That 7429 assert( CORRUPT_DB );
6641 ** in turn helps the operating system to deliver pages 7430 rc = SQLITE_CORRUPT_BKPT;
6642 ** from the disk more rapidly. 7431 goto balance_cleanup;
6643 ** 7432 }
6644 ** An O(n^2) insertion sort algorithm is used, but since 7433 }
6645 ** n is never more than NB (a small constant), that should 7434 }
6646 ** not be a problem. 7435 for(i=0; i<nNew; i++){
6647 ** 7436 int iBest = 0; /* aPgno[] index of page number to use */
6648 ** When NB==3, this one optimization makes the database 7437 for(j=1; j<nNew; j++){
6649 ** about 25% faster for large insertions and deletions. 7438 if( aPgOrder[j]<aPgOrder[iBest] ) iBest = j;
6650 */ 7439 }
6651 for(i=0; i<k-1; i++){ 7440 pgno = aPgOrder[iBest];
6652 int minV = apNew[i]->pgno; 7441 aPgOrder[iBest] = 0xffffffff;
6653 int minI = i; 7442 if( iBest!=i ){
6654 for(j=i+1; j<k; j++){ 7443 if( iBest>i ){
6655 if( apNew[j]->pgno<(unsigned)minV ){ 7444 sqlite3PagerRekey(apNew[iBest]->pDbPage, pBt->nPage+iBest+1, 0);
6656 minI = j; 7445 }
6657 minV = apNew[j]->pgno; 7446 sqlite3PagerRekey(apNew[i]->pDbPage, pgno, aPgFlags[iBest]);
6658 } 7447 apNew[i]->pgno = pgno;
6659 } 7448 }
6660 if( minI>i ){ 7449 }
6661 MemPage *pT; 7450
6662 pT = apNew[i]; 7451 TRACE(("BALANCE: new: %d(%d nc=%d) %d(%d nc=%d) %d(%d nc=%d) "
6663 apNew[i] = apNew[minI]; 7452 "%d(%d nc=%d) %d(%d nc=%d)\n",
6664 apNew[minI] = pT; 7453 apNew[0]->pgno, szNew[0], cntNew[0],
6665 }
6666 }
6667 TRACE(("new: %d(%d) %d(%d) %d(%d) %d(%d) %d(%d)\n",
6668 apNew[0]->pgno, szNew[0],
6669 nNew>=2 ? apNew[1]->pgno : 0, nNew>=2 ? szNew[1] : 0, 7454 nNew>=2 ? apNew[1]->pgno : 0, nNew>=2 ? szNew[1] : 0,
7455 nNew>=2 ? cntNew[1] - cntNew[0] - !leafData : 0,
6670 nNew>=3 ? apNew[2]->pgno : 0, nNew>=3 ? szNew[2] : 0, 7456 nNew>=3 ? apNew[2]->pgno : 0, nNew>=3 ? szNew[2] : 0,
7457 nNew>=3 ? cntNew[2] - cntNew[1] - !leafData : 0,
6671 nNew>=4 ? apNew[3]->pgno : 0, nNew>=4 ? szNew[3] : 0, 7458 nNew>=4 ? apNew[3]->pgno : 0, nNew>=4 ? szNew[3] : 0,
6672 nNew>=5 ? apNew[4]->pgno : 0, nNew>=5 ? szNew[4] : 0)); 7459 nNew>=4 ? cntNew[3] - cntNew[2] - !leafData : 0,
7460 nNew>=5 ? apNew[4]->pgno : 0, nNew>=5 ? szNew[4] : 0,
7461 nNew>=5 ? cntNew[4] - cntNew[3] - !leafData : 0
7462 ));
6673 7463
6674 assert( sqlite3PagerIswriteable(pParent->pDbPage) ); 7464 assert( sqlite3PagerIswriteable(pParent->pDbPage) );
6675 put4byte(pRight, apNew[nNew-1]->pgno); 7465 put4byte(pRight, apNew[nNew-1]->pgno);
6676 7466
6677 /* 7467 /* If the sibling pages are not leaves, ensure that the right-child pointer
6678 ** Evenly distribute the data in apCell[] across the new pages. 7468 ** of the right-most new sibling page is set to the value that was
6679 ** Insert divider cells into pParent as necessary. 7469 ** originally in the same field of the right-most old sibling page. */
7470 if( (pageFlags & PTF_LEAF)==0 && nOld!=nNew ){
7471 MemPage *pOld = (nNew>nOld ? apNew : apOld)[nOld-1];
7472 memcpy(&apNew[nNew-1]->aData[8], &pOld->aData[8], 4);
7473 }
7474
7475 /* Make any required updates to pointer map entries associated with
7476 ** cells stored on sibling pages following the balance operation. Pointer
7477 ** map entries associated with divider cells are set by the insertCell()
7478 ** routine. The associated pointer map entries are:
7479 **
7480 ** a) if the cell contains a reference to an overflow chain, the
7481 ** entry associated with the first page in the overflow chain, and
7482 **
7483 ** b) if the sibling pages are not leaves, the child page associated
7484 ** with the cell.
7485 **
7486 ** If the sibling pages are not leaves, then the pointer map entry
7487 ** associated with the right-child of each sibling may also need to be
7488 ** updated. This happens below, after the sibling pages have been
7489 ** populated, not here.
6680 */ 7490 */
6681 j = 0; 7491 if( ISAUTOVACUUM ){
6682 for(i=0; i<nNew; i++){ 7492 MemPage *pNew = apNew[0];
6683 /* Assemble the new sibling page. */ 7493 u8 *aOld = pNew->aData;
7494 int cntOldNext = pNew->nCell + pNew->nOverflow;
7495 int usableSize = pBt->usableSize;
7496 int iNew = 0;
7497 int iOld = 0;
7498
7499 for(i=0; i<b.nCell; i++){
7500 u8 *pCell = b.apCell[i];
7501 if( i==cntOldNext ){
7502 MemPage *pOld = (++iOld)<nNew ? apNew[iOld] : apOld[iOld];
7503 cntOldNext += pOld->nCell + pOld->nOverflow + !leafData;
7504 aOld = pOld->aData;
7505 }
7506 if( i==cntNew[iNew] ){
7507 pNew = apNew[++iNew];
7508 if( !leafData ) continue;
7509 }
7510
7511 /* Cell pCell is destined for new sibling page pNew. Originally, it
7512 ** was either part of sibling page iOld (possibly an overflow cell),
7513 ** or else the divider cell to the left of sibling page iOld. So,
7514 ** if sibling page iOld had the same page number as pNew, and if
7515 ** pCell really was a part of sibling page iOld (not a divider or
7516 ** overflow cell), we can skip updating the pointer map entries. */
7517 if( iOld>=nNew
7518 || pNew->pgno!=aPgno[iOld]
7519 || !SQLITE_WITHIN(pCell,aOld,&aOld[usableSize])
7520 ){
7521 if( !leafCorrection ){
7522 ptrmapPut(pBt, get4byte(pCell), PTRMAP_BTREE, pNew->pgno, &rc);
7523 }
7524 if( cachedCellSize(&b,i)>pNew->minLocal ){
7525 ptrmapPutOvflPtr(pNew, pCell, &rc);
7526 }
7527 if( rc ) goto balance_cleanup;
7528 }
7529 }
7530 }
7531
7532 /* Insert new divider cells into pParent. */
7533 for(i=0; i<nNew-1; i++){
7534 u8 *pCell;
7535 u8 *pTemp;
7536 int sz;
6684 MemPage *pNew = apNew[i]; 7537 MemPage *pNew = apNew[i];
7538 j = cntNew[i];
7539
6685 assert( j<nMaxCells ); 7540 assert( j<nMaxCells );
6686 zeroPage(pNew, pageFlags); 7541 assert( b.apCell[j]!=0 );
6687 assemblePage(pNew, cntNew[i]-j, &apCell[j], &szCell[j]); 7542 pCell = b.apCell[j];
6688 assert( pNew->nCell>0 || (nNew==1 && cntNew[0]==0) ); 7543 sz = b.szCell[j] + leafCorrection;
6689 assert( pNew->nOverflow==0 ); 7544 pTemp = &aOvflSpace[iOvflSpace];
6690 7545 if( !pNew->leaf ){
6691 j = cntNew[i]; 7546 memcpy(&pNew->aData[8], pCell, 4);
6692 7547 }else if( leafData ){
6693 /* If the sibling page assembled above was not the right-most sibling, 7548 /* If the tree is a leaf-data tree, and the siblings are leaves,
6694 ** insert a divider cell into the parent page. 7549 ** then there is no divider cell in b.apCell[]. Instead, the divider
6695 */ 7550 ** cell consists of the integer key for the right-most cell of
6696 assert( i<nNew-1 || j==nCell ); 7551 ** the sibling-page assembled above only.
6697 if( j<nCell ){ 7552 */
6698 u8 *pCell; 7553 CellInfo info;
6699 u8 *pTemp; 7554 j--;
6700 int sz; 7555 pNew->xParseCell(pNew, b.apCell[j], &info);
6701 7556 pCell = pTemp;
6702 assert( j<nMaxCells ); 7557 sz = 4 + putVarint(&pCell[4], info.nKey);
6703 pCell = apCell[j]; 7558 pTemp = 0;
6704 sz = szCell[j] + leafCorrection; 7559 }else{
6705 pTemp = &aOvflSpace[iOvflSpace]; 7560 pCell -= 4;
6706 if( !pNew->leaf ){ 7561 /* Obscure case for non-leaf-data trees: If the cell at pCell was
6707 memcpy(&pNew->aData[8], pCell, 4); 7562 ** previously stored on a leaf node, and its reported size was 4
6708 }else if( leafData ){ 7563 ** bytes, then it may actually be smaller than this
6709 /* If the tree is a leaf-data tree, and the siblings are leaves, 7564 ** (see btreeParseCellPtr(), 4 bytes is the minimum size of
6710 ** then there is no divider cell in apCell[]. Instead, the divider 7565 ** any cell). But it is important to pass the correct size to
6711 ** cell consists of the integer key for the right-most cell of 7566 ** insertCell(), so reparse the cell now.
6712 ** the sibling-page assembled above only. 7567 **
6713 */ 7568 ** Note that this can never happen in an SQLite data file, as all
6714 CellInfo info; 7569 ** cells are at least 4 bytes. It only happens in b-trees used
6715 j--; 7570 ** to evaluate "IN (SELECT ...)" and similar clauses.
6716 btreeParseCellPtr(pNew, apCell[j], &info); 7571 */
6717 pCell = pTemp; 7572 if( b.szCell[j]==4 ){
6718 sz = 4 + putVarint(&pCell[4], info.nKey); 7573 assert(leafCorrection==4);
6719 pTemp = 0; 7574 sz = pParent->xCellSize(pParent, pCell);
7575 }
7576 }
7577 iOvflSpace += sz;
7578 assert( sz<=pBt->maxLocal+23 );
7579 assert( iOvflSpace <= (int)pBt->pageSize );
7580 insertCell(pParent, nxDiv+i, pCell, sz, pTemp, pNew->pgno, &rc);
7581 if( rc!=SQLITE_OK ) goto balance_cleanup;
7582 assert( sqlite3PagerIswriteable(pParent->pDbPage) );
7583 }
7584
7585 /* Now update the actual sibling pages. The order in which they are updated
7586 ** is important, as this code needs to avoid disrupting any page from which
7587 ** cells may still need to be read. In practice, this means:
7588 **
7589 ** (1) If cells are moving left (from apNew[iPg] to apNew[iPg-1])
7590 ** then it is not safe to update page apNew[iPg] until after
7591 ** the left-hand sibling apNew[iPg-1] has been updated.
7592 **
7593 ** (2) If cells are moving right (from apNew[iPg] to apNew[iPg+1])
7594 ** then it is not safe to update page apNew[iPg] until after
7595 ** the right-hand sibling apNew[iPg+1] has been updated.
7596 **
7597 ** If neither of the above apply, the page is safe to update.
7598 **
7599 ** The iPg value in the following loop starts at nNew-1 goes down
7600 ** to 0, then back up to nNew-1 again, thus making two passes over
7601 ** the pages. On the initial downward pass, only condition (1) above
7602 ** needs to be tested because (2) will always be true from the previous
7603 ** step. On the upward pass, both conditions are always true, so the
7604 ** upwards pass simply processes pages that were missed on the downward
7605 ** pass.
7606 */
7607 for(i=1-nNew; i<nNew; i++){
7608 int iPg = i<0 ? -i : i;
7609 assert( iPg>=0 && iPg<nNew );
7610 if( abDone[iPg] ) continue; /* Skip pages already processed */
7611 if( i>=0 /* On the upwards pass, or... */
7612 || cntOld[iPg-1]>=cntNew[iPg-1] /* Condition (1) is true */
7613 ){
7614 int iNew;
7615 int iOld;
7616 int nNewCell;
7617
7618 /* Verify condition (1): If cells are moving left, update iPg
7619 ** only after iPg-1 has already been updated. */
7620 assert( iPg==0 || cntOld[iPg-1]>=cntNew[iPg-1] || abDone[iPg-1] );
7621
7622 /* Verify condition (2): If cells are moving right, update iPg
7623 ** only after iPg+1 has already been updated. */
7624 assert( cntNew[iPg]>=cntOld[iPg] || abDone[iPg+1] );
7625
7626 if( iPg==0 ){
7627 iNew = iOld = 0;
7628 nNewCell = cntNew[0];
6720 }else{ 7629 }else{
6721 pCell -= 4; 7630 iOld = iPg<nOld ? (cntOld[iPg-1] + !leafData) : b.nCell;
6722 /* Obscure case for non-leaf-data trees: If the cell at pCell was 7631 iNew = cntNew[iPg-1] + !leafData;
6723 ** previously stored on a leaf node, and its reported size was 4 7632 nNewCell = cntNew[iPg] - iNew;
6724 ** bytes, then it may actually be smaller than this 7633 }
6725 ** (see btreeParseCellPtr(), 4 bytes is the minimum size of 7634
6726 ** any cell). But it is important to pass the correct size to 7635 rc = editPage(apNew[iPg], iOld, iNew, nNewCell, &b);
6727 ** insertCell(), so reparse the cell now. 7636 if( rc ) goto balance_cleanup;
6728 ** 7637 abDone[iPg]++;
6729 ** Note that this can never happen in an SQLite data file, as all 7638 apNew[iPg]->nFree = usableSpace-szNew[iPg];
6730 ** cells are at least 4 bytes. It only happens in b-trees used 7639 assert( apNew[iPg]->nOverflow==0 );
6731 ** to evaluate "IN (SELECT ...)" and similar clauses. 7640 assert( apNew[iPg]->nCell==nNewCell );
6732 */ 7641 }
6733 if( szCell[j]==4 ){ 7642 }
6734 assert(leafCorrection==4); 7643
6735 sz = cellSizePtr(pParent, pCell); 7644 /* All pages have been processed exactly once */
6736 } 7645 assert( memcmp(abDone, "\01\01\01\01\01", nNew)==0 );
6737 } 7646
6738 iOvflSpace += sz;
6739 assert( sz<=pBt->maxLocal+23 );
6740 assert( iOvflSpace <= (int)pBt->pageSize );
6741 insertCell(pParent, nxDiv, pCell, sz, pTemp, pNew->pgno, &rc);
6742 if( rc!=SQLITE_OK ) goto balance_cleanup;
6743 assert( sqlite3PagerIswriteable(pParent->pDbPage) );
6744
6745 j++;
6746 nxDiv++;
6747 }
6748 }
6749 assert( j==nCell );
6750 assert( nOld>0 ); 7647 assert( nOld>0 );
6751 assert( nNew>0 ); 7648 assert( nNew>0 );
6752 if( (pageFlags & PTF_LEAF)==0 ){
6753 u8 *zChild = &apCopy[nOld-1]->aData[8];
6754 memcpy(&apNew[nNew-1]->aData[8], zChild, 4);
6755 }
6756 7649
6757 if( isRoot && pParent->nCell==0 && pParent->hdrOffset<=apNew[0]->nFree ){ 7650 if( isRoot && pParent->nCell==0 && pParent->hdrOffset<=apNew[0]->nFree ){
6758 /* The root page of the b-tree now contains no cells. The only sibling 7651 /* The root page of the b-tree now contains no cells. The only sibling
6759 ** page is the right-child of the parent. Copy the contents of the 7652 ** page is the right-child of the parent. Copy the contents of the
6760 ** child page into the parent, decreasing the overall height of the 7653 ** child page into the parent, decreasing the overall height of the
6761 ** b-tree structure by one. This is described as the "balance-shallower" 7654 ** b-tree structure by one. This is described as the "balance-shallower"
6762 ** sub-algorithm in some documentation. 7655 ** sub-algorithm in some documentation.
6763 ** 7656 **
6764 ** If this is an auto-vacuum database, the call to copyNodeContent() 7657 ** If this is an auto-vacuum database, the call to copyNodeContent()
6765 ** sets all pointer-map entries corresponding to database image pages 7658 ** sets all pointer-map entries corresponding to database image pages
6766 ** for which the pointer is stored within the content being copied. 7659 ** for which the pointer is stored within the content being copied.
6767 ** 7660 **
6768 ** The second assert below verifies that the child page is defragmented 7661 ** It is critical that the child page be defragmented before being
6769 ** (it must be, as it was just reconstructed using assemblePage()). This 7662 ** copied into the parent, because if the parent is page 1 then it will
6770 ** is important if the parent page happens to be page 1 of the database 7663 ** be smaller than the child due to the database header, and so all the
6771 ** image. */ 7664 ** free space needs to be up front.
6772 assert( nNew==1 ); 7665 */
7666 assert( nNew==1 || CORRUPT_DB );
7667 rc = defragmentPage(apNew[0]);
7668 testcase( rc!=SQLITE_OK );
6773 assert( apNew[0]->nFree == 7669 assert( apNew[0]->nFree ==
6774 (get2byte(&apNew[0]->aData[5])-apNew[0]->cellOffset-apNew[0]->nCell*2) 7670 (get2byte(&apNew[0]->aData[5])-apNew[0]->cellOffset-apNew[0]->nCell*2)
7671 || rc!=SQLITE_OK
6775 ); 7672 );
6776 copyNodeContent(apNew[0], pParent, &rc); 7673 copyNodeContent(apNew[0], pParent, &rc);
6777 freePage(apNew[0], &rc); 7674 freePage(apNew[0], &rc);
6778 }else if( ISAUTOVACUUM ){ 7675 }else if( ISAUTOVACUUM && !leafCorrection ){
6779 /* Fix the pointer-map entries for all the cells that were shifted around. 7676 /* Fix the pointer map entries associated with the right-child of each
6780 ** There are several different types of pointer-map entries that need to 7677 ** sibling page. All other pointer map entries have already been taken
6781 ** be dealt with by this routine. Some of these have been set already, but 7678 ** care of. */
6782 ** many have not. The following is a summary: 7679 for(i=0; i<nNew; i++){
6783 ** 7680 u32 key = get4byte(&apNew[i]->aData[8]);
6784 ** 1) The entries associated with new sibling pages that were not 7681 ptrmapPut(pBt, key, PTRMAP_BTREE, apNew[i]->pgno, &rc);
6785 ** siblings when this function was called. These have already 7682 }
6786 ** been set. We don't need to worry about old siblings that were 7683 }
6787 ** moved to the free-list - the freePage() code has taken care
6788 ** of those.
6789 **
6790 ** 2) The pointer-map entries associated with the first overflow
6791 ** page in any overflow chains used by new divider cells. These
6792 ** have also already been taken care of by the insertCell() code.
6793 **
6794 ** 3) If the sibling pages are not leaves, then the child pages of
6795 ** cells stored on the sibling pages may need to be updated.
6796 **
6797 ** 4) If the sibling pages are not internal intkey nodes, then any
6798 ** overflow pages used by these cells may need to be updated
6799 ** (internal intkey nodes never contain pointers to overflow pages).
6800 **
6801 ** 5) If the sibling pages are not leaves, then the pointer-map
6802 ** entries for the right-child pages of each sibling may need
6803 ** to be updated.
6804 **
6805 ** Cases 1 and 2 are dealt with above by other code. The next
6806 ** block deals with cases 3 and 4 and the one after that, case 5. Since
6807 ** setting a pointer map entry is a relatively expensive operation, this
6808 ** code only sets pointer map entries for child or overflow pages that have
6809 ** actually moved between pages. */
6810 MemPage *pNew = apNew[0];
6811 MemPage *pOld = apCopy[0];
6812 int nOverflow = pOld->nOverflow;
6813 int iNextOld = pOld->nCell + nOverflow;
6814 int iOverflow = (nOverflow ? pOld->aiOvfl[0] : -1);
6815 j = 0; /* Current 'old' sibling page */
6816 k = 0; /* Current 'new' sibling page */
6817 for(i=0; i<nCell; i++){
6818 int isDivider = 0;
6819 while( i==iNextOld ){
6820 /* Cell i is the cell immediately following the last cell on old
6821 ** sibling page j. If the siblings are not leaf pages of an
6822 ** intkey b-tree, then cell i was a divider cell. */
6823 assert( j+1 < ArraySize(apCopy) );
6824 assert( j+1 < nOld );
6825 pOld = apCopy[++j];
6826 iNextOld = i + !leafData + pOld->nCell + pOld->nOverflow;
6827 if( pOld->nOverflow ){
6828 nOverflow = pOld->nOverflow;
6829 iOverflow = i + !leafData + pOld->aiOvfl[0];
6830 }
6831 isDivider = !leafData;
6832 }
6833 7684
6834 assert(nOverflow>0 || iOverflow<i ); 7685 assert( pParent->isInit );
6835 assert(nOverflow<2 || pOld->aiOvfl[0]==pOld->aiOvfl[1]-1); 7686 TRACE(("BALANCE: finished: old=%d new=%d cells=%d\n",
6836 assert(nOverflow<3 || pOld->aiOvfl[1]==pOld->aiOvfl[2]-1); 7687 nOld, nNew, b.nCell));
6837 if( i==iOverflow ){
6838 isDivider = 1;
6839 if( (--nOverflow)>0 ){
6840 iOverflow++;
6841 }
6842 }
6843 7688
6844 if( i==cntNew[k] ){ 7689 /* Free any old pages that were not reused as new pages.
6845 /* Cell i is the cell immediately following the last cell on new 7690 */
6846 ** sibling page k. If the siblings are not leaf pages of an 7691 for(i=nNew; i<nOld; i++){
6847 ** intkey b-tree, then cell i is a divider cell. */ 7692 freePage(apOld[i], &rc);
6848 pNew = apNew[++k]; 7693 }
6849 if( !leafData ) continue;
6850 }
6851 assert( j<nOld );
6852 assert( k<nNew );
6853
6854 /* If the cell was originally divider cell (and is not now) or
6855 ** an overflow cell, or if the cell was located on a different sibling
6856 ** page before the balancing, then the pointer map entries associated
6857 ** with any child or overflow pages need to be updated. */
6858 if( isDivider || pOld->pgno!=pNew->pgno ){
6859 if( !leafCorrection ){
6860 ptrmapPut(pBt, get4byte(apCell[i]), PTRMAP_BTREE, pNew->pgno, &rc);
6861 }
6862 if( szCell[i]>pNew->minLocal ){
6863 ptrmapPutOvflPtr(pNew, apCell[i], &rc);
6864 }
6865 }
6866 }
6867
6868 if( !leafCorrection ){
6869 for(i=0; i<nNew; i++){
6870 u32 key = get4byte(&apNew[i]->aData[8]);
6871 ptrmapPut(pBt, key, PTRMAP_BTREE, apNew[i]->pgno, &rc);
6872 }
6873 }
6874 7694
6875 #if 0 7695 #if 0
7696 if( ISAUTOVACUUM && rc==SQLITE_OK && apNew[0]->isInit ){
6876 /* The ptrmapCheckPages() contains assert() statements that verify that 7697 /* The ptrmapCheckPages() contains assert() statements that verify that
6877 ** all pointer map pages are set correctly. This is helpful while 7698 ** all pointer map pages are set correctly. This is helpful while
6878 ** debugging. This is usually disabled because a corrupt database may 7699 ** debugging. This is usually disabled because a corrupt database may
6879 ** cause an assert() statement to fail. */ 7700 ** cause an assert() statement to fail. */
6880 ptrmapCheckPages(apNew, nNew); 7701 ptrmapCheckPages(apNew, nNew);
6881 ptrmapCheckPages(&pParent, 1); 7702 ptrmapCheckPages(&pParent, 1);
7703 }
6882 #endif 7704 #endif
6883 }
6884
6885 assert( pParent->isInit );
6886 TRACE(("BALANCE: finished: old=%d new=%d cells=%d\n",
6887 nOld, nNew, nCell));
6888 7705
6889 /* 7706 /*
6890 ** Cleanup before returning. 7707 ** Cleanup before returning.
6891 */ 7708 */
6892 balance_cleanup: 7709 balance_cleanup:
6893 sqlite3ScratchFree(apCell); 7710 sqlite3ScratchFree(b.apCell);
6894 for(i=0; i<nOld; i++){ 7711 for(i=0; i<nOld; i++){
6895 releasePage(apOld[i]); 7712 releasePage(apOld[i]);
6896 } 7713 }
6897 for(i=0; i<nNew; i++){ 7714 for(i=0; i<nNew; i++){
6898 releasePage(apNew[i]); 7715 releasePage(apNew[i]);
6899 } 7716 }
6900 7717
6901 return rc; 7718 return rc;
6902 } 7719 }
6903 #if defined(_MSC_VER) && _MSC_VER >= 1700 && defined(_M_ARM)
6904 #pragma optimize("", on)
6905 #endif
6906 7720
6907 7721
6908 /* 7722 /*
6909 ** This function is called when the root page of a b-tree structure is 7723 ** This function is called when the root page of a b-tree structure is
6910 ** overfull (has one or more overflow pages). 7724 ** overfull (has one or more overflow pages).
6911 ** 7725 **
6912 ** A new child page is allocated and the contents of the current root 7726 ** A new child page is allocated and the contents of the current root
6913 ** page, including overflow cells, are copied into the child. The root 7727 ** page, including overflow cells, are copied into the child. The root
6914 ** page is then overwritten to make it an empty page with the right-child 7728 ** page is then overwritten to make it an empty page with the right-child
6915 ** pointer pointing to the new page. 7729 ** pointer pointing to the new page.
(...skipping 140 matching lines...) Expand 10 before | Expand all | Expand 10 after
7056 ** A subsequent iteration of the do-loop will deal with this by 7870 ** A subsequent iteration of the do-loop will deal with this by
7057 ** calling balance_nonroot() (balance_deeper() may be called first, 7871 ** calling balance_nonroot() (balance_deeper() may be called first,
7058 ** but it doesn't deal with overflow cells - just moves them to a 7872 ** but it doesn't deal with overflow cells - just moves them to a
7059 ** different page). Once this subsequent call to balance_nonroot() 7873 ** different page). Once this subsequent call to balance_nonroot()
7060 ** has completed, it is safe to release the pSpace buffer used by 7874 ** has completed, it is safe to release the pSpace buffer used by
7061 ** the previous call, as the overflow cell data will have been 7875 ** the previous call, as the overflow cell data will have been
7062 ** copied either into the body of a database page or into the new 7876 ** copied either into the body of a database page or into the new
7063 ** pSpace buffer passed to the latter call to balance_nonroot(). 7877 ** pSpace buffer passed to the latter call to balance_nonroot().
7064 */ 7878 */
7065 u8 *pSpace = sqlite3PageMalloc(pCur->pBt->pageSize); 7879 u8 *pSpace = sqlite3PageMalloc(pCur->pBt->pageSize);
7066 rc = balance_nonroot(pParent, iIdx, pSpace, iPage==1, pCur->hints); 7880 rc = balance_nonroot(pParent, iIdx, pSpace, iPage==1,
7881 pCur->hints&BTREE_BULKLOAD);
7067 if( pFree ){ 7882 if( pFree ){
7068 /* If pFree is not NULL, it points to the pSpace buffer used 7883 /* If pFree is not NULL, it points to the pSpace buffer used
7069 ** by a previous call to balance_nonroot(). Its contents are 7884 ** by a previous call to balance_nonroot(). Its contents are
7070 ** now stored either on real database pages or within the 7885 ** now stored either on real database pages or within the
7071 ** new pSpace buffer, so it may be safely freed here. */ 7886 ** new pSpace buffer, so it may be safely freed here. */
7072 sqlite3PageFree(pFree); 7887 sqlite3PageFree(pFree);
7073 } 7888 }
7074 7889
7075 /* The pSpace buffer will be freed after the next call to 7890 /* The pSpace buffer will be freed after the next call to
7076 ** balance_nonroot(), or just before this function returns, whichever 7891 ** balance_nonroot(), or just before this function returns, whichever
7077 ** comes first. */ 7892 ** comes first. */
7078 pFree = pSpace; 7893 pFree = pSpace;
7079 } 7894 }
7080 } 7895 }
7081 7896
7082 pPage->nOverflow = 0; 7897 pPage->nOverflow = 0;
7083 7898
7084 /* The next iteration of the do-loop balances the parent page. */ 7899 /* The next iteration of the do-loop balances the parent page. */
7085 releasePage(pPage); 7900 releasePage(pPage);
7086 pCur->iPage--; 7901 pCur->iPage--;
7902 assert( pCur->iPage>=0 );
7087 } 7903 }
7088 }while( rc==SQLITE_OK ); 7904 }while( rc==SQLITE_OK );
7089 7905
7090 if( pFree ){ 7906 if( pFree ){
7091 sqlite3PageFree(pFree); 7907 sqlite3PageFree(pFree);
7092 } 7908 }
7093 return rc; 7909 return rc;
7094 } 7910 }
7095 7911
7096 7912
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after
7156 ** 7972 **
7157 ** In some cases, the call to btreeMoveto() below is a no-op. For 7973 ** In some cases, the call to btreeMoveto() below is a no-op. For
7158 ** example, when inserting data into a table with auto-generated integer 7974 ** example, when inserting data into a table with auto-generated integer
7159 ** keys, the VDBE layer invokes sqlite3BtreeLast() to figure out the 7975 ** keys, the VDBE layer invokes sqlite3BtreeLast() to figure out the
7160 ** integer key to use. It then calls this function to actually insert the 7976 ** integer key to use. It then calls this function to actually insert the
7161 ** data into the intkey B-Tree. In this case btreeMoveto() recognizes 7977 ** data into the intkey B-Tree. In this case btreeMoveto() recognizes
7162 ** that the cursor is already where it needs to be and returns without 7978 ** that the cursor is already where it needs to be and returns without
7163 ** doing any work. To avoid thwarting these optimizations, it is important 7979 ** doing any work. To avoid thwarting these optimizations, it is important
7164 ** not to clear the cursor here. 7980 ** not to clear the cursor here.
7165 */ 7981 */
7166 rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur); 7982 if( pCur->curFlags & BTCF_Multiple ){
7167 if( rc ) return rc; 7983 rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur);
7984 if( rc ) return rc;
7985 }
7168 7986
7169 if( pCur->pKeyInfo==0 ){ 7987 if( pCur->pKeyInfo==0 ){
7988 assert( pKey==0 );
7170 /* If this is an insert into a table b-tree, invalidate any incrblob 7989 /* If this is an insert into a table b-tree, invalidate any incrblob
7171 ** cursors open on the row being replaced */ 7990 ** cursors open on the row being replaced */
7172 invalidateIncrblobCursors(p, nKey, 0); 7991 invalidateIncrblobCursors(p, nKey, 0);
7173 7992
7174 /* If the cursor is currently on the last row and we are appending a 7993 /* If the cursor is currently on the last row and we are appending a
7175 ** new row onto the end, set the "loc" to avoid an unnecessary btreeMoveto() 7994 ** new row onto the end, set the "loc" to avoid an unnecessary
7176 ** call */ 7995 ** btreeMoveto() call */
7177 if( (pCur->curFlags&BTCF_ValidNKey)!=0 && nKey>0 7996 if( (pCur->curFlags&BTCF_ValidNKey)!=0 && nKey>0
7178 && pCur->info.nKey==nKey-1 ){ 7997 && pCur->info.nKey==nKey-1 ){
7179 loc = -1; 7998 loc = -1;
7999 }else if( loc==0 ){
8000 rc = sqlite3BtreeMovetoUnpacked(pCur, 0, nKey, appendBias, &loc);
8001 if( rc ) return rc;
7180 } 8002 }
7181 } 8003 }else if( loc==0 ){
7182
7183 if( !loc ){
7184 rc = btreeMoveto(pCur, pKey, nKey, appendBias, &loc); 8004 rc = btreeMoveto(pCur, pKey, nKey, appendBias, &loc);
7185 if( rc ) return rc; 8005 if( rc ) return rc;
7186 } 8006 }
7187 assert( pCur->eState==CURSOR_VALID || (pCur->eState==CURSOR_INVALID && loc) ); 8007 assert( pCur->eState==CURSOR_VALID || (pCur->eState==CURSOR_INVALID && loc) );
7188 8008
7189 pPage = pCur->apPage[pCur->iPage]; 8009 pPage = pCur->apPage[pCur->iPage];
7190 assert( pPage->intKey || nKey>=0 ); 8010 assert( pPage->intKey || nKey>=0 );
7191 assert( pPage->leaf || !pPage->intKey ); 8011 assert( pPage->leaf || !pPage->intKey );
7192 8012
7193 TRACE(("INSERT: table=%d nkey=%lld ndata=%d page=%d %s\n", 8013 TRACE(("INSERT: table=%d nkey=%lld ndata=%d page=%d %s\n",
7194 pCur->pgnoRoot, nKey, nData, pPage->pgno, 8014 pCur->pgnoRoot, nKey, nData, pPage->pgno,
7195 loc==0 ? "overwrite" : "new entry")); 8015 loc==0 ? "overwrite" : "new entry"));
7196 assert( pPage->isInit ); 8016 assert( pPage->isInit );
7197 newCell = pBt->pTmpSpace; 8017 newCell = pBt->pTmpSpace;
7198 assert( newCell!=0 ); 8018 assert( newCell!=0 );
7199 rc = fillInCell(pPage, newCell, pKey, nKey, pData, nData, nZero, &szNew); 8019 rc = fillInCell(pPage, newCell, pKey, nKey, pData, nData, nZero, &szNew);
7200 if( rc ) goto end_insert; 8020 if( rc ) goto end_insert;
7201 assert( szNew==cellSizePtr(pPage, newCell) ); 8021 assert( szNew==pPage->xCellSize(pPage, newCell) );
7202 assert( szNew <= MX_CELL_SIZE(pBt) ); 8022 assert( szNew <= MX_CELL_SIZE(pBt) );
7203 idx = pCur->aiIdx[pCur->iPage]; 8023 idx = pCur->aiIdx[pCur->iPage];
7204 if( loc==0 ){ 8024 if( loc==0 ){
7205 u16 szOld; 8025 u16 szOld;
7206 assert( idx<pPage->nCell ); 8026 assert( idx<pPage->nCell );
7207 rc = sqlite3PagerWrite(pPage->pDbPage); 8027 rc = sqlite3PagerWrite(pPage->pDbPage);
7208 if( rc ){ 8028 if( rc ){
7209 goto end_insert; 8029 goto end_insert;
7210 } 8030 }
7211 oldCell = findCell(pPage, idx); 8031 oldCell = findCell(pPage, idx);
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after
7256 pCur->apPage[pCur->iPage]->nOverflow = 0; 8076 pCur->apPage[pCur->iPage]->nOverflow = 0;
7257 pCur->eState = CURSOR_INVALID; 8077 pCur->eState = CURSOR_INVALID;
7258 } 8078 }
7259 assert( pCur->apPage[pCur->iPage]->nOverflow==0 ); 8079 assert( pCur->apPage[pCur->iPage]->nOverflow==0 );
7260 8080
7261 end_insert: 8081 end_insert:
7262 return rc; 8082 return rc;
7263 } 8083 }
7264 8084
7265 /* 8085 /*
7266 ** Delete the entry that the cursor is pointing to. The cursor 8086 ** Delete the entry that the cursor is pointing to.
7267 ** is left pointing at an arbitrary location. 8087 **
8088 ** If the second parameter is zero, then the cursor is left pointing at an
8089 ** arbitrary location after the delete. If it is non-zero, then the cursor
8090 ** is left in a state such that the next call to BtreeNext() or BtreePrev()
8091 ** moves it to the same row as it would if the call to BtreeDelete() had
8092 ** been omitted.
7268 */ 8093 */
7269 int sqlite3BtreeDelete(BtCursor *pCur){ 8094 int sqlite3BtreeDelete(BtCursor *pCur, int bPreserve){
7270 Btree *p = pCur->pBtree; 8095 Btree *p = pCur->pBtree;
7271 BtShared *pBt = p->pBt; 8096 BtShared *pBt = p->pBt;
7272 int rc; /* Return code */ 8097 int rc; /* Return code */
7273 MemPage *pPage; /* Page to delete cell from */ 8098 MemPage *pPage; /* Page to delete cell from */
7274 unsigned char *pCell; /* Pointer to cell to delete */ 8099 unsigned char *pCell; /* Pointer to cell to delete */
7275 int iCellIdx; /* Index of cell to delete */ 8100 int iCellIdx; /* Index of cell to delete */
7276 int iCellDepth; /* Depth of node containing pCell */ 8101 int iCellDepth; /* Depth of node containing pCell */
7277 u16 szCell; /* Size of the cell being deleted */ 8102 u16 szCell; /* Size of the cell being deleted */
8103 int bSkipnext = 0; /* Leaf cursor in SKIPNEXT state */
7278 8104
7279 assert( cursorHoldsMutex(pCur) ); 8105 assert( cursorHoldsMutex(pCur) );
7280 assert( pBt->inTransaction==TRANS_WRITE ); 8106 assert( pBt->inTransaction==TRANS_WRITE );
7281 assert( (pBt->btsFlags & BTS_READ_ONLY)==0 ); 8107 assert( (pBt->btsFlags & BTS_READ_ONLY)==0 );
7282 assert( pCur->curFlags & BTCF_WriteFlag ); 8108 assert( pCur->curFlags & BTCF_WriteFlag );
7283 assert( hasSharedCacheTableLock(p, pCur->pgnoRoot, pCur->pKeyInfo!=0, 2) ); 8109 assert( hasSharedCacheTableLock(p, pCur->pgnoRoot, pCur->pKeyInfo!=0, 2) );
7284 assert( !hasReadConflicts(p, pCur->pgnoRoot) ); 8110 assert( !hasReadConflicts(p, pCur->pgnoRoot) );
7285 8111 assert( pCur->aiIdx[pCur->iPage]<pCur->apPage[pCur->iPage]->nCell );
7286 if( NEVER(pCur->aiIdx[pCur->iPage]>=pCur->apPage[pCur->iPage]->nCell) 8112 assert( pCur->eState==CURSOR_VALID );
7287 || NEVER(pCur->eState!=CURSOR_VALID)
7288 ){
7289 return SQLITE_ERROR; /* Something has gone awry. */
7290 }
7291 8113
7292 iCellDepth = pCur->iPage; 8114 iCellDepth = pCur->iPage;
7293 iCellIdx = pCur->aiIdx[iCellDepth]; 8115 iCellIdx = pCur->aiIdx[iCellDepth];
7294 pPage = pCur->apPage[iCellDepth]; 8116 pPage = pCur->apPage[iCellDepth];
7295 pCell = findCell(pPage, iCellIdx); 8117 pCell = findCell(pPage, iCellIdx);
7296 8118
7297 /* If the page containing the entry to delete is not a leaf page, move 8119 /* If the page containing the entry to delete is not a leaf page, move
7298 ** the cursor to the largest entry in the tree that is smaller than 8120 ** the cursor to the largest entry in the tree that is smaller than
7299 ** the entry being deleted. This cell will replace the cell being deleted 8121 ** the entry being deleted. This cell will replace the cell being deleted
7300 ** from the internal node. The 'previous' entry is used for this instead 8122 ** from the internal node. The 'previous' entry is used for this instead
7301 ** of the 'next' entry, as the previous entry is always a part of the 8123 ** of the 'next' entry, as the previous entry is always a part of the
7302 ** sub-tree headed by the child page of the cell being deleted. This makes 8124 ** sub-tree headed by the child page of the cell being deleted. This makes
7303 ** balancing the tree following the delete operation easier. */ 8125 ** balancing the tree following the delete operation easier. */
7304 if( !pPage->leaf ){ 8126 if( !pPage->leaf ){
7305 int notUsed = 0; 8127 int notUsed = 0;
7306 rc = sqlite3BtreePrevious(pCur, &notUsed); 8128 rc = sqlite3BtreePrevious(pCur, &notUsed);
7307 if( rc ) return rc; 8129 if( rc ) return rc;
7308 } 8130 }
7309 8131
7310 /* Save the positions of any other cursors open on this table before 8132 /* Save the positions of any other cursors open on this table before
7311 ** making any modifications. Make the page containing the entry to be 8133 ** making any modifications. */
7312 ** deleted writable. Then free any overflow pages associated with the 8134 if( pCur->curFlags & BTCF_Multiple ){
7313 ** entry and finally remove the cell itself from within the page. 8135 rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur);
7314 */ 8136 if( rc ) return rc;
7315 rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur); 8137 }
7316 if( rc ) return rc;
7317 8138
7318 /* If this is a delete operation to remove a row from a table b-tree, 8139 /* If this is a delete operation to remove a row from a table b-tree,
7319 ** invalidate any incrblob cursors open on the row being deleted. */ 8140 ** invalidate any incrblob cursors open on the row being deleted. */
7320 if( pCur->pKeyInfo==0 ){ 8141 if( pCur->pKeyInfo==0 ){
7321 invalidateIncrblobCursors(p, pCur->info.nKey, 0); 8142 invalidateIncrblobCursors(p, pCur->info.nKey, 0);
7322 } 8143 }
7323 8144
8145 /* If the bPreserve flag is set to true, then the cursor position must
8146 ** be preserved following this delete operation. If the current delete
8147 ** will cause a b-tree rebalance, then this is done by saving the cursor
8148 ** key and leaving the cursor in CURSOR_REQUIRESEEK state before
8149 ** returning.
8150 **
8151 ** Or, if the current delete will not cause a rebalance, then the cursor
8152 ** will be left in CURSOR_SKIPNEXT state pointing to the entry immediately
8153 ** before or after the deleted entry. In this case set bSkipnext to true. */
8154 if( bPreserve ){
8155 if( !pPage->leaf
8156 || (pPage->nFree+cellSizePtr(pPage,pCell)+2)>(int)(pBt->usableSize*2/3)
8157 ){
8158 /* A b-tree rebalance will be required after deleting this entry.
8159 ** Save the cursor key. */
8160 rc = saveCursorKey(pCur);
8161 if( rc ) return rc;
8162 }else{
8163 bSkipnext = 1;
8164 }
8165 }
8166
8167 /* Make the page containing the entry to be deleted writable. Then free any
8168 ** overflow pages associated with the entry and finally remove the cell
8169 ** itself from within the page. */
7324 rc = sqlite3PagerWrite(pPage->pDbPage); 8170 rc = sqlite3PagerWrite(pPage->pDbPage);
7325 if( rc ) return rc; 8171 if( rc ) return rc;
7326 rc = clearCell(pPage, pCell, &szCell); 8172 rc = clearCell(pPage, pCell, &szCell);
7327 dropCell(pPage, iCellIdx, szCell, &rc); 8173 dropCell(pPage, iCellIdx, szCell, &rc);
7328 if( rc ) return rc; 8174 if( rc ) return rc;
7329 8175
7330 /* If the cell deleted was not located on a leaf page, then the cursor 8176 /* If the cell deleted was not located on a leaf page, then the cursor
7331 ** is currently pointing to the largest entry in the sub-tree headed 8177 ** is currently pointing to the largest entry in the sub-tree headed
7332 ** by the child-page of the cell that was just deleted from an internal 8178 ** by the child-page of the cell that was just deleted from an internal
7333 ** node. The cell from the leaf node needs to be moved to the internal 8179 ** node. The cell from the leaf node needs to be moved to the internal
7334 ** node to replace the deleted cell. */ 8180 ** node to replace the deleted cell. */
7335 if( !pPage->leaf ){ 8181 if( !pPage->leaf ){
7336 MemPage *pLeaf = pCur->apPage[pCur->iPage]; 8182 MemPage *pLeaf = pCur->apPage[pCur->iPage];
7337 int nCell; 8183 int nCell;
7338 Pgno n = pCur->apPage[iCellDepth+1]->pgno; 8184 Pgno n = pCur->apPage[iCellDepth+1]->pgno;
7339 unsigned char *pTmp; 8185 unsigned char *pTmp;
7340 8186
7341 pCell = findCell(pLeaf, pLeaf->nCell-1); 8187 pCell = findCell(pLeaf, pLeaf->nCell-1);
7342 nCell = cellSizePtr(pLeaf, pCell); 8188 if( pCell<&pLeaf->aData[4] ) return SQLITE_CORRUPT_BKPT;
8189 nCell = pLeaf->xCellSize(pLeaf, pCell);
7343 assert( MX_CELL_SIZE(pBt) >= nCell ); 8190 assert( MX_CELL_SIZE(pBt) >= nCell );
7344 pTmp = pBt->pTmpSpace; 8191 pTmp = pBt->pTmpSpace;
7345 assert( pTmp!=0 ); 8192 assert( pTmp!=0 );
7346 rc = sqlite3PagerWrite(pLeaf->pDbPage); 8193 rc = sqlite3PagerWrite(pLeaf->pDbPage);
7347 insertCell(pPage, iCellIdx, pCell-4, nCell+4, pTmp, n, &rc); 8194 insertCell(pPage, iCellIdx, pCell-4, nCell+4, pTmp, n, &rc);
7348 dropCell(pLeaf, pLeaf->nCell-1, nCell, &rc); 8195 dropCell(pLeaf, pLeaf->nCell-1, nCell, &rc);
7349 if( rc ) return rc; 8196 if( rc ) return rc;
7350 } 8197 }
7351 8198
7352 /* Balance the tree. If the entry deleted was located on a leaf page, 8199 /* Balance the tree. If the entry deleted was located on a leaf page,
(...skipping 13 matching lines...) Expand all
7366 ** well. */ 8213 ** well. */
7367 rc = balance(pCur); 8214 rc = balance(pCur);
7368 if( rc==SQLITE_OK && pCur->iPage>iCellDepth ){ 8215 if( rc==SQLITE_OK && pCur->iPage>iCellDepth ){
7369 while( pCur->iPage>iCellDepth ){ 8216 while( pCur->iPage>iCellDepth ){
7370 releasePage(pCur->apPage[pCur->iPage--]); 8217 releasePage(pCur->apPage[pCur->iPage--]);
7371 } 8218 }
7372 rc = balance(pCur); 8219 rc = balance(pCur);
7373 } 8220 }
7374 8221
7375 if( rc==SQLITE_OK ){ 8222 if( rc==SQLITE_OK ){
7376 moveToRoot(pCur); 8223 if( bSkipnext ){
8224 assert( bPreserve && (pCur->iPage==iCellDepth || CORRUPT_DB) );
8225 assert( pPage==pCur->apPage[pCur->iPage] );
8226 assert( (pPage->nCell>0 || CORRUPT_DB) && iCellIdx<=pPage->nCell );
8227 pCur->eState = CURSOR_SKIPNEXT;
8228 if( iCellIdx>=pPage->nCell ){
8229 pCur->skipNext = -1;
8230 pCur->aiIdx[iCellDepth] = pPage->nCell-1;
8231 }else{
8232 pCur->skipNext = 1;
8233 }
8234 }else{
8235 rc = moveToRoot(pCur);
8236 if( bPreserve ){
8237 pCur->eState = CURSOR_REQUIRESEEK;
8238 }
8239 }
7377 } 8240 }
7378 return rc; 8241 return rc;
7379 } 8242 }
7380 8243
7381 /* 8244 /*
7382 ** Create a new BTree table. Write into *piTable the page 8245 ** Create a new BTree table. Write into *piTable the page
7383 ** number for the root page of the new table. 8246 ** number for the root page of the new table.
7384 ** 8247 **
7385 ** The type of table is determined by the flags parameter. Only the 8248 ** The type of table is determined by the flags parameter. Only the
7386 ** following values of flags are currently in use. Other values for 8249 ** following values of flags are currently in use. Other values for
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
7424 sqlite3BtreeGetMeta(p, BTREE_LARGEST_ROOT_PAGE, &pgnoRoot); 8287 sqlite3BtreeGetMeta(p, BTREE_LARGEST_ROOT_PAGE, &pgnoRoot);
7425 pgnoRoot++; 8288 pgnoRoot++;
7426 8289
7427 /* The new root-page may not be allocated on a pointer-map page, or the 8290 /* The new root-page may not be allocated on a pointer-map page, or the
7428 ** PENDING_BYTE page. 8291 ** PENDING_BYTE page.
7429 */ 8292 */
7430 while( pgnoRoot==PTRMAP_PAGENO(pBt, pgnoRoot) || 8293 while( pgnoRoot==PTRMAP_PAGENO(pBt, pgnoRoot) ||
7431 pgnoRoot==PENDING_BYTE_PAGE(pBt) ){ 8294 pgnoRoot==PENDING_BYTE_PAGE(pBt) ){
7432 pgnoRoot++; 8295 pgnoRoot++;
7433 } 8296 }
7434 assert( pgnoRoot>=3 ); 8297 assert( pgnoRoot>=3 || CORRUPT_DB );
8298 testcase( pgnoRoot<3 );
7435 8299
7436 /* Allocate a page. The page that currently resides at pgnoRoot will 8300 /* Allocate a page. The page that currently resides at pgnoRoot will
7437 ** be moved to the allocated page (unless the allocated page happens 8301 ** be moved to the allocated page (unless the allocated page happens
7438 ** to reside at pgnoRoot). 8302 ** to reside at pgnoRoot).
7439 */ 8303 */
7440 rc = allocateBtreePage(pBt, &pPageMove, &pgnoMove, pgnoRoot, BTALLOC_EXACT); 8304 rc = allocateBtreePage(pBt, &pPageMove, &pgnoMove, pgnoRoot, BTALLOC_EXACT);
7441 if( rc!=SQLITE_OK ){ 8305 if( rc!=SQLITE_OK ){
7442 return rc; 8306 return rc;
7443 } 8307 }
7444 8308
(...skipping 108 matching lines...) Expand 10 before | Expand all | Expand 10 after
7553 int rc; 8417 int rc;
7554 unsigned char *pCell; 8418 unsigned char *pCell;
7555 int i; 8419 int i;
7556 int hdr; 8420 int hdr;
7557 u16 szCell; 8421 u16 szCell;
7558 8422
7559 assert( sqlite3_mutex_held(pBt->mutex) ); 8423 assert( sqlite3_mutex_held(pBt->mutex) );
7560 if( pgno>btreePagecount(pBt) ){ 8424 if( pgno>btreePagecount(pBt) ){
7561 return SQLITE_CORRUPT_BKPT; 8425 return SQLITE_CORRUPT_BKPT;
7562 } 8426 }
7563 8427 rc = getAndInitPage(pBt, pgno, &pPage, 0, 0);
7564 rc = getAndInitPage(pBt, pgno, &pPage, 0);
7565 if( rc ) return rc; 8428 if( rc ) return rc;
8429 if( pPage->bBusy ){
8430 rc = SQLITE_CORRUPT_BKPT;
8431 goto cleardatabasepage_out;
8432 }
8433 pPage->bBusy = 1;
7566 hdr = pPage->hdrOffset; 8434 hdr = pPage->hdrOffset;
7567 for(i=0; i<pPage->nCell; i++){ 8435 for(i=0; i<pPage->nCell; i++){
7568 pCell = findCell(pPage, i); 8436 pCell = findCell(pPage, i);
7569 if( !pPage->leaf ){ 8437 if( !pPage->leaf ){
7570 rc = clearDatabasePage(pBt, get4byte(pCell), 1, pnChange); 8438 rc = clearDatabasePage(pBt, get4byte(pCell), 1, pnChange);
7571 if( rc ) goto cleardatabasepage_out; 8439 if( rc ) goto cleardatabasepage_out;
7572 } 8440 }
7573 rc = clearCell(pPage, pCell, &szCell); 8441 rc = clearCell(pPage, pCell, &szCell);
7574 if( rc ) goto cleardatabasepage_out; 8442 if( rc ) goto cleardatabasepage_out;
7575 } 8443 }
7576 if( !pPage->leaf ){ 8444 if( !pPage->leaf ){
7577 rc = clearDatabasePage(pBt, get4byte(&pPage->aData[hdr+8]), 1, pnChange); 8445 rc = clearDatabasePage(pBt, get4byte(&pPage->aData[hdr+8]), 1, pnChange);
7578 if( rc ) goto cleardatabasepage_out; 8446 if( rc ) goto cleardatabasepage_out;
7579 }else if( pnChange ){ 8447 }else if( pnChange ){
7580 assert( pPage->intKey ); 8448 assert( pPage->intKey || CORRUPT_DB );
8449 testcase( !pPage->intKey );
7581 *pnChange += pPage->nCell; 8450 *pnChange += pPage->nCell;
7582 } 8451 }
7583 if( freePageFlag ){ 8452 if( freePageFlag ){
7584 freePage(pPage, &rc); 8453 freePage(pPage, &rc);
7585 }else if( (rc = sqlite3PagerWrite(pPage->pDbPage))==0 ){ 8454 }else if( (rc = sqlite3PagerWrite(pPage->pDbPage))==0 ){
7586 zeroPage(pPage, pPage->aData[hdr] | PTF_LEAF); 8455 zeroPage(pPage, pPage->aData[hdr] | PTF_LEAF);
7587 } 8456 }
7588 8457
7589 cleardatabasepage_out: 8458 cleardatabasepage_out:
8459 pPage->bBusy = 0;
7590 releasePage(pPage); 8460 releasePage(pPage);
7591 return rc; 8461 return rc;
7592 } 8462 }
7593 8463
7594 /* 8464 /*
7595 ** Delete all information from a single table in the database. iTable is 8465 ** Delete all information from a single table in the database. iTable is
7596 ** the page number of the root of the table. After this routine returns, 8466 ** the page number of the root of the table. After this routine returns,
7597 ** the root page is empty, but still exists. 8467 ** the root page is empty, but still exists.
7598 ** 8468 **
7599 ** This routine will fail with SQLITE_LOCKED if there are any open 8469 ** This routine will fail with SQLITE_LOCKED if there are any open
(...skipping 169 matching lines...) Expand 10 before | Expand all | Expand 10 after
7769 ** has a read or write transaction open on the database. 8639 ** has a read or write transaction open on the database.
7770 ** 8640 **
7771 ** Read the meta-information out of a database file. Meta[0] 8641 ** Read the meta-information out of a database file. Meta[0]
7772 ** is the number of free pages currently in the database. Meta[1] 8642 ** is the number of free pages currently in the database. Meta[1]
7773 ** through meta[15] are available for use by higher layers. Meta[0] 8643 ** through meta[15] are available for use by higher layers. Meta[0]
7774 ** is read-only, the others are read/write. 8644 ** is read-only, the others are read/write.
7775 ** 8645 **
7776 ** The schema layer numbers meta values differently. At the schema 8646 ** The schema layer numbers meta values differently. At the schema
7777 ** layer (and the SetCookie and ReadCookie opcodes) the number of 8647 ** layer (and the SetCookie and ReadCookie opcodes) the number of
7778 ** free pages is not visible. So Cookie[0] is the same as Meta[1]. 8648 ** free pages is not visible. So Cookie[0] is the same as Meta[1].
8649 **
8650 ** This routine treats Meta[BTREE_DATA_VERSION] as a special case. Instead
8651 ** of reading the value out of the header, it loads the "DataVersion"
8652 ** from the pager. The BTREE_DATA_VERSION value is not actually stored in the
8653 ** database file. It is a number computed by the pager. But its access
8654 ** pattern is the same as header meta values, and so it is convenient to
8655 ** read it from this routine.
7779 */ 8656 */
7780 void sqlite3BtreeGetMeta(Btree *p, int idx, u32 *pMeta){ 8657 void sqlite3BtreeGetMeta(Btree *p, int idx, u32 *pMeta){
7781 BtShared *pBt = p->pBt; 8658 BtShared *pBt = p->pBt;
7782 8659
7783 sqlite3BtreeEnter(p); 8660 sqlite3BtreeEnter(p);
7784 assert( p->inTrans>TRANS_NONE ); 8661 assert( p->inTrans>TRANS_NONE );
7785 assert( SQLITE_OK==querySharedCacheTableLock(p, MASTER_ROOT, READ_LOCK) ); 8662 assert( SQLITE_OK==querySharedCacheTableLock(p, MASTER_ROOT, READ_LOCK) );
7786 assert( pBt->pPage1 ); 8663 assert( pBt->pPage1 );
7787 assert( idx>=0 && idx<=15 ); 8664 assert( idx>=0 && idx<=15 );
7788 8665
7789 *pMeta = get4byte(&pBt->pPage1->aData[36 + idx*4]); 8666 if( idx==BTREE_DATA_VERSION ){
8667 *pMeta = sqlite3PagerDataVersion(pBt->pPager) + p->iDataVersion;
8668 }else{
8669 *pMeta = get4byte(&pBt->pPage1->aData[36 + idx*4]);
8670 }
7790 8671
7791 /* If auto-vacuum is disabled in this build and this is an auto-vacuum 8672 /* If auto-vacuum is disabled in this build and this is an auto-vacuum
7792 ** database, mark the database as read-only. */ 8673 ** database, mark the database as read-only. */
7793 #ifdef SQLITE_OMIT_AUTOVACUUM 8674 #ifdef SQLITE_OMIT_AUTOVACUUM
7794 if( idx==BTREE_LARGEST_ROOT_PAGE && *pMeta>0 ){ 8675 if( idx==BTREE_LARGEST_ROOT_PAGE && *pMeta>0 ){
7795 pBt->btsFlags |= BTS_READ_ONLY; 8676 pBt->btsFlags |= BTS_READ_ONLY;
7796 } 8677 }
7797 #endif 8678 #endif
7798 8679
7799 sqlite3BtreeLeave(p); 8680 sqlite3BtreeLeave(p);
(...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after
7870 ** to visit is the right-child of its parent. 8751 ** to visit is the right-child of its parent.
7871 ** 8752 **
7872 ** If all pages in the tree have been visited, return SQLITE_OK to the 8753 ** If all pages in the tree have been visited, return SQLITE_OK to the
7873 ** caller. 8754 ** caller.
7874 */ 8755 */
7875 if( pPage->leaf ){ 8756 if( pPage->leaf ){
7876 do { 8757 do {
7877 if( pCur->iPage==0 ){ 8758 if( pCur->iPage==0 ){
7878 /* All pages of the b-tree have been visited. Return successfully. */ 8759 /* All pages of the b-tree have been visited. Return successfully. */
7879 *pnEntry = nEntry; 8760 *pnEntry = nEntry;
7880 return SQLITE_OK; 8761 return moveToRoot(pCur);
7881 } 8762 }
7882 moveToParent(pCur); 8763 moveToParent(pCur);
7883 }while ( pCur->aiIdx[pCur->iPage]>=pCur->apPage[pCur->iPage]->nCell ); 8764 }while ( pCur->aiIdx[pCur->iPage]>=pCur->apPage[pCur->iPage]->nCell );
7884 8765
7885 pCur->aiIdx[pCur->iPage]++; 8766 pCur->aiIdx[pCur->iPage]++;
7886 pPage = pCur->apPage[pCur->iPage]; 8767 pPage = pCur->apPage[pCur->iPage];
7887 } 8768 }
7888 8769
7889 /* Descend to the child node of the cell that the cursor currently 8770 /* Descend to the child node of the cell that the cursor currently
7890 ** points at. This is the right-child if (iIdx==pPage->nCell). 8771 ** points at. This is the right-child if (iIdx==pPage->nCell).
(...skipping 22 matching lines...) Expand all
7913 #ifndef SQLITE_OMIT_INTEGRITY_CHECK 8794 #ifndef SQLITE_OMIT_INTEGRITY_CHECK
7914 /* 8795 /*
7915 ** Append a message to the error message string. 8796 ** Append a message to the error message string.
7916 */ 8797 */
7917 static void checkAppendMsg( 8798 static void checkAppendMsg(
7918 IntegrityCk *pCheck, 8799 IntegrityCk *pCheck,
7919 const char *zFormat, 8800 const char *zFormat,
7920 ... 8801 ...
7921 ){ 8802 ){
7922 va_list ap; 8803 va_list ap;
7923 char zBuf[200];
7924 if( !pCheck->mxErr ) return; 8804 if( !pCheck->mxErr ) return;
7925 pCheck->mxErr--; 8805 pCheck->mxErr--;
7926 pCheck->nErr++; 8806 pCheck->nErr++;
7927 va_start(ap, zFormat); 8807 va_start(ap, zFormat);
7928 if( pCheck->errMsg.nChar ){ 8808 if( pCheck->errMsg.nChar ){
7929 sqlite3StrAccumAppend(&pCheck->errMsg, "\n", 1); 8809 sqlite3StrAccumAppend(&pCheck->errMsg, "\n", 1);
7930 } 8810 }
7931 if( pCheck->zPfx ){ 8811 if( pCheck->zPfx ){
7932 sqlite3_snprintf(sizeof(zBuf), zBuf, pCheck->zPfx, pCheck->v1, pCheck->v2); 8812 sqlite3XPrintf(&pCheck->errMsg, 0, pCheck->zPfx, pCheck->v1, pCheck->v2);
7933 sqlite3StrAccumAppendAll(&pCheck->errMsg, zBuf);
7934 } 8813 }
7935 sqlite3VXPrintf(&pCheck->errMsg, 1, zFormat, ap); 8814 sqlite3VXPrintf(&pCheck->errMsg, 1, zFormat, ap);
7936 va_end(ap); 8815 va_end(ap);
7937 if( pCheck->errMsg.accError==STRACCUM_NOMEM ){ 8816 if( pCheck->errMsg.accError==STRACCUM_NOMEM ){
7938 pCheck->mallocFailed = 1; 8817 pCheck->mallocFailed = 1;
7939 } 8818 }
7940 } 8819 }
7941 #endif /* SQLITE_OMIT_INTEGRITY_CHECK */ 8820 #endif /* SQLITE_OMIT_INTEGRITY_CHECK */
7942 8821
7943 #ifndef SQLITE_OMIT_INTEGRITY_CHECK 8822 #ifndef SQLITE_OMIT_INTEGRITY_CHECK
(...skipping 85 matching lines...) Expand 10 before | Expand all | Expand 10 after
8029 while( N-- > 0 && pCheck->mxErr ){ 8908 while( N-- > 0 && pCheck->mxErr ){
8030 DbPage *pOvflPage; 8909 DbPage *pOvflPage;
8031 unsigned char *pOvflData; 8910 unsigned char *pOvflData;
8032 if( iPage<1 ){ 8911 if( iPage<1 ){
8033 checkAppendMsg(pCheck, 8912 checkAppendMsg(pCheck,
8034 "%d of %d pages missing from overflow list starting at %d", 8913 "%d of %d pages missing from overflow list starting at %d",
8035 N+1, expected, iFirst); 8914 N+1, expected, iFirst);
8036 break; 8915 break;
8037 } 8916 }
8038 if( checkRef(pCheck, iPage) ) break; 8917 if( checkRef(pCheck, iPage) ) break;
8039 if( sqlite3PagerGet(pCheck->pPager, (Pgno)iPage, &pOvflPage) ){ 8918 if( sqlite3PagerGet(pCheck->pPager, (Pgno)iPage, &pOvflPage, 0) ){
8040 checkAppendMsg(pCheck, "failed to get page %d", iPage); 8919 checkAppendMsg(pCheck, "failed to get page %d", iPage);
8041 break; 8920 break;
8042 } 8921 }
8043 pOvflData = (unsigned char *)sqlite3PagerGetData(pOvflPage); 8922 pOvflData = (unsigned char *)sqlite3PagerGetData(pOvflPage);
8044 if( isFreeList ){ 8923 if( isFreeList ){
8045 int n = get4byte(&pOvflData[4]); 8924 int n = get4byte(&pOvflData[4]);
8046 #ifndef SQLITE_OMIT_AUTOVACUUM 8925 #ifndef SQLITE_OMIT_AUTOVACUUM
8047 if( pCheck->pBt->autoVacuum ){ 8926 if( pCheck->pBt->autoVacuum ){
8048 checkPtrmap(pCheck, iPage, PTRMAP_FREEPAGE, 0); 8927 checkPtrmap(pCheck, iPage, PTRMAP_FREEPAGE, 0);
8049 } 8928 }
(...skipping 22 matching lines...) Expand all
8072 ** the following page matches iPage. 8951 ** the following page matches iPage.
8073 */ 8952 */
8074 if( pCheck->pBt->autoVacuum && N>0 ){ 8953 if( pCheck->pBt->autoVacuum && N>0 ){
8075 i = get4byte(pOvflData); 8954 i = get4byte(pOvflData);
8076 checkPtrmap(pCheck, i, PTRMAP_OVERFLOW2, iPage); 8955 checkPtrmap(pCheck, i, PTRMAP_OVERFLOW2, iPage);
8077 } 8956 }
8078 } 8957 }
8079 #endif 8958 #endif
8080 iPage = get4byte(pOvflData); 8959 iPage = get4byte(pOvflData);
8081 sqlite3PagerUnref(pOvflPage); 8960 sqlite3PagerUnref(pOvflPage);
8961
8962 if( isFreeList && N<(iPage!=0) ){
8963 checkAppendMsg(pCheck, "free-page count in header is too small");
8964 }
8082 } 8965 }
8083 } 8966 }
8084 #endif /* SQLITE_OMIT_INTEGRITY_CHECK */ 8967 #endif /* SQLITE_OMIT_INTEGRITY_CHECK */
8085 8968
8969 /*
8970 ** An implementation of a min-heap.
8971 **
8972 ** aHeap[0] is the number of elements on the heap. aHeap[1] is the
8973 ** root element. The daughter nodes of aHeap[N] are aHeap[N*2]
8974 ** and aHeap[N*2+1].
8975 **
8976 ** The heap property is this: Every node is less than or equal to both
8977 ** of its daughter nodes. A consequence of the heap property is that the
8978 ** root node aHeap[1] is always the minimum value currently in the heap.
8979 **
8980 ** The btreeHeapInsert() routine inserts an unsigned 32-bit number onto
8981 ** the heap, preserving the heap property. The btreeHeapPull() routine
8982 ** removes the root element from the heap (the minimum value in the heap)
8983 ** and then moves other nodes around as necessary to preserve the heap
8984 ** property.
8985 **
8986 ** This heap is used for cell overlap and coverage testing. Each u32
8987 ** entry represents the span of a cell or freeblock on a btree page.
8988 ** The upper 16 bits are the index of the first byte of a range and the
8989 ** lower 16 bits are the index of the last byte of that range.
8990 */
8991 static void btreeHeapInsert(u32 *aHeap, u32 x){
8992 u32 j, i = ++aHeap[0];
8993 aHeap[i] = x;
8994 while( (j = i/2)>0 && aHeap[j]>aHeap[i] ){
8995 x = aHeap[j];
8996 aHeap[j] = aHeap[i];
8997 aHeap[i] = x;
8998 i = j;
8999 }
9000 }
9001 static int btreeHeapPull(u32 *aHeap, u32 *pOut){
9002 u32 j, i, x;
9003 if( (x = aHeap[0])==0 ) return 0;
9004 *pOut = aHeap[1];
9005 aHeap[1] = aHeap[x];
9006 aHeap[x] = 0xffffffff;
9007 aHeap[0]--;
9008 i = 1;
9009 while( (j = i*2)<=aHeap[0] ){
9010 if( aHeap[j]>aHeap[j+1] ) j++;
9011 if( aHeap[i]<aHeap[j] ) break;
9012 x = aHeap[i];
9013 aHeap[i] = aHeap[j];
9014 aHeap[j] = x;
9015 i = j;
9016 }
9017 return 1;
9018 }
9019
8086 #ifndef SQLITE_OMIT_INTEGRITY_CHECK 9020 #ifndef SQLITE_OMIT_INTEGRITY_CHECK
8087 /* 9021 /*
8088 ** Do various sanity checks on a single page of a tree. Return 9022 ** Do various sanity checks on a single page of a tree. Return
8089 ** the tree depth. Root pages return 0. Parents of root pages 9023 ** the tree depth. Root pages return 0. Parents of root pages
8090 ** return 1, and so forth. 9024 ** return 1, and so forth.
8091 ** 9025 **
8092 ** These checks are done: 9026 ** These checks are done:
8093 ** 9027 **
8094 ** 1. Make sure that cells and freeblocks do not overlap 9028 ** 1. Make sure that cells and freeblocks do not overlap
8095 ** but combine to completely cover the page. 9029 ** but combine to completely cover the page.
8096 ** NO 2. Make sure cell keys are in order. 9030 ** 2. Make sure integer cell keys are in order.
8097 ** NO 3. Make sure no key is less than or equal to zLowerBound. 9031 ** 3. Check the integrity of overflow pages.
8098 ** NO 4. Make sure no key is greater than or equal to zUpperBound. 9032 ** 4. Recursively call checkTreePage on all children.
8099 ** 5. Check the integrity of overflow pages. 9033 ** 5. Verify that the depth of all children is the same.
8100 ** 6. Recursively call checkTreePage on all children.
8101 ** 7. Verify that the depth of all children is the same.
8102 ** 8. Make sure this page is at least 33% full or else it is
8103 ** the root of the tree.
8104 */ 9034 */
8105 static int checkTreePage( 9035 static int checkTreePage(
8106 IntegrityCk *pCheck, /* Context for the sanity check */ 9036 IntegrityCk *pCheck, /* Context for the sanity check */
8107 int iPage, /* Page number of the page to check */ 9037 int iPage, /* Page number of the page to check */
8108 i64 *pnParentMinKey, 9038 i64 *piMinKey, /* Write minimum integer primary key here */
8109 i64 *pnParentMaxKey 9039 i64 maxKey /* Error if integer primary key greater than this */
8110 ){ 9040 ){
8111 MemPage *pPage; 9041 MemPage *pPage = 0; /* The page being analyzed */
8112 int i, rc, depth, d2, pgno, cnt; 9042 int i; /* Loop counter */
8113 int hdr, cellStart; 9043 int rc; /* Result code from subroutine call */
8114 int nCell; 9044 int depth = -1, d2; /* Depth of a subtree */
8115 u8 *data; 9045 int pgno; /* Page number */
8116 BtShared *pBt; 9046 int nFrag; /* Number of fragmented bytes on the page */
8117 int usableSize; 9047 int hdr; /* Offset to the page header */
8118 char *hit = 0; 9048 int cellStart; /* Offset to the start of the cell pointer array */
8119 i64 nMinKey = 0; 9049 int nCell; /* Number of cells */
8120 i64 nMaxKey = 0; 9050 int doCoverageCheck = 1; /* True if cell coverage checking should be done */
9051 int keyCanBeEqual = 1; /* True if IPK can be equal to maxKey
9052 ** False if IPK must be strictly less than maxKey */
9053 u8 *data; /* Page content */
9054 u8 *pCell; /* Cell content */
9055 u8 *pCellIdx; /* Next element of the cell pointer array */
9056 BtShared *pBt; /* The BtShared object that owns pPage */
9057 u32 pc; /* Address of a cell */
9058 u32 usableSize; /* Usable size of the page */
9059 u32 contentOffset; /* Offset to the start of the cell content area */
9060 u32 *heap = 0; /* Min-heap used for checking cell coverage */
9061 u32 x, prev = 0; /* Next and previous entry on the min-heap */
8121 const char *saved_zPfx = pCheck->zPfx; 9062 const char *saved_zPfx = pCheck->zPfx;
8122 int saved_v1 = pCheck->v1; 9063 int saved_v1 = pCheck->v1;
8123 int saved_v2 = pCheck->v2; 9064 int saved_v2 = pCheck->v2;
9065 u8 savedIsInit = 0;
8124 9066
8125 /* Check that the page exists 9067 /* Check that the page exists
8126 */ 9068 */
8127 pBt = pCheck->pBt; 9069 pBt = pCheck->pBt;
8128 usableSize = pBt->usableSize; 9070 usableSize = pBt->usableSize;
8129 if( iPage==0 ) return 0; 9071 if( iPage==0 ) return 0;
8130 if( checkRef(pCheck, iPage) ) return 0; 9072 if( checkRef(pCheck, iPage) ) return 0;
8131 pCheck->zPfx = "Page %d: "; 9073 pCheck->zPfx = "Page %d: ";
8132 pCheck->v1 = iPage; 9074 pCheck->v1 = iPage;
8133 if( (rc = btreeGetPage(pBt, (Pgno)iPage, &pPage, 0))!=0 ){ 9075 if( (rc = btreeGetPage(pBt, (Pgno)iPage, &pPage, 0))!=0 ){
8134 checkAppendMsg(pCheck, 9076 checkAppendMsg(pCheck,
8135 "unable to get the page. error code=%d", rc); 9077 "unable to get the page. error code=%d", rc);
8136 depth = -1;
8137 goto end_of_check; 9078 goto end_of_check;
8138 } 9079 }
8139 9080
8140 /* Clear MemPage.isInit to make sure the corruption detection code in 9081 /* Clear MemPage.isInit to make sure the corruption detection code in
8141 ** btreeInitPage() is executed. */ 9082 ** btreeInitPage() is executed. */
9083 savedIsInit = pPage->isInit;
8142 pPage->isInit = 0; 9084 pPage->isInit = 0;
8143 if( (rc = btreeInitPage(pPage))!=0 ){ 9085 if( (rc = btreeInitPage(pPage))!=0 ){
8144 assert( rc==SQLITE_CORRUPT ); /* The only possible error from InitPage */ 9086 assert( rc==SQLITE_CORRUPT ); /* The only possible error from InitPage */
8145 checkAppendMsg(pCheck, 9087 checkAppendMsg(pCheck,
8146 "btreeInitPage() returns error code %d", rc); 9088 "btreeInitPage() returns error code %d", rc);
8147 releasePage(pPage);
8148 depth = -1;
8149 goto end_of_check; 9089 goto end_of_check;
8150 } 9090 }
9091 data = pPage->aData;
9092 hdr = pPage->hdrOffset;
8151 9093
8152 /* Check out all the cells. 9094 /* Set up for cell analysis */
8153 */ 9095 pCheck->zPfx = "On tree page %d cell %d: ";
8154 depth = 0; 9096 contentOffset = get2byteNotZero(&data[hdr+5]);
8155 for(i=0; i<pPage->nCell && pCheck->mxErr; i++){ 9097 assert( contentOffset<=usableSize ); /* Enforced by btreeInitPage() */
8156 u8 *pCell; 9098
8157 u32 sz; 9099 /* EVIDENCE-OF: R-37002-32774 The two-byte integer at offset 3 gives the
9100 ** number of cells on the page. */
9101 nCell = get2byte(&data[hdr+3]);
9102 assert( pPage->nCell==nCell );
9103
9104 /* EVIDENCE-OF: R-23882-45353 The cell pointer array of a b-tree page
9105 ** immediately follows the b-tree page header. */
9106 cellStart = hdr + 12 - 4*pPage->leaf;
9107 assert( pPage->aCellIdx==&data[cellStart] );
9108 pCellIdx = &data[cellStart + 2*(nCell-1)];
9109
9110 if( !pPage->leaf ){
9111 /* Analyze the right-child page of internal pages */
9112 pgno = get4byte(&data[hdr+8]);
9113 #ifndef SQLITE_OMIT_AUTOVACUUM
9114 if( pBt->autoVacuum ){
9115 pCheck->zPfx = "On page %d at right child: ";
9116 checkPtrmap(pCheck, pgno, PTRMAP_BTREE, iPage);
9117 }
9118 #endif
9119 depth = checkTreePage(pCheck, pgno, &maxKey, maxKey);
9120 keyCanBeEqual = 0;
9121 }else{
9122 /* For leaf pages, the coverage check will occur in the same loop
9123 ** as the other cell checks, so initialize the heap. */
9124 heap = pCheck->heap;
9125 heap[0] = 0;
9126 }
9127
9128 /* EVIDENCE-OF: R-02776-14802 The cell pointer array consists of K 2-byte
9129 ** integer offsets to the cell contents. */
9130 for(i=nCell-1; i>=0 && pCheck->mxErr; i--){
8158 CellInfo info; 9131 CellInfo info;
8159 9132
8160 /* Check payload overflow pages 9133 /* Check cell size */
8161 */
8162 pCheck->zPfx = "On tree page %d cell %d: ";
8163 pCheck->v1 = iPage;
8164 pCheck->v2 = i; 9134 pCheck->v2 = i;
8165 pCell = findCell(pPage,i); 9135 assert( pCellIdx==&data[cellStart + i*2] );
8166 btreeParseCellPtr(pPage, pCell, &info); 9136 pc = get2byteAligned(pCellIdx);
8167 sz = info.nPayload; 9137 pCellIdx -= 2;
8168 /* For intKey pages, check that the keys are in order. 9138 if( pc<contentOffset || pc>usableSize-4 ){
8169 */ 9139 checkAppendMsg(pCheck, "Offset %d out of range %d..%d",
9140 pc, contentOffset, usableSize-4);
9141 doCoverageCheck = 0;
9142 continue;
9143 }
9144 pCell = &data[pc];
9145 pPage->xParseCell(pPage, pCell, &info);
9146 if( pc+info.nSize>usableSize ){
9147 checkAppendMsg(pCheck, "Extends off end of page");
9148 doCoverageCheck = 0;
9149 continue;
9150 }
9151
9152 /* Check for integer primary key out of range */
8170 if( pPage->intKey ){ 9153 if( pPage->intKey ){
8171 if( i==0 ){ 9154 if( keyCanBeEqual ? (info.nKey > maxKey) : (info.nKey >= maxKey) ){
8172 nMinKey = nMaxKey = info.nKey; 9155 checkAppendMsg(pCheck, "Rowid %lld out of order", info.nKey);
8173 }else if( info.nKey <= nMaxKey ){
8174 checkAppendMsg(pCheck,
8175 "Rowid %lld out of order (previous was %lld)", info.nKey, nMaxKey);
8176 } 9156 }
8177 nMaxKey = info.nKey; 9157 maxKey = info.nKey;
8178 } 9158 }
8179 if( (sz>info.nLocal) 9159
8180 && (&pCell[info.iOverflow]<=&pPage->aData[pBt->usableSize]) 9160 /* Check the content overflow list */
8181 ){ 9161 if( info.nPayload>info.nLocal ){
8182 int nPage = (sz - info.nLocal + usableSize - 5)/(usableSize - 4); 9162 int nPage; /* Number of pages on the overflow chain */
8183 Pgno pgnoOvfl = get4byte(&pCell[info.iOverflow]); 9163 Pgno pgnoOvfl; /* First page of the overflow chain */
9164 assert( pc + info.nSize - 4 <= usableSize );
9165 nPage = (info.nPayload - info.nLocal + usableSize - 5)/(usableSize - 4);
9166 pgnoOvfl = get4byte(&pCell[info.nSize - 4]);
8184 #ifndef SQLITE_OMIT_AUTOVACUUM 9167 #ifndef SQLITE_OMIT_AUTOVACUUM
8185 if( pBt->autoVacuum ){ 9168 if( pBt->autoVacuum ){
8186 checkPtrmap(pCheck, pgnoOvfl, PTRMAP_OVERFLOW1, iPage); 9169 checkPtrmap(pCheck, pgnoOvfl, PTRMAP_OVERFLOW1, iPage);
8187 } 9170 }
8188 #endif 9171 #endif
8189 checkList(pCheck, 0, pgnoOvfl, nPage); 9172 checkList(pCheck, 0, pgnoOvfl, nPage);
8190 } 9173 }
8191 9174
8192 /* Check sanity of left child page.
8193 */
8194 if( !pPage->leaf ){ 9175 if( !pPage->leaf ){
9176 /* Check sanity of left child page for internal pages */
8195 pgno = get4byte(pCell); 9177 pgno = get4byte(pCell);
8196 #ifndef SQLITE_OMIT_AUTOVACUUM 9178 #ifndef SQLITE_OMIT_AUTOVACUUM
8197 if( pBt->autoVacuum ){ 9179 if( pBt->autoVacuum ){
8198 checkPtrmap(pCheck, pgno, PTRMAP_BTREE, iPage); 9180 checkPtrmap(pCheck, pgno, PTRMAP_BTREE, iPage);
8199 } 9181 }
8200 #endif 9182 #endif
8201 d2 = checkTreePage(pCheck, pgno, &nMinKey, i==0?NULL:&nMaxKey); 9183 d2 = checkTreePage(pCheck, pgno, &maxKey, maxKey);
8202 if( i>0 && d2!=depth ){ 9184 keyCanBeEqual = 0;
9185 if( d2!=depth ){
8203 checkAppendMsg(pCheck, "Child page depth differs"); 9186 checkAppendMsg(pCheck, "Child page depth differs");
9187 depth = d2;
8204 } 9188 }
8205 depth = d2; 9189 }else{
9190 /* Populate the coverage-checking heap for leaf pages */
9191 btreeHeapInsert(heap, (pc<<16)|(pc+info.nSize-1));
9192 }
9193 }
9194 *piMinKey = maxKey;
9195
9196 /* Check for complete coverage of the page
9197 */
9198 pCheck->zPfx = 0;
9199 if( doCoverageCheck && pCheck->mxErr>0 ){
9200 /* For leaf pages, the min-heap has already been initialized and the
9201 ** cells have already been inserted. But for internal pages, that has
9202 ** not yet been done, so do it now */
9203 if( !pPage->leaf ){
9204 heap = pCheck->heap;
9205 heap[0] = 0;
9206 for(i=nCell-1; i>=0; i--){
9207 u32 size;
9208 pc = get2byteAligned(&data[cellStart+i*2]);
9209 size = pPage->xCellSize(pPage, &data[pc]);
9210 btreeHeapInsert(heap, (pc<<16)|(pc+size-1));
9211 }
9212 }
9213 /* Add the freeblocks to the min-heap
9214 **
9215 ** EVIDENCE-OF: R-20690-50594 The second field of the b-tree page header
9216 ** is the offset of the first freeblock, or zero if there are no
9217 ** freeblocks on the page.
9218 */
9219 i = get2byte(&data[hdr+1]);
9220 while( i>0 ){
9221 int size, j;
9222 assert( (u32)i<=usableSize-4 ); /* Enforced by btreeInitPage() */
9223 size = get2byte(&data[i+2]);
9224 assert( (u32)(i+size)<=usableSize ); /* Enforced by btreeInitPage() */
9225 btreeHeapInsert(heap, (((u32)i)<<16)|(i+size-1));
9226 /* EVIDENCE-OF: R-58208-19414 The first 2 bytes of a freeblock are a
9227 ** big-endian integer which is the offset in the b-tree page of the next
9228 ** freeblock in the chain, or zero if the freeblock is the last on the
9229 ** chain. */
9230 j = get2byte(&data[i]);
9231 /* EVIDENCE-OF: R-06866-39125 Freeblocks are always connected in order of
9232 ** increasing offset. */
9233 assert( j==0 || j>i+size ); /* Enforced by btreeInitPage() */
9234 assert( (u32)j<=usableSize-4 ); /* Enforced by btreeInitPage() */
9235 i = j;
9236 }
9237 /* Analyze the min-heap looking for overlap between cells and/or
9238 ** freeblocks, and counting the number of untracked bytes in nFrag.
9239 **
9240 ** Each min-heap entry is of the form: (start_address<<16)|end_address.
9241 ** There is an implied first entry that covers the page header, the cell
9242 ** pointer index, and the gap between the cell pointer index and the start
9243 ** of cell content.
9244 **
9245 ** The loop below pulls entries from the min-heap in order and compares
9246 ** the start_address against the previous end_address. If there is an
9247 ** overlap, that means bytes are used multiple times. If there is a gap,
9248 ** that gap is added to the fragmentation count.
9249 */
9250 nFrag = 0;
9251 prev = contentOffset - 1; /* Implied first min-heap entry */
9252 while( btreeHeapPull(heap,&x) ){
9253 if( (prev&0xffff)>=(x>>16) ){
9254 checkAppendMsg(pCheck,
9255 "Multiple uses for byte %u of page %d", x>>16, iPage);
9256 break;
9257 }else{
9258 nFrag += (x>>16) - (prev&0xffff) - 1;
9259 prev = x;
9260 }
9261 }
9262 nFrag += usableSize - (prev&0xffff) - 1;
9263 /* EVIDENCE-OF: R-43263-13491 The total number of bytes in all fragments
9264 ** is stored in the fifth field of the b-tree page header.
9265 ** EVIDENCE-OF: R-07161-27322 The one-byte integer at offset 7 gives the
9266 ** number of fragmented free bytes within the cell content area.
9267 */
9268 if( heap[0]==0 && nFrag!=data[hdr+7] ){
9269 checkAppendMsg(pCheck,
9270 "Fragmentation of %d bytes reported as %d on page %d",
9271 nFrag, data[hdr+7], iPage);
8206 } 9272 }
8207 } 9273 }
8208 9274
8209 if( !pPage->leaf ){ 9275 end_of_check:
8210 pgno = get4byte(&pPage->aData[pPage->hdrOffset+8]); 9276 if( !doCoverageCheck ) pPage->isInit = savedIsInit;
8211 pCheck->zPfx = "On page %d at right child: ";
8212 pCheck->v1 = iPage;
8213 #ifndef SQLITE_OMIT_AUTOVACUUM
8214 if( pBt->autoVacuum ){
8215 checkPtrmap(pCheck, pgno, PTRMAP_BTREE, iPage);
8216 }
8217 #endif
8218 checkTreePage(pCheck, pgno, NULL, !pPage->nCell?NULL:&nMaxKey);
8219 }
8220
8221 /* For intKey leaf pages, check that the min/max keys are in order
8222 ** with any left/parent/right pages.
8223 */
8224 pCheck->zPfx = "Page %d: ";
8225 pCheck->v1 = iPage;
8226 if( pPage->leaf && pPage->intKey ){
8227 /* if we are a left child page */
8228 if( pnParentMinKey ){
8229 /* if we are the left most child page */
8230 if( !pnParentMaxKey ){
8231 if( nMaxKey > *pnParentMinKey ){
8232 checkAppendMsg(pCheck,
8233 "Rowid %lld out of order (max larger than parent min of %lld)",
8234 nMaxKey, *pnParentMinKey);
8235 }
8236 }else{
8237 if( nMinKey <= *pnParentMinKey ){
8238 checkAppendMsg(pCheck,
8239 "Rowid %lld out of order (min less than parent min of %lld)",
8240 nMinKey, *pnParentMinKey);
8241 }
8242 if( nMaxKey > *pnParentMaxKey ){
8243 checkAppendMsg(pCheck,
8244 "Rowid %lld out of order (max larger than parent max of %lld)",
8245 nMaxKey, *pnParentMaxKey);
8246 }
8247 *pnParentMinKey = nMaxKey;
8248 }
8249 /* else if we're a right child page */
8250 } else if( pnParentMaxKey ){
8251 if( nMinKey <= *pnParentMaxKey ){
8252 checkAppendMsg(pCheck,
8253 "Rowid %lld out of order (min less than parent max of %lld)",
8254 nMinKey, *pnParentMaxKey);
8255 }
8256 }
8257 }
8258
8259 /* Check for complete coverage of the page
8260 */
8261 data = pPage->aData;
8262 hdr = pPage->hdrOffset;
8263 hit = sqlite3PageMalloc( pBt->pageSize );
8264 pCheck->zPfx = 0;
8265 if( hit==0 ){
8266 pCheck->mallocFailed = 1;
8267 }else{
8268 int contentOffset = get2byteNotZero(&data[hdr+5]);
8269 assert( contentOffset<=usableSize ); /* Enforced by btreeInitPage() */
8270 memset(hit+contentOffset, 0, usableSize-contentOffset);
8271 memset(hit, 1, contentOffset);
8272 nCell = get2byte(&data[hdr+3]);
8273 cellStart = hdr + 12 - 4*pPage->leaf;
8274 for(i=0; i<nCell; i++){
8275 int pc = get2byte(&data[cellStart+i*2]);
8276 u32 size = 65536;
8277 int j;
8278 if( pc<=usableSize-4 ){
8279 size = cellSizePtr(pPage, &data[pc]);
8280 }
8281 if( (int)(pc+size-1)>=usableSize ){
8282 pCheck->zPfx = 0;
8283 checkAppendMsg(pCheck,
8284 "Corruption detected in cell %d on page %d",i,iPage);
8285 }else{
8286 for(j=pc+size-1; j>=pc; j--) hit[j]++;
8287 }
8288 }
8289 i = get2byte(&data[hdr+1]);
8290 while( i>0 ){
8291 int size, j;
8292 assert( i<=usableSize-4 ); /* Enforced by btreeInitPage() */
8293 size = get2byte(&data[i+2]);
8294 assert( i+size<=usableSize ); /* Enforced by btreeInitPage() */
8295 for(j=i+size-1; j>=i; j--) hit[j]++;
8296 j = get2byte(&data[i]);
8297 assert( j==0 || j>i+size ); /* Enforced by btreeInitPage() */
8298 assert( j<=usableSize-4 ); /* Enforced by btreeInitPage() */
8299 i = j;
8300 }
8301 for(i=cnt=0; i<usableSize; i++){
8302 if( hit[i]==0 ){
8303 cnt++;
8304 }else if( hit[i]>1 ){
8305 checkAppendMsg(pCheck,
8306 "Multiple uses for byte %d of page %d", i, iPage);
8307 break;
8308 }
8309 }
8310 if( cnt!=data[hdr+7] ){
8311 checkAppendMsg(pCheck,
8312 "Fragmentation of %d bytes reported as %d on page %d",
8313 cnt, data[hdr+7], iPage);
8314 }
8315 }
8316 sqlite3PageFree(hit);
8317 releasePage(pPage); 9277 releasePage(pPage);
8318
8319 end_of_check:
8320 pCheck->zPfx = saved_zPfx; 9278 pCheck->zPfx = saved_zPfx;
8321 pCheck->v1 = saved_v1; 9279 pCheck->v1 = saved_v1;
8322 pCheck->v2 = saved_v2; 9280 pCheck->v2 = saved_v2;
8323 return depth+1; 9281 return depth+1;
8324 } 9282 }
8325 #endif /* SQLITE_OMIT_INTEGRITY_CHECK */ 9283 #endif /* SQLITE_OMIT_INTEGRITY_CHECK */
8326 9284
8327 #ifndef SQLITE_OMIT_INTEGRITY_CHECK 9285 #ifndef SQLITE_OMIT_INTEGRITY_CHECK
8328 /* 9286 /*
8329 ** This routine does a complete check of the given BTree file. aRoot[] is 9287 ** This routine does a complete check of the given BTree file. aRoot[] is
8330 ** an array of page numbers where each page number is the root page of 9288 ** an array of page numbers where each page number is the root page of
8331 ** a table. nRoot is the number of entries in aRoot. 9289 ** a table. nRoot is the number of entries in aRoot.
8332 ** 9290 **
8333 ** A read-only or read-write transaction must be opened before calling 9291 ** A read-only or read-write transaction must be opened before calling
8334 ** this function. 9292 ** this function.
8335 ** 9293 **
8336 ** Write the number of errors seen in *pnErr. Except for some memory 9294 ** Write the number of errors seen in *pnErr. Except for some memory
8337 ** allocation errors, an error message held in memory obtained from 9295 ** allocation errors, an error message held in memory obtained from
8338 ** malloc is returned if *pnErr is non-zero. If *pnErr==0 then NULL is 9296 ** malloc is returned if *pnErr is non-zero. If *pnErr==0 then NULL is
8339 ** returned. If a memory allocation error occurs, NULL is returned. 9297 ** returned. If a memory allocation error occurs, NULL is returned.
8340 */ 9298 */
8341 char *sqlite3BtreeIntegrityCheck( 9299 char *sqlite3BtreeIntegrityCheck(
8342 Btree *p, /* The btree to be checked */ 9300 Btree *p, /* The btree to be checked */
8343 int *aRoot, /* An array of root pages numbers for individual trees */ 9301 int *aRoot, /* An array of root pages numbers for individual trees */
8344 int nRoot, /* Number of entries in aRoot[] */ 9302 int nRoot, /* Number of entries in aRoot[] */
8345 int mxErr, /* Stop reporting errors after this many */ 9303 int mxErr, /* Stop reporting errors after this many */
8346 int *pnErr /* Write number of errors seen to this variable */ 9304 int *pnErr /* Write number of errors seen to this variable */
8347 ){ 9305 ){
8348 Pgno i; 9306 Pgno i;
8349 int nRef;
8350 IntegrityCk sCheck; 9307 IntegrityCk sCheck;
8351 BtShared *pBt = p->pBt; 9308 BtShared *pBt = p->pBt;
9309 int savedDbFlags = pBt->db->flags;
8352 char zErr[100]; 9310 char zErr[100];
9311 VVA_ONLY( int nRef );
8353 9312
8354 sqlite3BtreeEnter(p); 9313 sqlite3BtreeEnter(p);
8355 assert( p->inTrans>TRANS_NONE && pBt->inTransaction>TRANS_NONE ); 9314 assert( p->inTrans>TRANS_NONE && pBt->inTransaction>TRANS_NONE );
8356 nRef = sqlite3PagerRefcount(pBt->pPager); 9315 assert( (nRef = sqlite3PagerRefcount(pBt->pPager))>=0 );
8357 sCheck.pBt = pBt; 9316 sCheck.pBt = pBt;
8358 sCheck.pPager = pBt->pPager; 9317 sCheck.pPager = pBt->pPager;
8359 sCheck.nPage = btreePagecount(sCheck.pBt); 9318 sCheck.nPage = btreePagecount(sCheck.pBt);
8360 sCheck.mxErr = mxErr; 9319 sCheck.mxErr = mxErr;
8361 sCheck.nErr = 0; 9320 sCheck.nErr = 0;
8362 sCheck.mallocFailed = 0; 9321 sCheck.mallocFailed = 0;
8363 sCheck.zPfx = 0; 9322 sCheck.zPfx = 0;
8364 sCheck.v1 = 0; 9323 sCheck.v1 = 0;
8365 sCheck.v2 = 0; 9324 sCheck.v2 = 0;
8366 *pnErr = 0; 9325 sCheck.aPgRef = 0;
9326 sCheck.heap = 0;
9327 sqlite3StrAccumInit(&sCheck.errMsg, 0, zErr, sizeof(zErr), SQLITE_MAX_LENGTH);
8367 if( sCheck.nPage==0 ){ 9328 if( sCheck.nPage==0 ){
8368 sqlite3BtreeLeave(p); 9329 goto integrity_ck_cleanup;
8369 return 0;
8370 } 9330 }
8371 9331
8372 sCheck.aPgRef = sqlite3MallocZero((sCheck.nPage / 8)+ 1); 9332 sCheck.aPgRef = sqlite3MallocZero((sCheck.nPage / 8)+ 1);
8373 if( !sCheck.aPgRef ){ 9333 if( !sCheck.aPgRef ){
8374 *pnErr = 1; 9334 sCheck.mallocFailed = 1;
8375 sqlite3BtreeLeave(p); 9335 goto integrity_ck_cleanup;
8376 return 0;
8377 } 9336 }
9337 sCheck.heap = (u32*)sqlite3PageMalloc( pBt->pageSize );
9338 if( sCheck.heap==0 ){
9339 sCheck.mallocFailed = 1;
9340 goto integrity_ck_cleanup;
9341 }
9342
8378 i = PENDING_BYTE_PAGE(pBt); 9343 i = PENDING_BYTE_PAGE(pBt);
8379 if( i<=sCheck.nPage ) setPageReferenced(&sCheck, i); 9344 if( i<=sCheck.nPage ) setPageReferenced(&sCheck, i);
8380 sqlite3StrAccumInit(&sCheck.errMsg, zErr, sizeof(zErr), SQLITE_MAX_LENGTH);
8381 sCheck.errMsg.useMalloc = 2;
8382 9345
8383 /* Check the integrity of the freelist 9346 /* Check the integrity of the freelist
8384 */ 9347 */
8385 sCheck.zPfx = "Main freelist: "; 9348 sCheck.zPfx = "Main freelist: ";
8386 checkList(&sCheck, 1, get4byte(&pBt->pPage1->aData[32]), 9349 checkList(&sCheck, 1, get4byte(&pBt->pPage1->aData[32]),
8387 get4byte(&pBt->pPage1->aData[36])); 9350 get4byte(&pBt->pPage1->aData[36]));
8388 sCheck.zPfx = 0; 9351 sCheck.zPfx = 0;
8389 9352
8390 /* Check all the tables. 9353 /* Check all the tables.
8391 */ 9354 */
9355 testcase( pBt->db->flags & SQLITE_CellSizeCk );
9356 pBt->db->flags &= ~SQLITE_CellSizeCk;
8392 for(i=0; (int)i<nRoot && sCheck.mxErr; i++){ 9357 for(i=0; (int)i<nRoot && sCheck.mxErr; i++){
9358 i64 notUsed;
8393 if( aRoot[i]==0 ) continue; 9359 if( aRoot[i]==0 ) continue;
8394 #ifndef SQLITE_OMIT_AUTOVACUUM 9360 #ifndef SQLITE_OMIT_AUTOVACUUM
8395 if( pBt->autoVacuum && aRoot[i]>1 ){ 9361 if( pBt->autoVacuum && aRoot[i]>1 ){
8396 checkPtrmap(&sCheck, aRoot[i], PTRMAP_ROOTPAGE, 0); 9362 checkPtrmap(&sCheck, aRoot[i], PTRMAP_ROOTPAGE, 0);
8397 } 9363 }
8398 #endif 9364 #endif
8399 sCheck.zPfx = "List of tree roots: "; 9365 checkTreePage(&sCheck, aRoot[i], &notUsed, LARGEST_INT64);
8400 checkTreePage(&sCheck, aRoot[i], NULL, NULL);
8401 sCheck.zPfx = 0;
8402 } 9366 }
9367 pBt->db->flags = savedDbFlags;
8403 9368
8404 /* Make sure every page in the file is referenced 9369 /* Make sure every page in the file is referenced
8405 */ 9370 */
8406 for(i=1; i<=sCheck.nPage && sCheck.mxErr; i++){ 9371 for(i=1; i<=sCheck.nPage && sCheck.mxErr; i++){
8407 #ifdef SQLITE_OMIT_AUTOVACUUM 9372 #ifdef SQLITE_OMIT_AUTOVACUUM
8408 if( getPageReferenced(&sCheck, i)==0 ){ 9373 if( getPageReferenced(&sCheck, i)==0 ){
8409 checkAppendMsg(&sCheck, "Page %d is never used", i); 9374 checkAppendMsg(&sCheck, "Page %d is never used", i);
8410 } 9375 }
8411 #else 9376 #else
8412 /* If the database supports auto-vacuum, make sure no tables contain 9377 /* If the database supports auto-vacuum, make sure no tables contain
8413 ** references to pointer-map pages. 9378 ** references to pointer-map pages.
8414 */ 9379 */
8415 if( getPageReferenced(&sCheck, i)==0 && 9380 if( getPageReferenced(&sCheck, i)==0 &&
8416 (PTRMAP_PAGENO(pBt, i)!=i || !pBt->autoVacuum) ){ 9381 (PTRMAP_PAGENO(pBt, i)!=i || !pBt->autoVacuum) ){
8417 checkAppendMsg(&sCheck, "Page %d is never used", i); 9382 checkAppendMsg(&sCheck, "Page %d is never used", i);
8418 } 9383 }
8419 if( getPageReferenced(&sCheck, i)!=0 && 9384 if( getPageReferenced(&sCheck, i)!=0 &&
8420 (PTRMAP_PAGENO(pBt, i)==i && pBt->autoVacuum) ){ 9385 (PTRMAP_PAGENO(pBt, i)==i && pBt->autoVacuum) ){
8421 checkAppendMsg(&sCheck, "Pointer map page %d is referenced", i); 9386 checkAppendMsg(&sCheck, "Pointer map page %d is referenced", i);
8422 } 9387 }
8423 #endif 9388 #endif
8424 } 9389 }
8425 9390
8426 /* Make sure this analysis did not leave any unref() pages.
8427 ** This is an internal consistency check; an integrity check
8428 ** of the integrity check.
8429 */
8430 if( NEVER(nRef != sqlite3PagerRefcount(pBt->pPager)) ){
8431 checkAppendMsg(&sCheck,
8432 "Outstanding page count goes from %d to %d during this analysis",
8433 nRef, sqlite3PagerRefcount(pBt->pPager)
8434 );
8435 }
8436
8437 /* Clean up and report errors. 9391 /* Clean up and report errors.
8438 */ 9392 */
8439 sqlite3BtreeLeave(p); 9393 integrity_ck_cleanup:
9394 sqlite3PageFree(sCheck.heap);
8440 sqlite3_free(sCheck.aPgRef); 9395 sqlite3_free(sCheck.aPgRef);
8441 if( sCheck.mallocFailed ){ 9396 if( sCheck.mallocFailed ){
8442 sqlite3StrAccumReset(&sCheck.errMsg); 9397 sqlite3StrAccumReset(&sCheck.errMsg);
8443 *pnErr = sCheck.nErr+1; 9398 sCheck.nErr++;
8444 return 0;
8445 } 9399 }
8446 *pnErr = sCheck.nErr; 9400 *pnErr = sCheck.nErr;
8447 if( sCheck.nErr==0 ) sqlite3StrAccumReset(&sCheck.errMsg); 9401 if( sCheck.nErr==0 ) sqlite3StrAccumReset(&sCheck.errMsg);
9402 /* Make sure this analysis did not leave any unref() pages. */
9403 assert( nRef==sqlite3PagerRefcount(pBt->pPager) );
9404 sqlite3BtreeLeave(p);
8448 return sqlite3StrAccumFinish(&sCheck.errMsg); 9405 return sqlite3StrAccumFinish(&sCheck.errMsg);
8449 } 9406 }
8450 #endif /* SQLITE_OMIT_INTEGRITY_CHECK */ 9407 #endif /* SQLITE_OMIT_INTEGRITY_CHECK */
8451 9408
8452 /* 9409 /*
8453 ** Return the full pathname of the underlying database file. Return 9410 ** Return the full pathname of the underlying database file. Return
8454 ** an empty string if the database is in-memory or a TEMP database. 9411 ** an empty string if the database is in-memory or a TEMP database.
8455 ** 9412 **
8456 ** The pager filename is invariant as long as the pager is 9413 ** The pager filename is invariant as long as the pager is
8457 ** open so it is safe to access without the BtShared mutex. 9414 ** open so it is safe to access without the BtShared mutex.
(...skipping 190 matching lines...) Expand 10 before | Expand all | Expand 10 after
8648 assert( pCsr->apPage[pCsr->iPage]->intKey ); 9605 assert( pCsr->apPage[pCsr->iPage]->intKey );
8649 9606
8650 return accessPayload(pCsr, offset, amt, (unsigned char *)z, 1); 9607 return accessPayload(pCsr, offset, amt, (unsigned char *)z, 1);
8651 } 9608 }
8652 9609
8653 /* 9610 /*
8654 ** Mark this cursor as an incremental blob cursor. 9611 ** Mark this cursor as an incremental blob cursor.
8655 */ 9612 */
8656 void sqlite3BtreeIncrblobCursor(BtCursor *pCur){ 9613 void sqlite3BtreeIncrblobCursor(BtCursor *pCur){
8657 pCur->curFlags |= BTCF_Incrblob; 9614 pCur->curFlags |= BTCF_Incrblob;
9615 pCur->pBtree->hasIncrblobCur = 1;
8658 } 9616 }
8659 #endif 9617 #endif
8660 9618
8661 /* 9619 /*
8662 ** Set both the "read version" (single byte at byte offset 18) and 9620 ** Set both the "read version" (single byte at byte offset 18) and
8663 ** "write version" (single byte at byte offset 19) fields in the database 9621 ** "write version" (single byte at byte offset 19) fields in the database
8664 ** header to iVersion. 9622 ** header to iVersion.
8665 */ 9623 */
8666 int sqlite3BtreeSetVersion(Btree *pBtree, int iVersion){ 9624 int sqlite3BtreeSetVersion(Btree *pBtree, int iVersion){
8667 BtShared *pBt = pBtree->pBt; 9625 BtShared *pBt = pBtree->pBt;
(...skipping 20 matching lines...) Expand all
8688 } 9646 }
8689 } 9647 }
8690 } 9648 }
8691 } 9649 }
8692 9650
8693 pBt->btsFlags &= ~BTS_NO_WAL; 9651 pBt->btsFlags &= ~BTS_NO_WAL;
8694 return rc; 9652 return rc;
8695 } 9653 }
8696 9654
8697 /* 9655 /*
8698 ** set the mask of hint flags for cursor pCsr. Currently the only valid 9656 ** Return true if the cursor has a hint specified. This routine is
8699 ** values are 0 and BTREE_BULKLOAD. 9657 ** only used from within assert() statements
8700 */ 9658 */
8701 void sqlite3BtreeCursorHints(BtCursor *pCsr, unsigned int mask){ 9659 int sqlite3BtreeCursorHasHint(BtCursor *pCsr, unsigned int mask){
8702 assert( mask==BTREE_BULKLOAD || mask==0 ); 9660 return (pCsr->hints & mask)!=0;
8703 pCsr->hints = mask;
8704 } 9661 }
8705 9662
8706 /* 9663 /*
8707 ** Return true if the given Btree is read-only. 9664 ** Return true if the given Btree is read-only.
8708 */ 9665 */
8709 int sqlite3BtreeIsReadonly(Btree *p){ 9666 int sqlite3BtreeIsReadonly(Btree *p){
8710 return (p->pBt->btsFlags & BTS_READ_ONLY)!=0; 9667 return (p->pBt->btsFlags & BTS_READ_ONLY)!=0;
8711 } 9668 }
9669
9670 /*
9671 ** Return the size of the header added to each page by this module.
9672 */
9673 int sqlite3HeaderSizeBtree(void){ return ROUND8(sizeof(MemPage)); }
OLDNEW
« no previous file with comments | « third_party/sqlite/sqlite-src-3100200/src/btree.h ('k') | third_party/sqlite/sqlite-src-3100200/src/btreeInt.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698