| Index: third_party/sqlite/src/src/btree.c | 
| diff --git a/third_party/sqlite/src/src/btree.c b/third_party/sqlite/src/src/btree.c | 
| index 103a1f3230f1c9b9ad74468ae35911a1530287aa..7ea66e0d3be94e88c344f06b969bffafc69ecd0c 100644 | 
| --- a/third_party/sqlite/src/src/btree.c | 
| +++ b/third_party/sqlite/src/src/btree.c | 
| @@ -9,7 +9,7 @@ | 
| **    May you share freely, never taking more than you give. | 
| ** | 
| ************************************************************************* | 
| -** This file implements a external (disk-based) database using BTrees. | 
| +** This file implements an external (disk-based) database using BTrees. | 
| ** See the header comment on "btreeInt.h" for additional information. | 
| ** Including a description of file format and an overview of operation. | 
| */ | 
| @@ -43,6 +43,25 @@ int sqlite3BtreeTrace=1;  /* True to enable tracing */ | 
| */ | 
| #define get2byteNotZero(X)  (((((int)get2byte(X))-1)&0xffff)+1) | 
|  | 
| +/* | 
| +** Values passed as the 5th argument to allocateBtreePage() | 
| +*/ | 
| +#define BTALLOC_ANY   0           /* Allocate any page */ | 
| +#define BTALLOC_EXACT 1           /* Allocate exact page if possible */ | 
| +#define BTALLOC_LE    2           /* Allocate any page <= the parameter */ | 
| + | 
| +/* | 
| +** Macro IfNotOmitAV(x) returns (x) if SQLITE_OMIT_AUTOVACUUM is not | 
| +** defined, or 0 if it is. For example: | 
| +** | 
| +**   bIncrVacuum = IfNotOmitAV(pBtShared->incrVacuum); | 
| +*/ | 
| +#ifndef SQLITE_OMIT_AUTOVACUUM | 
| +#define IfNotOmitAV(expr) (expr) | 
| +#else | 
| +#define IfNotOmitAV(expr) 0 | 
| +#endif | 
| + | 
| #ifndef SQLITE_OMIT_SHARED_CACHE | 
| /* | 
| ** A list of BtShared objects that are eligible for participation | 
| @@ -143,7 +162,7 @@ static int hasSharedCacheTableLock( | 
| ** the correct locks are held.  So do not bother - just return true. | 
| ** This case does not come up very often anyhow. | 
| */ | 
| -  if( isIndex && (!pSchema || (pSchema->flags&DB_SchemaLoaded)==0) ){ | 
| +  if( isIndex && (!pSchema || (pSchema->schemaFlags&DB_SchemaLoaded)==0) ){ | 
| return 1; | 
| } | 
|  | 
| @@ -243,7 +262,7 @@ static int querySharedCacheTableLock(Btree *p, Pgno iTab, u8 eLock){ | 
| /* If some other connection is holding an exclusive lock, the | 
| ** requested lock may not be obtained. | 
| */ | 
| -  if( pBt->pWriter!=p && pBt->isExclusive ){ | 
| +  if( pBt->pWriter!=p && (pBt->btsFlags & BTS_EXCLUSIVE)!=0 ){ | 
| sqlite3ConnectionBlocked(p->db, pBt->pWriter->db); | 
| return SQLITE_LOCKED_SHAREDCACHE; | 
| } | 
| @@ -264,7 +283,7 @@ static int querySharedCacheTableLock(Btree *p, Pgno iTab, u8 eLock){ | 
| sqlite3ConnectionBlocked(p->db, pIter->pBtree->db); | 
| if( eLock==WRITE_LOCK ){ | 
| assert( p==pBt->pWriter ); | 
| -        pBt->isPending = 1; | 
| +        pBt->btsFlags |= BTS_PENDING; | 
| } | 
| return SQLITE_LOCKED_SHAREDCACHE; | 
| } | 
| @@ -352,7 +371,7 @@ static int setSharedCacheTableLock(Btree *p, Pgno iTable, u8 eLock){ | 
| ** the setSharedCacheTableLock() procedure) held by Btree object p. | 
| ** | 
| ** This function assumes that Btree p has an open read or write | 
| -** transaction. If it does not, then the BtShared.isPending variable | 
| +** transaction. If it does not, then the BTS_PENDING flag | 
| ** may be incorrectly cleared. | 
| */ | 
| static void clearAllSharedCacheTableLocks(Btree *p){ | 
| @@ -365,7 +384,7 @@ static void clearAllSharedCacheTableLocks(Btree *p){ | 
|  | 
| while( *ppIter ){ | 
| BtLock *pLock = *ppIter; | 
| -    assert( pBt->isExclusive==0 || pBt->pWriter==pLock->pBtree ); | 
| +    assert( (pBt->btsFlags & BTS_EXCLUSIVE)==0 || pBt->pWriter==pLock->pBtree ); | 
| assert( pLock->pBtree->inTrans>=pLock->eLock ); | 
| if( pLock->pBtree==p ){ | 
| *ppIter = pLock->pNext; | 
| @@ -378,22 +397,21 @@ static void clearAllSharedCacheTableLocks(Btree *p){ | 
| } | 
| } | 
|  | 
| -  assert( pBt->isPending==0 || pBt->pWriter ); | 
| +  assert( (pBt->btsFlags & BTS_PENDING)==0 || pBt->pWriter ); | 
| if( pBt->pWriter==p ){ | 
| pBt->pWriter = 0; | 
| -    pBt->isExclusive = 0; | 
| -    pBt->isPending = 0; | 
| +    pBt->btsFlags &= ~(BTS_EXCLUSIVE|BTS_PENDING); | 
| }else if( pBt->nTransaction==2 ){ | 
| /* This function is called when Btree p is concluding its | 
| ** transaction. If there currently exists a writer, and p is not | 
| ** that writer, then the number of locks held by connections other | 
| ** than the writer must be about to drop to zero. In this case | 
| -    ** set the isPending flag to 0. | 
| +    ** set the BTS_PENDING flag to 0. | 
| ** | 
| -    ** If there is not currently a writer, then BtShared.isPending must | 
| +    ** If there is not currently a writer, then BTS_PENDING must | 
| ** be zero already. So this next line is harmless in that case. | 
| */ | 
| -    pBt->isPending = 0; | 
| +    pBt->btsFlags &= ~BTS_PENDING; | 
| } | 
| } | 
|  | 
| @@ -405,8 +423,7 @@ static void downgradeAllSharedCacheTableLocks(Btree *p){ | 
| if( pBt->pWriter==p ){ | 
| BtLock *pLock; | 
| pBt->pWriter = 0; | 
| -    pBt->isExclusive = 0; | 
| -    pBt->isPending = 0; | 
| +    pBt->btsFlags &= ~(BTS_EXCLUSIVE|BTS_PENDING); | 
| for(pLock=pBt->pLock; pLock; pLock=pLock->pNext){ | 
| assert( pLock->eLock==READ_LOCK || pLock->pBtree==p ); | 
| pLock->eLock = READ_LOCK; | 
| @@ -429,16 +446,11 @@ static int cursorHoldsMutex(BtCursor *p){ | 
| } | 
| #endif | 
|  | 
| - | 
| -#ifndef SQLITE_OMIT_INCRBLOB | 
| /* | 
| -** Invalidate the overflow page-list cache for cursor pCur, if any. | 
| +** Invalidate the overflow page-list cache of the cursor passed as the | 
| +** first argument by clearing the BTCF_ValidOvfl bit in its curFlags. | 
| */ | 
| -static void invalidateOverflowCache(BtCursor *pCur){ | 
| -  assert( cursorHoldsMutex(pCur) ); | 
| -  sqlite3_free(pCur->aOverflow); | 
| -  pCur->aOverflow = 0; | 
| -} | 
| +#define invalidateOverflowCache(pCur) (pCur->curFlags &= ~BTCF_ValidOvfl) | 
|  | 
| /* | 
| ** Invalidate the overflow page-list cache for all cursors opened | 
| @@ -452,6 +464,7 @@ static void invalidateAllOverflowCache(BtShared *pBt){ | 
| } | 
| } | 
|  | 
| +#ifndef SQLITE_OMIT_INCRBLOB | 
| /* | 
| ** This function is called before modifying the contents of a table | 
| ** to invalidate any incrblob cursors that are open on the | 
| @@ -474,16 +487,16 @@ static void invalidateIncrblobCursors( | 
| BtShared *pBt = pBtree->pBt; | 
| assert( sqlite3BtreeHoldsMutex(pBtree) ); | 
| for(p=pBt->pCursor; p; p=p->pNext){ | 
| -    if( p->isIncrblobHandle && (isClearTable || p->info.nKey==iRow) ){ | 
| +    if( (p->curFlags & BTCF_Incrblob)!=0 | 
| +     && (isClearTable || p->info.nKey==iRow) | 
| +    ){ | 
| p->eState = CURSOR_INVALID; | 
| } | 
| } | 
| } | 
|  | 
| #else | 
| -  /* Stub functions when INCRBLOB is omitted */ | 
| -  #define invalidateOverflowCache(x) | 
| -  #define invalidateAllOverflowCache(x) | 
| +  /* Stub function when INCRBLOB is omitted */ | 
| #define invalidateIncrblobCursors(x,y,z) | 
| #endif /* SQLITE_OMIT_INCRBLOB */ | 
|  | 
| @@ -559,6 +572,19 @@ static void btreeClearHasContent(BtShared *pBt){ | 
| } | 
|  | 
| /* | 
| +** Release all of the apPage[] pages for a cursor. | 
| +*/ | 
| +static void btreeReleaseAllCursorPages(BtCursor *pCur){ | 
| +  int i; | 
| +  for(i=0; i<=pCur->iPage; i++){ | 
| +    releasePage(pCur->apPage[i]); | 
| +    pCur->apPage[i] = 0; | 
| +  } | 
| +  pCur->iPage = -1; | 
| +} | 
| + | 
| + | 
| +/* | 
| ** Save the current cursor position in the variables BtCursor.nKey | 
| ** and BtCursor.pKey. The cursor's state is set to CURSOR_REQUIRESEEK. | 
| ** | 
| @@ -582,7 +608,7 @@ static int saveCursorPosition(BtCursor *pCur){ | 
| ** data. | 
| */ | 
| if( 0==pCur->apPage[0]->intKey ){ | 
| -    void *pKey = sqlite3Malloc( (int)pCur->nKey ); | 
| +    void *pKey = sqlite3Malloc( pCur->nKey ); | 
| if( pKey ){ | 
| rc = sqlite3BtreeKey(pCur, 0, (int)pCur->nKey, pKey); | 
| if( rc==SQLITE_OK ){ | 
| @@ -597,12 +623,7 @@ static int saveCursorPosition(BtCursor *pCur){ | 
| assert( !pCur->apPage[0]->intKey || !pCur->pKey ); | 
|  | 
| if( rc==SQLITE_OK ){ | 
| -    int i; | 
| -    for(i=0; i<=pCur->iPage; i++){ | 
| -      releasePage(pCur->apPage[i]); | 
| -      pCur->apPage[i] = 0; | 
| -    } | 
| -    pCur->iPage = -1; | 
| +    btreeReleaseAllCursorPages(pCur); | 
| pCur->eState = CURSOR_REQUIRESEEK; | 
| } | 
|  | 
| @@ -610,24 +631,55 @@ static int saveCursorPosition(BtCursor *pCur){ | 
| return rc; | 
| } | 
|  | 
| +/* Forward reference */ | 
| +static int SQLITE_NOINLINE saveCursorsOnList(BtCursor*,Pgno,BtCursor*); | 
| + | 
| /* | 
| ** Save the positions of all cursors (except pExcept) that are open on | 
| -** the table  with root-page iRoot. Usually, this is called just before cursor | 
| -** pExcept is used to modify the table (BtreeDelete() or BtreeInsert()). | 
| +** the table with root-page iRoot.  "Saving the cursor position" means that | 
| +** the location in the btree is remembered in such a way that it can be | 
| +** moved back to the same spot after the btree has been modified.  This | 
| +** routine is called just before cursor pExcept is used to modify the | 
| +** table, for example in BtreeDelete() or BtreeInsert(). | 
| +** | 
| +** Implementation note:  This routine merely checks to see if any cursors | 
| +** need to be saved.  It calls out to saveCursorsOnList() in the (unusual) | 
| +** event that cursors are in need of being saved. | 
| */ | 
| static int saveAllCursors(BtShared *pBt, Pgno iRoot, BtCursor *pExcept){ | 
| BtCursor *p; | 
| assert( sqlite3_mutex_held(pBt->mutex) ); | 
| assert( pExcept==0 || pExcept->pBt==pBt ); | 
| for(p=pBt->pCursor; p; p=p->pNext){ | 
| -    if( p!=pExcept && (0==iRoot || p->pgnoRoot==iRoot) && | 
| -        p->eState==CURSOR_VALID ){ | 
| -      int rc = saveCursorPosition(p); | 
| -      if( SQLITE_OK!=rc ){ | 
| -        return rc; | 
| +    if( p!=pExcept && (0==iRoot || p->pgnoRoot==iRoot) ) break; | 
| +  } | 
| +  return p ? saveCursorsOnList(p, iRoot, pExcept) : SQLITE_OK; | 
| +} | 
| + | 
| +/* This helper routine to saveAllCursors does the actual work of saving | 
| +** the cursors if and when a cursor is found that actually requires saving. | 
| +** The common case is that no cursors need to be saved, so this routine is | 
| +** broken out from its caller to avoid unnecessary stack pointer movement. | 
| +*/ | 
| +static int SQLITE_NOINLINE saveCursorsOnList( | 
| +  BtCursor *p,         /* The first cursor that needs saving */ | 
| +  Pgno iRoot,          /* Only save cursor with this iRoot. Save all if zero */ | 
| +  BtCursor *pExcept    /* Do not save this cursor */ | 
| +){ | 
| +  do{ | 
| +    if( p!=pExcept && (0==iRoot || p->pgnoRoot==iRoot) ){ | 
| +      if( p->eState==CURSOR_VALID ){ | 
| +        int rc = saveCursorPosition(p); | 
| +        if( SQLITE_OK!=rc ){ | 
| +          return rc; | 
| +        } | 
| +      }else{ | 
| +        testcase( p->iPage>0 ); | 
| +        btreeReleaseAllCursorPages(p); | 
| } | 
| } | 
| -  } | 
| +    p = p->pNext; | 
| +  }while( p ); | 
| return SQLITE_OK; | 
| } | 
|  | 
| @@ -655,19 +707,26 @@ static int btreeMoveto( | 
| ){ | 
| int rc;                    /* Status code */ | 
| UnpackedRecord *pIdxKey;   /* Unpacked index key */ | 
| -  char aSpace[150];          /* Temp space for pIdxKey - to avoid a malloc */ | 
| +  char aSpace[200];          /* Temp space for pIdxKey - to avoid a malloc */ | 
| +  char *pFree = 0; | 
|  | 
| if( pKey ){ | 
| assert( nKey==(i64)(int)nKey ); | 
| -    pIdxKey = sqlite3VdbeRecordUnpack(pCur->pKeyInfo, (int)nKey, pKey, | 
| -                                      aSpace, sizeof(aSpace)); | 
| +    pIdxKey = sqlite3VdbeAllocUnpackedRecord( | 
| +        pCur->pKeyInfo, aSpace, sizeof(aSpace), &pFree | 
| +    ); | 
| if( pIdxKey==0 ) return SQLITE_NOMEM; | 
| +    sqlite3VdbeRecordUnpack(pCur->pKeyInfo, (int)nKey, pKey, pIdxKey); | 
| +    if( pIdxKey->nField==0 ){ | 
| +      sqlite3DbFree(pCur->pKeyInfo->db, pFree); | 
| +      return SQLITE_CORRUPT_BKPT; | 
| +    } | 
| }else{ | 
| pIdxKey = 0; | 
| } | 
| rc = sqlite3BtreeMovetoUnpacked(pCur, pIdxKey, nKey, bias, pRes); | 
| -  if( pKey ){ | 
| -    sqlite3VdbeDeleteUnpackedRecord(pIdxKey); | 
| +  if( pFree ){ | 
| +    sqlite3DbFree(pCur->pKeyInfo->db, pFree); | 
| } | 
| return rc; | 
| } | 
| @@ -692,6 +751,9 @@ static int btreeRestoreCursorPosition(BtCursor *pCur){ | 
| sqlite3_free(pCur->pKey); | 
| pCur->pKey = 0; | 
| assert( pCur->eState==CURSOR_VALID || pCur->eState==CURSOR_INVALID ); | 
| +    if( pCur->skipNext && pCur->eState==CURSOR_VALID ){ | 
| +      pCur->eState = CURSOR_SKIPNEXT; | 
| +    } | 
| } | 
| return rc; | 
| } | 
| @@ -702,25 +764,48 @@ static int btreeRestoreCursorPosition(BtCursor *pCur){ | 
| SQLITE_OK) | 
|  | 
| /* | 
| -** Determine whether or not a cursor has moved from the position it | 
| -** was last placed at.  Cursors can move when the row they are pointing | 
| -** at is deleted out from under them. | 
| +** Determine whether or not a cursor has moved from the position where | 
| +** it was last placed, or has been invalidated for any other reason. | 
| +** Cursors can move when the row they are pointing at is deleted out | 
| +** from under them, for example.  Cursors might also move if a btree | 
| +** is rebalanced. | 
| +** | 
| +** pCur must not be NULL; this routine reads pCur->eState unconditionally. | 
| +** | 
| +** Use the separate sqlite3BtreeCursorRestore() routine to restore a cursor | 
| +** back to where it ought to be if this routine returns true. | 
| +*/ | 
| +int sqlite3BtreeCursorHasMoved(BtCursor *pCur){ | 
| +  return pCur->eState!=CURSOR_VALID; | 
| +} | 
| + | 
| +/* | 
| +** This routine restores a cursor back to its original position after it | 
| +** has been moved by some outside activity (such as a btree rebalance or | 
| +** a row having been deleted out from under the cursor). | 
| +** | 
| +** On success, the *pDifferentRow parameter is false if the cursor is left | 
| +** pointing at exactly the same row.  *pDifferentRow is true if the row | 
| +** the cursor was pointing to has been deleted, forcing the cursor to | 
| +** point to some nearby row. | 
| ** | 
| -** This routine returns an error code if something goes wrong.  The | 
| -** integer *pHasMoved is set to one if the cursor has moved and 0 if not. | 
| +** This routine should only be called for a cursor that just returned | 
| +** TRUE from sqlite3BtreeCursorHasMoved(). | 
| */ | 
| -int sqlite3BtreeCursorHasMoved(BtCursor *pCur, int *pHasMoved){ | 
| +int sqlite3BtreeCursorRestore(BtCursor *pCur, int *pDifferentRow){ | 
| int rc; | 
|  | 
| +  assert( pCur!=0 ); | 
| +  assert( pCur->eState!=CURSOR_VALID ); | 
| rc = restoreCursorPosition(pCur); | 
| if( rc ){ | 
| -    *pHasMoved = 1; | 
| +    *pDifferentRow = 1; | 
| return rc; | 
| } | 
| -  if( pCur->eState!=CURSOR_VALID || pCur->skipNext!=0 ){ | 
| -    *pHasMoved = 1; | 
| +  if( pCur->eState!=CURSOR_VALID || NEVER(pCur->skipNext!=0) ){ | 
| +    *pDifferentRow = 1; | 
| }else{ | 
| -    *pHasMoved = 0; | 
| +    *pDifferentRow = 0; | 
| } | 
| return SQLITE_OK; | 
| } | 
| @@ -788,6 +873,7 @@ static void ptrmapPut(BtShared *pBt, Pgno key, u8 eType, Pgno parent, int *pRC){ | 
| *pRC = SQLITE_CORRUPT_BKPT; | 
| goto ptrmap_exit; | 
| } | 
| +  assert( offset <= (int)pBt->usableSize-5 ); | 
| pPtrmap = (u8 *)sqlite3PagerGetData(pDbPage); | 
|  | 
| if( eType!=pPtrmap[offset] || get4byte(&pPtrmap[offset+1])!=parent ){ | 
| @@ -827,6 +913,11 @@ static int ptrmapGet(BtShared *pBt, Pgno key, u8 *pEType, Pgno *pPgno){ | 
| pPtrmap = (u8 *)sqlite3PagerGetData(pDbPage); | 
|  | 
| offset = PTRMAP_PTROFFSET(iPtrmap, key); | 
| +  if( offset<0 ){ | 
| +    sqlite3PagerUnref(pDbPage); | 
| +    return SQLITE_CORRUPT_BKPT; | 
| +  } | 
| +  assert( offset <= (int)pBt->usableSize-5 ); | 
| assert( pEType!=0 ); | 
| *pEType = pPtrmap[offset]; | 
| if( pPgno ) *pPgno = get4byte(&pPtrmap[offset+1]); | 
| @@ -850,7 +941,9 @@ static int ptrmapGet(BtShared *pBt, Pgno key, u8 *pEType, Pgno *pPgno){ | 
| ** This routine works only for pages that do not contain overflow cells. | 
| */ | 
| #define findCell(P,I) \ | 
| -  ((P)->aData + ((P)->maskPage & get2byte(&(P)->aData[(P)->cellOffset+2*(I)]))) | 
| +  ((P)->aData + ((P)->maskPage & get2byte(&(P)->aCellIdx[2*(I)]))) | 
| +#define findCellv2(D,M,O,I) (D+(M&get2byte(D+(O+2*(I))))) | 
| + | 
|  | 
| /* | 
| ** This a more complex version of findCell() that works for | 
| @@ -861,12 +954,10 @@ static u8 *findOverflowCell(MemPage *pPage, int iCell){ | 
| assert( sqlite3_mutex_held(pPage->pBt->mutex) ); | 
| for(i=pPage->nOverflow-1; i>=0; i--){ | 
| int k; | 
| -    struct _OvflCell *pOvfl; | 
| -    pOvfl = &pPage->aOvfl[i]; | 
| -    k = pOvfl->idx; | 
| +    k = pPage->aiOvfl[i]; | 
| if( k<=iCell ){ | 
| if( k==iCell ){ | 
| -        return pOvfl->pCell; | 
| +        return pPage->apOvfl[i]; | 
| } | 
| iCell--; | 
| } | 
| @@ -879,46 +970,44 @@ static u8 *findOverflowCell(MemPage *pPage, int iCell){ | 
| ** are two versions of this function.  btreeParseCell() takes a | 
| ** cell index as the second argument and btreeParseCellPtr() | 
| ** takes a pointer to the body of the cell as its second argument. | 
| -** | 
| -** Within this file, the parseCell() macro can be called instead of | 
| -** btreeParseCellPtr(). Using some compilers, this will be faster. | 
| */ | 
| static void btreeParseCellPtr( | 
| MemPage *pPage,         /* Page containing the cell */ | 
| u8 *pCell,              /* Pointer to the cell text. */ | 
| CellInfo *pInfo         /* Fill in this structure */ | 
| ){ | 
| -  u16 n;                  /* Number bytes in cell content header */ | 
| +  u8 *pIter;              /* For scanning through pCell */ | 
| u32 nPayload;           /* Number of bytes of cell payload */ | 
|  | 
| assert( sqlite3_mutex_held(pPage->pBt->mutex) ); | 
| - | 
| -  pInfo->pCell = pCell; | 
| assert( pPage->leaf==0 || pPage->leaf==1 ); | 
| -  n = pPage->childPtrSize; | 
| -  assert( n==4-4*pPage->leaf ); | 
| -  if( pPage->intKey ){ | 
| -    if( pPage->hasData ){ | 
| -      n += getVarint32(&pCell[n], nPayload); | 
| -    }else{ | 
| -      nPayload = 0; | 
| -    } | 
| -    n += getVarint(&pCell[n], (u64*)&pInfo->nKey); | 
| -    pInfo->nData = nPayload; | 
| +  if( pPage->intKeyLeaf ){ | 
| +    assert( pPage->childPtrSize==0 ); | 
| +    pIter = pCell + getVarint32(pCell, nPayload); | 
| +    pIter += getVarint(pIter, (u64*)&pInfo->nKey); | 
| +  }else if( pPage->noPayload ){ | 
| +    assert( pPage->childPtrSize==4 ); | 
| +    pInfo->nSize = 4 + getVarint(&pCell[4], (u64*)&pInfo->nKey); | 
| +    pInfo->nPayload = 0; | 
| +    pInfo->nLocal = 0; | 
| +    pInfo->iOverflow = 0; | 
| +    pInfo->pPayload = 0; | 
| +    return; | 
| }else{ | 
| -    pInfo->nData = 0; | 
| -    n += getVarint32(&pCell[n], nPayload); | 
| +    pIter = pCell + pPage->childPtrSize; | 
| +    pIter += getVarint32(pIter, nPayload); | 
| pInfo->nKey = nPayload; | 
| } | 
| pInfo->nPayload = nPayload; | 
| -  pInfo->nHeader = n; | 
| +  pInfo->pPayload = pIter; | 
| testcase( nPayload==pPage->maxLocal ); | 
| testcase( nPayload==pPage->maxLocal+1 ); | 
| -  if( likely(nPayload<=pPage->maxLocal) ){ | 
| +  if( nPayload<=pPage->maxLocal ){ | 
| /* This is the (easy) common case where the entire payload fits | 
| ** on the local page.  No overflow is required. | 
| */ | 
| -    if( (pInfo->nSize = (u16)(n+nPayload))<4 ) pInfo->nSize = 4; | 
| +    pInfo->nSize = nPayload + (u16)(pIter - pCell); | 
| +    if( pInfo->nSize<4 ) pInfo->nSize = 4; | 
| pInfo->nLocal = (u16)nPayload; | 
| pInfo->iOverflow = 0; | 
| }else{ | 
| @@ -945,18 +1034,16 @@ static void btreeParseCellPtr( | 
| }else{ | 
| pInfo->nLocal = (u16)minLocal; | 
| } | 
| -    pInfo->iOverflow = (u16)(pInfo->nLocal + n); | 
| +    pInfo->iOverflow = (u16)(&pInfo->pPayload[pInfo->nLocal] - pCell); | 
| pInfo->nSize = pInfo->iOverflow + 4; | 
| } | 
| } | 
| -#define parseCell(pPage, iCell, pInfo) \ | 
| -  btreeParseCellPtr((pPage), findCell((pPage), (iCell)), (pInfo)) | 
| static void btreeParseCell( | 
| MemPage *pPage,         /* Page containing the cell */ | 
| int iCell,              /* The cell index.  First cell is 0 */ | 
| CellInfo *pInfo         /* Fill in this structure */ | 
| ){ | 
| -  parseCell(pPage, iCell, pInfo); | 
| +  btreeParseCellPtr(pPage, findCell(pPage, iCell), pInfo); | 
| } | 
|  | 
| /* | 
| @@ -966,8 +1053,9 @@ static void btreeParseCell( | 
| ** the space used by the cell pointer. | 
| */ | 
| static u16 cellSizePtr(MemPage *pPage, u8 *pCell){ | 
| -  u8 *pIter = &pCell[pPage->childPtrSize]; | 
| -  u32 nSize; | 
| +  u8 *pIter = pCell + pPage->childPtrSize; /* For looping over bytes of pCell */ | 
| +  u8 *pEnd;                                /* End mark for a varint */ | 
| +  u32 nSize;                               /* Size value to return */ | 
|  | 
| #ifdef SQLITE_DEBUG | 
| /* The value returned by this function should always be the same as | 
| @@ -978,26 +1066,34 @@ static u16 cellSizePtr(MemPage *pPage, u8 *pCell){ | 
| btreeParseCellPtr(pPage, pCell, &debuginfo); | 
| #endif | 
|  | 
| +  if( pPage->noPayload ){ | 
| +    pEnd = &pIter[9]; | 
| +    while( (*pIter++)&0x80 && pIter<pEnd ); | 
| +    assert( pPage->childPtrSize==4 ); | 
| +    return (u16)(pIter - pCell); | 
| +  } | 
| +  nSize = *pIter; | 
| +  if( nSize>=0x80 ){ | 
| +    pEnd = &pIter[9]; | 
| +    nSize &= 0x7f; | 
| +    do{ | 
| +      nSize = (nSize<<7) | (*++pIter & 0x7f); | 
| +    }while( *(pIter)>=0x80 && pIter<pEnd ); | 
| +  } | 
| +  pIter++; | 
| if( pPage->intKey ){ | 
| -    u8 *pEnd; | 
| -    if( pPage->hasData ){ | 
| -      pIter += getVarint32(pIter, nSize); | 
| -    }else{ | 
| -      nSize = 0; | 
| -    } | 
| - | 
| /* pIter now points at the 64-bit integer key value, a variable length | 
| ** integer. The following block moves pIter to point at the first byte | 
| ** past the end of the key value. */ | 
| pEnd = &pIter[9]; | 
| while( (*pIter++)&0x80 && pIter<pEnd ); | 
| -  }else{ | 
| -    pIter += getVarint32(pIter, nSize); | 
| } | 
| - | 
| testcase( nSize==pPage->maxLocal ); | 
| testcase( nSize==pPage->maxLocal+1 ); | 
| -  if( nSize>pPage->maxLocal ){ | 
| +  if( nSize<=pPage->maxLocal ){ | 
| +    nSize += (u32)(pIter - pCell); | 
| +    if( nSize<4 ) nSize = 4; | 
| +  }else{ | 
| int minLocal = pPage->minLocal; | 
| nSize = minLocal + (nSize - minLocal) % (pPage->pBt->usableSize - 4); | 
| testcase( nSize==pPage->maxLocal ); | 
| @@ -1005,16 +1101,9 @@ static u16 cellSizePtr(MemPage *pPage, u8 *pCell){ | 
| if( nSize>pPage->maxLocal ){ | 
| nSize = minLocal; | 
| } | 
| -    nSize += 4; | 
| +    nSize += 4 + (u16)(pIter - pCell); | 
| } | 
| -  nSize += (u32)(pIter - pCell); | 
| - | 
| -  /* The minimum size of any cell is 4 bytes. */ | 
| -  if( nSize<4 ){ | 
| -    nSize = 4; | 
| -  } | 
| - | 
| -  assert( nSize==debuginfo.nSize ); | 
| +  assert( nSize==debuginfo.nSize || CORRUPT_DB ); | 
| return (u16)nSize; | 
| } | 
|  | 
| @@ -1037,7 +1126,6 @@ static void ptrmapPutOvflPtr(MemPage *pPage, u8 *pCell, int *pRC){ | 
| if( *pRC ) return; | 
| assert( pCell!=0 ); | 
| btreeParseCellPtr(pPage, pCell, &info); | 
| -  assert( (info.nData+(pPage->intKey?0:info.nKey))==info.nPayload ); | 
| if( info.iOverflow ){ | 
| Pgno ovfl = get4byte(&pCell[info.iOverflow]); | 
| ptrmapPut(pPage->pBt, ovfl, PTRMAP_OVERFLOW1, pPage->pgno, pRC); | 
| @@ -1054,7 +1142,7 @@ static void ptrmapPutOvflPtr(MemPage *pPage, u8 *pCell, int *pRC){ | 
| */ | 
| static int defragmentPage(MemPage *pPage){ | 
| int i;                     /* Loop counter */ | 
| -  int pc;                    /* Address of a i-th cell */ | 
| +  int pc;                    /* Address of the i-th cell */ | 
| int hdr;                   /* Offset to the page header */ | 
| int size;                  /* Size of a cell */ | 
| int usableSize;            /* Number of usable bytes on a page */ | 
| @@ -1145,7 +1233,6 @@ static int defragmentPage(MemPage *pPage){ | 
| static int allocateSpace(MemPage *pPage, int nByte, int *pIdx){ | 
| const int hdr = pPage->hdrOffset;    /* Local cache of pPage->hdrOffset */ | 
| u8 * const data = pPage->aData;      /* Local cache of pPage->aData */ | 
| -  int nFrag;                           /* Number of fragmented bytes on pPage */ | 
| int top;                             /* First byte of cell content area */ | 
| int gap;        /* First byte of gap between cell pointers and cell content */ | 
| int rc;         /* Integer return code */ | 
| @@ -1160,25 +1247,26 @@ static int allocateSpace(MemPage *pPage, int nByte, int *pIdx){ | 
| usableSize = pPage->pBt->usableSize; | 
| assert( nByte < usableSize-8 ); | 
|  | 
| -  nFrag = data[hdr+7]; | 
| assert( pPage->cellOffset == hdr + 12 - 4*pPage->leaf ); | 
| gap = pPage->cellOffset + 2*pPage->nCell; | 
| -  top = get2byteNotZero(&data[hdr+5]); | 
| -  if( gap>top ) return SQLITE_CORRUPT_BKPT; | 
| +  assert( gap<=65536 ); | 
| +  top = get2byte(&data[hdr+5]); | 
| +  if( gap>top ){ | 
| +    if( top==0 ){ | 
| +      top = 65536; | 
| +    }else{ | 
| +      return SQLITE_CORRUPT_BKPT; | 
| +    } | 
| +  } | 
| + | 
| +  /* If there is enough space between gap and top for one more cell pointer | 
| +  ** array entry offset, and if the freelist is not empty, then search the | 
| +  ** freelist looking for a free slot big enough to satisfy the request. | 
| +  */ | 
| testcase( gap+2==top ); | 
| testcase( gap+1==top ); | 
| testcase( gap==top ); | 
| - | 
| -  if( nFrag>=60 ){ | 
| -    /* Always defragment highly fragmented pages */ | 
| -    rc = defragmentPage(pPage); | 
| -    if( rc ) return rc; | 
| -    top = get2byteNotZero(&data[hdr+5]); | 
| -  }else if( gap+2<=top ){ | 
| -    /* Search the freelist looking for a free slot big enough to satisfy | 
| -    ** the request. The allocation is made from the first free slot in | 
| -    ** the list that is large enough to accomadate it. | 
| -    */ | 
| +  if( gap+2<=top && (data[hdr+1] || data[hdr+2]) ){ | 
| int pc, addr; | 
| for(addr=hdr+1; (pc = get2byte(&data[addr]))>0; addr=pc){ | 
| int size;            /* Size of the free slot */ | 
| @@ -1191,10 +1279,11 @@ static int allocateSpace(MemPage *pPage, int nByte, int *pIdx){ | 
| testcase( x==4 ); | 
| testcase( x==3 ); | 
| if( x<4 ){ | 
| +          if( data[hdr+7]>=60 ) goto defragment_page; | 
| /* Remove the slot from the free-list. Update the number of | 
| ** fragmented bytes within the page. */ | 
| memcpy(&data[addr], &data[pc], 2); | 
| -          data[hdr+7] = (u8)(nFrag + x); | 
| +          data[hdr+7] += (u8)x; | 
| }else if( size+pc > usableSize ){ | 
| return SQLITE_CORRUPT_BKPT; | 
| }else{ | 
| @@ -1208,11 +1297,13 @@ static int allocateSpace(MemPage *pPage, int nByte, int *pIdx){ | 
| } | 
| } | 
|  | 
| -  /* Check to make sure there is enough space in the gap to satisfy | 
| -  ** the allocation.  If not, defragment. | 
| +  /* The request could not be fulfilled using a freelist slot.  Check | 
| +  ** to see if defragmentation is necessary. | 
| */ | 
| testcase( gap+2+nByte==top ); | 
| if( gap+2+nByte>top ){ | 
| +defragment_page: | 
| +    testcase( pPage->nCell==0 ); | 
| rc = defragmentPage(pPage); | 
| if( rc ) return rc; | 
| top = get2byteNotZero(&data[hdr+5]); | 
| @@ -1235,90 +1326,100 @@ static int allocateSpace(MemPage *pPage, int nByte, int *pIdx){ | 
|  | 
| /* | 
| ** Return a section of the pPage->aData to the freelist. | 
| -** The first byte of the new free block is pPage->aDisk[start] | 
| -** and the size of the block is "size" bytes. | 
| -** | 
| -** Most of the effort here is involved in coalesing adjacent | 
| -** free blocks into a single big free block. | 
| -*/ | 
| -static int freeSpace(MemPage *pPage, int start, int size){ | 
| -  int addr, pbegin, hdr; | 
| -  int iLast;                        /* Largest possible freeblock offset */ | 
| -  unsigned char *data = pPage->aData; | 
| +** The first byte of the new free block is pPage->aData[iStart] | 
| +** and the size of the block is iSize bytes. | 
| +** | 
| +** Adjacent freeblocks are coalesced. | 
| +** | 
| +** Note that even though the freeblock list was checked by btreeInitPage(), | 
| +** that routine will not detect overlap between cells or freeblocks.  Nor | 
| +** does it detect cells or freeblocks that encroach into the reserved bytes | 
| +** at the end of the page.  So do additional corruption checks inside this | 
| +** routine and return SQLITE_CORRUPT if any problems are found. | 
| +*/ | 
| +static int freeSpace(MemPage *pPage, u16 iStart, u16 iSize){ | 
| +  u16 iPtr;                             /* Address of ptr to next freeblock */ | 
| +  u16 iFreeBlk;                         /* Address of the next freeblock */ | 
| +  u8 hdr;                               /* Page header size.  0 or 100 */ | 
| +  u8 nFrag = 0;                         /* Reduction in fragmentation */ | 
| +  u16 iOrigSize = iSize;                /* Original value of iSize */ | 
| +  u32 iLast = pPage->pBt->usableSize-4; /* Largest possible freeblock offset */ | 
| +  u32 iEnd = iStart + iSize;            /* First byte past the iStart buffer */ | 
| +  unsigned char *data = pPage->aData;   /* Page content */ | 
|  | 
| assert( pPage->pBt!=0 ); | 
| assert( sqlite3PagerIswriteable(pPage->pDbPage) ); | 
| -  assert( start>=pPage->hdrOffset+6+pPage->childPtrSize ); | 
| -  assert( (start + size) <= (int)pPage->pBt->usableSize ); | 
| +  assert( iStart>=pPage->hdrOffset+6+pPage->childPtrSize ); | 
| +  assert( iEnd <= pPage->pBt->usableSize ); | 
| assert( sqlite3_mutex_held(pPage->pBt->mutex) ); | 
| -  assert( size>=0 );   /* Minimum cell size is 4 */ | 
| - | 
| -  if( pPage->pBt->secureDelete ){ | 
| -    /* Overwrite deleted information with zeros when the secure_delete | 
| -    ** option is enabled */ | 
| -    memset(&data[start], 0, size); | 
| -  } | 
| - | 
| -  /* Add the space back into the linked list of freeblocks.  Note that | 
| -  ** even though the freeblock list was checked by btreeInitPage(), | 
| -  ** btreeInitPage() did not detect overlapping cells or | 
| -  ** freeblocks that overlapped cells.   Nor does it detect when the | 
| -  ** cell content area exceeds the value in the page header.  If these | 
| -  ** situations arise, then subsequent insert operations might corrupt | 
| -  ** the freelist.  So we do need to check for corruption while scanning | 
| -  ** the freelist. | 
| +  assert( iSize>=4 );   /* Minimum cell size is 4 */ | 
| +  assert( iStart<=iLast ); | 
| + | 
| +  /* Overwrite deleted information with zeros when the secure_delete | 
| +  ** option is enabled */ | 
| +  if( pPage->pBt->btsFlags & BTS_SECURE_DELETE ){ | 
| +    memset(&data[iStart], 0, iSize); | 
| +  } | 
| + | 
| +  /* The list of freeblocks must be in ascending order.  Find the | 
| +  ** spot on the list where iStart should be inserted. | 
| */ | 
| hdr = pPage->hdrOffset; | 
| -  addr = hdr + 1; | 
| -  iLast = pPage->pBt->usableSize - 4; | 
| -  assert( start<=iLast ); | 
| -  while( (pbegin = get2byte(&data[addr]))<start && pbegin>0 ){ | 
| -    if( pbegin<addr+4 ){ | 
| -      return SQLITE_CORRUPT_BKPT; | 
| +  iPtr = hdr + 1; | 
| +  if( data[iPtr+1]==0 && data[iPtr]==0 ){ | 
| +    iFreeBlk = 0;  /* Shortcut for the case when the freelist is empty */ | 
| +  }else{ | 
| +    while( (iFreeBlk = get2byte(&data[iPtr]))>0 && iFreeBlk<iStart ){ | 
| +      if( iFreeBlk<iPtr+4 ) return SQLITE_CORRUPT_BKPT; | 
| +      iPtr = iFreeBlk; | 
| } | 
| -    addr = pbegin; | 
| -  } | 
| -  if( pbegin>iLast ){ | 
| -    return SQLITE_CORRUPT_BKPT; | 
| -  } | 
| -  assert( pbegin>addr || pbegin==0 ); | 
| -  put2byte(&data[addr], start); | 
| -  put2byte(&data[start], pbegin); | 
| -  put2byte(&data[start+2], size); | 
| -  pPage->nFree = pPage->nFree + (u16)size; | 
| - | 
| -  /* Coalesce adjacent free blocks */ | 
| -  addr = hdr + 1; | 
| -  while( (pbegin = get2byte(&data[addr]))>0 ){ | 
| -    int pnext, psize, x; | 
| -    assert( pbegin>addr ); | 
| -    assert( pbegin <= (int)pPage->pBt->usableSize-4 ); | 
| -    pnext = get2byte(&data[pbegin]); | 
| -    psize = get2byte(&data[pbegin+2]); | 
| -    if( pbegin + psize + 3 >= pnext && pnext>0 ){ | 
| -      int frag = pnext - (pbegin+psize); | 
| -      if( (frag<0) || (frag>(int)data[hdr+7]) ){ | 
| -        return SQLITE_CORRUPT_BKPT; | 
| +    if( iFreeBlk>iLast ) return SQLITE_CORRUPT_BKPT; | 
| +    assert( iFreeBlk>iPtr || iFreeBlk==0 ); | 
| + | 
| +    /* At this point: | 
| +    **    iFreeBlk:   First freeblock after iStart, or zero if none | 
| +    **    iPtr:       The address of a pointer to iFreeBlk | 
| +    ** | 
| +    ** Check to see if iFreeBlk should be coalesced onto the end of iStart. | 
| +    */ | 
| +    if( iFreeBlk && iEnd+3>=iFreeBlk ){ | 
| +      nFrag = iFreeBlk - iEnd; | 
| +      if( iEnd>iFreeBlk ) return SQLITE_CORRUPT_BKPT; | 
| +      iEnd = iFreeBlk + get2byte(&data[iFreeBlk+2]); | 
| +      iSize = iEnd - iStart; | 
| +      iFreeBlk = get2byte(&data[iFreeBlk]); | 
| +    } | 
| + | 
| +    /* If iPtr is another freeblock (that is, if iPtr is not the freelist | 
| +    ** pointer in the page header) then check to see if iStart should be | 
| +    ** coalesced onto the end of iPtr. | 
| +    */ | 
| +    if( iPtr>hdr+1 ){ | 
| +      int iPtrEnd = iPtr + get2byte(&data[iPtr+2]); | 
| +      if( iPtrEnd+3>=iStart ){ | 
| +        if( iPtrEnd>iStart ) return SQLITE_CORRUPT_BKPT; | 
| +        nFrag += iStart - iPtrEnd; | 
| +        iSize = iEnd - iPtr; | 
| +        iStart = iPtr; | 
| } | 
| -      data[hdr+7] -= (u8)frag; | 
| -      x = get2byte(&data[pnext]); | 
| -      put2byte(&data[pbegin], x); | 
| -      x = pnext + get2byte(&data[pnext+2]) - pbegin; | 
| -      put2byte(&data[pbegin+2], x); | 
| -    }else{ | 
| -      addr = pbegin; | 
| } | 
| +    if( nFrag>data[hdr+7] ) return SQLITE_CORRUPT_BKPT; | 
| +    data[hdr+7] -= nFrag; | 
| } | 
| - | 
| -  /* If the cell content area begins with a freeblock, remove it. */ | 
| -  if( data[hdr+1]==data[hdr+5] && data[hdr+2]==data[hdr+6] ){ | 
| -    int top; | 
| -    pbegin = get2byte(&data[hdr+1]); | 
| -    memcpy(&data[hdr+1], &data[pbegin], 2); | 
| -    top = get2byte(&data[hdr+5]) + get2byte(&data[pbegin+2]); | 
| -    put2byte(&data[hdr+5], top); | 
| +  if( iStart==get2byte(&data[hdr+5]) ){ | 
| +    /* The new freeblock is at the beginning of the cell content area, | 
| +    ** so just extend the cell content area rather than create another | 
| +    ** freelist entry */ | 
| +    if( iPtr!=hdr+1 ) return SQLITE_CORRUPT_BKPT; | 
| +    put2byte(&data[hdr+1], iFreeBlk); | 
| +    put2byte(&data[hdr+5], iEnd); | 
| +  }else{ | 
| +    /* Insert the new freeblock into the freelist */ | 
| +    put2byte(&data[iPtr], iStart); | 
| +    put2byte(&data[iStart], iFreeBlk); | 
| +    put2byte(&data[iStart+2], iSize); | 
| } | 
| -  assert( sqlite3PagerIswriteable(pPage->pDbPage) ); | 
| +  pPage->nFree += iOrigSize; | 
| return SQLITE_OK; | 
| } | 
|  | 
| @@ -1345,17 +1446,20 @@ static int decodeFlags(MemPage *pPage, int flagByte){ | 
| pBt = pPage->pBt; | 
| if( flagByte==(PTF_LEAFDATA | PTF_INTKEY) ){ | 
| pPage->intKey = 1; | 
| -    pPage->hasData = pPage->leaf; | 
| +    pPage->intKeyLeaf = pPage->leaf; | 
| +    pPage->noPayload = !pPage->leaf; | 
| pPage->maxLocal = pBt->maxLeaf; | 
| pPage->minLocal = pBt->minLeaf; | 
| }else if( flagByte==PTF_ZERODATA ){ | 
| pPage->intKey = 0; | 
| -    pPage->hasData = 0; | 
| +    pPage->intKeyLeaf = 0; | 
| +    pPage->noPayload = 0; | 
| pPage->maxLocal = pBt->maxLocal; | 
| pPage->minLocal = pBt->minLocal; | 
| }else{ | 
| return SQLITE_CORRUPT_BKPT; | 
| } | 
| +  pPage->max1bytePayload = pBt->max1bytePayload; | 
| return SQLITE_OK; | 
| } | 
|  | 
| @@ -1398,6 +1502,8 @@ static int btreeInitPage(MemPage *pPage){ | 
| pPage->nOverflow = 0; | 
| usableSize = pBt->usableSize; | 
| pPage->cellOffset = cellOffset = hdr + 12 - 4*pPage->leaf; | 
| +    pPage->aDataEnd = &data[usableSize]; | 
| +    pPage->aCellIdx = &data[cellOffset]; | 
| top = get2byteNotZero(&data[hdr+5]); | 
| pPage->nCell = get2byte(&data[hdr+3]); | 
| if( pPage->nCell>MX_CELL(pBt) ){ | 
| @@ -1451,7 +1557,7 @@ static int btreeInitPage(MemPage *pPage){ | 
| size = get2byte(&data[pc+2]); | 
| if( (next>0 && next<=pc+size+3) || pc+size>usableSize ){ | 
| /* Free blocks must be in ascending order. And the last byte of | 
| -	** the free-block must lie on the database page.  */ | 
| +        ** the free-block must lie on the database page.  */ | 
| return SQLITE_CORRUPT_BKPT; | 
| } | 
| nFree = nFree + size; | 
| @@ -1489,18 +1595,19 @@ static void zeroPage(MemPage *pPage, int flags){ | 
| assert( sqlite3PagerGetData(pPage->pDbPage) == data ); | 
| assert( sqlite3PagerIswriteable(pPage->pDbPage) ); | 
| assert( sqlite3_mutex_held(pBt->mutex) ); | 
| -  if( pBt->secureDelete ){ | 
| +  if( pBt->btsFlags & BTS_SECURE_DELETE ){ | 
| memset(&data[hdr], 0, pBt->usableSize - hdr); | 
| } | 
| data[hdr] = (char)flags; | 
| -  first = hdr + 8 + 4*((flags&PTF_LEAF)==0 ?1:0); | 
| +  first = hdr + ((flags&PTF_LEAF)==0 ? 12 : 8); | 
| memset(&data[hdr+1], 0, 4); | 
| data[hdr+7] = 0; | 
| put2byte(&data[hdr+5], pBt->usableSize); | 
| pPage->nFree = (u16)(pBt->usableSize - first); | 
| decodeFlags(pPage, flags); | 
| -  pPage->hdrOffset = hdr; | 
| pPage->cellOffset = first; | 
| +  pPage->aDataEnd = &data[pBt->usableSize]; | 
| +  pPage->aCellIdx = &data[first]; | 
| pPage->nOverflow = 0; | 
| assert( pBt->pageSize>=512 && pBt->pageSize<=65536 ); | 
| pPage->maskPage = (u16)(pBt->pageSize - 1); | 
| @@ -1538,13 +1645,14 @@ static int btreeGetPage( | 
| BtShared *pBt,       /* The btree */ | 
| Pgno pgno,           /* Number of the page to fetch */ | 
| MemPage **ppPage,    /* Return the page in this parameter */ | 
| -  int noContent        /* Do not load page content if true */ | 
| +  int flags            /* PAGER_GET_NOCONTENT or PAGER_GET_READONLY */ | 
| ){ | 
| int rc; | 
| DbPage *pDbPage; | 
|  | 
| +  assert( flags==0 || flags==PAGER_GET_NOCONTENT || flags==PAGER_GET_READONLY ); | 
| assert( sqlite3_mutex_held(pBt->mutex) ); | 
| -  rc = sqlite3PagerAcquire(pBt->pPager, pgno, (DbPage**)&pDbPage, noContent); | 
| +  rc = sqlite3PagerAcquire(pBt->pPager, pgno, (DbPage**)&pDbPage, flags); | 
| if( rc ) return rc; | 
| *ppPage = btreePageFromDbPage(pDbPage, pgno, pBt); | 
| return SQLITE_OK; | 
| @@ -1575,7 +1683,7 @@ static Pgno btreePagecount(BtShared *pBt){ | 
| u32 sqlite3BtreeLastPage(Btree *p){ | 
| assert( sqlite3BtreeHoldsMutex(p) ); | 
| assert( ((p->pBt->nPage)&0x8000000)==0 ); | 
| -  return (int)btreePagecount(p->pBt); | 
| +  return btreePagecount(p->pBt); | 
| } | 
|  | 
| /* | 
| @@ -1587,18 +1695,20 @@ u32 sqlite3BtreeLastPage(Btree *p){ | 
| ** may remain unchanged, or it may be set to an invalid value. | 
| */ | 
| static int getAndInitPage( | 
| -  BtShared *pBt,          /* The database file */ | 
| -  Pgno pgno,           /* Number of the page to get */ | 
| -  MemPage **ppPage     /* Write the page pointer here */ | 
| +  BtShared *pBt,                  /* The database file */ | 
| +  Pgno pgno,                      /* Number of the page to get */ | 
| +  MemPage **ppPage,               /* Write the page pointer here */ | 
| +  int bReadonly                   /* PAGER_GET_READONLY or 0 */ | 
| ){ | 
| int rc; | 
| assert( sqlite3_mutex_held(pBt->mutex) ); | 
| +  assert( bReadonly==PAGER_GET_READONLY || bReadonly==0 ); | 
|  | 
| if( pgno>btreePagecount(pBt) ){ | 
| rc = SQLITE_CORRUPT_BKPT; | 
| }else{ | 
| -    rc = btreeGetPage(pBt, pgno, ppPage, 0); | 
| -    if( rc==SQLITE_OK ){ | 
| +    rc = btreeGetPage(pBt, pgno, ppPage, bReadonly); | 
| +    if( rc==SQLITE_OK && (*ppPage)->isInit==0 ){ | 
| rc = btreeInitPage(*ppPage); | 
| if( rc!=SQLITE_OK ){ | 
| releasePage(*ppPage); | 
| @@ -1619,10 +1729,11 @@ static void releasePage(MemPage *pPage){ | 
| if( pPage ){ | 
| assert( pPage->aData ); | 
| assert( pPage->pBt ); | 
| +    assert( pPage->pDbPage!=0 ); | 
| assert( sqlite3PagerGetExtra(pPage->pDbPage) == (void*)pPage ); | 
| assert( sqlite3PagerGetData(pPage->pDbPage)==pPage->aData ); | 
| assert( sqlite3_mutex_held(pPage->pBt->mutex) ); | 
| -    sqlite3PagerUnref(pPage->pDbPage); | 
| +    sqlite3PagerUnrefNotNull(pPage->pDbPage); | 
| } | 
| } | 
|  | 
| @@ -1675,11 +1786,8 @@ static int btreeInvokeBusyHandler(void *pArg){ | 
| ** If zFilename is ":memory:" then an in-memory database is created | 
| ** that is automatically destroyed when it is closed. | 
| ** | 
| -** The "flags" parameter is a bitmask that might contain bits | 
| -** BTREE_OMIT_JOURNAL and/or BTREE_NO_READLOCK.  The BTREE_NO_READLOCK | 
| -** bit is also set if the SQLITE_NoReadlock flags is set in db->flags. | 
| -** These flags are passed through into sqlite3PagerOpen() and must | 
| -** be the same values as PAGER_OMIT_JOURNAL and PAGER_NO_READLOCK. | 
| +** The "flags" parameter is a bitmask that might contain bits like | 
| +** BTREE_OMIT_JOURNAL and/or BTREE_MEMORY. | 
| ** | 
| ** If the database is already opened in the same database connection | 
| ** and we are in shared cache mode, then the open will fail with an | 
| @@ -1688,13 +1796,13 @@ static int btreeInvokeBusyHandler(void *pArg){ | 
| ** to problems with locking. | 
| */ | 
| int sqlite3BtreeOpen( | 
| +  sqlite3_vfs *pVfs,      /* VFS to use for this b-tree */ | 
| const char *zFilename,  /* Name of the file containing the BTree database */ | 
| sqlite3 *db,            /* Associated database handle */ | 
| Btree **ppBtree,        /* Pointer to new Btree object written here */ | 
| int flags,              /* Options */ | 
| int vfsFlags            /* Flags passed through to sqlite3_vfs.xOpen() */ | 
| ){ | 
| -  sqlite3_vfs *pVfs;             /* The VFS to use for this btree */ | 
| BtShared *pBt = 0;             /* Shared part of btree structure */ | 
| Btree *p;                      /* Handle to return */ | 
| sqlite3_mutex *mutexOpen = 0;  /* Prevents a race condition. Ticket #3537 */ | 
| @@ -1712,10 +1820,12 @@ int sqlite3BtreeOpen( | 
| const int isMemdb = 0; | 
| #else | 
| const int isMemdb = (zFilename && strcmp(zFilename, ":memory:")==0) | 
| -                       || (isTempDb && sqlite3TempInMemory(db)); | 
| +                       || (isTempDb && sqlite3TempInMemory(db)) | 
| +                       || (vfsFlags & SQLITE_OPEN_MEMORY)!=0; | 
| #endif | 
|  | 
| assert( db!=0 ); | 
| +  assert( pVfs!=0 ); | 
| assert( sqlite3_mutex_held(db->mutex) ); | 
| assert( (flags&0xff)==flags );   /* flags fit in 8 bits */ | 
|  | 
| @@ -1725,16 +1835,12 @@ int sqlite3BtreeOpen( | 
| /* A BTREE_SINGLE database is always a temporary and/or ephemeral */ | 
| assert( (flags & BTREE_SINGLE)==0 || isTempDb ); | 
|  | 
| -  if( db->flags & SQLITE_NoReadlock ){ | 
| -    flags |= BTREE_NO_READLOCK; | 
| -  } | 
| if( isMemdb ){ | 
| flags |= BTREE_MEMORY; | 
| } | 
| if( (vfsFlags & SQLITE_OPEN_MAIN_DB)!=0 && (isMemdb || isTempDb) ){ | 
| vfsFlags = (vfsFlags & ~SQLITE_OPEN_MAIN_DB) | SQLITE_OPEN_TEMP_DB; | 
| } | 
| -  pVfs = db->pVfs; | 
| p = sqlite3MallocZero(sizeof(Btree)); | 
| if( !p ){ | 
| return SQLITE_NOMEM; | 
| @@ -1751,24 +1857,36 @@ int sqlite3BtreeOpen( | 
| ** If this Btree is a candidate for shared cache, try to find an | 
| ** existing BtShared object that we can share with | 
| */ | 
| -  if( isMemdb==0 && isTempDb==0 ){ | 
| +  if( isTempDb==0 && (isMemdb==0 || (vfsFlags&SQLITE_OPEN_URI)!=0) ){ | 
| if( vfsFlags & SQLITE_OPEN_SHAREDCACHE ){ | 
| int nFullPathname = pVfs->mxPathname+1; | 
| char *zFullPathname = sqlite3Malloc(nFullPathname); | 
| -      sqlite3_mutex *mutexShared; | 
| +      MUTEX_LOGIC( sqlite3_mutex *mutexShared; ) | 
| p->sharable = 1; | 
| if( !zFullPathname ){ | 
| sqlite3_free(p); | 
| return SQLITE_NOMEM; | 
| } | 
| -      sqlite3OsFullPathname(pVfs, zFilename, nFullPathname, zFullPathname); | 
| +      if( isMemdb ){ | 
| +        memcpy(zFullPathname, zFilename, sqlite3Strlen30(zFilename)+1); | 
| +      }else{ | 
| +        rc = sqlite3OsFullPathname(pVfs, zFilename, | 
| +                                   nFullPathname, zFullPathname); | 
| +        if( rc ){ | 
| +          sqlite3_free(zFullPathname); | 
| +          sqlite3_free(p); | 
| +          return rc; | 
| +        } | 
| +      } | 
| +#if SQLITE_THREADSAFE | 
| mutexOpen = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_OPEN); | 
| sqlite3_mutex_enter(mutexOpen); | 
| mutexShared = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER); | 
| sqlite3_mutex_enter(mutexShared); | 
| +#endif | 
| for(pBt=GLOBAL(BtShared*,sqlite3SharedCacheList); pBt; pBt=pBt->pNext){ | 
| assert( pBt->nRef>0 ); | 
| -        if( 0==strcmp(zFullPathname, sqlite3PagerFilename(pBt->pPager)) | 
| +        if( 0==strcmp(zFullPathname, sqlite3PagerFilename(pBt->pPager, 0)) | 
| && sqlite3PagerVfs(pBt->pPager)==pVfs ){ | 
| int iDb; | 
| for(iDb=db->nDb-1; iDb>=0; iDb--){ | 
| @@ -1821,6 +1939,7 @@ int sqlite3BtreeOpen( | 
| rc = sqlite3PagerOpen(pVfs, &pBt->pPager, zFilename, | 
| EXTRA_SIZE, flags, vfsFlags, pageReinit); | 
| if( rc==SQLITE_OK ){ | 
| +      sqlite3PagerSetMmapLimit(pBt->pPager, db->szMmap); | 
| rc = sqlite3PagerReadFileheader(pBt->pPager,sizeof(zDbHeader),zDbHeader); | 
| } | 
| if( rc!=SQLITE_OK ){ | 
| @@ -1833,9 +1952,9 @@ int sqlite3BtreeOpen( | 
|  | 
| pBt->pCursor = 0; | 
| pBt->pPage1 = 0; | 
| -    pBt->readOnly = sqlite3PagerIsreadonly(pBt->pPager); | 
| +    if( sqlite3PagerIsreadonly(pBt->pPager) ) pBt->btsFlags |= BTS_READ_ONLY; | 
| #ifdef SQLITE_SECURE_DELETE | 
| -    pBt->secureDelete = 1; | 
| +    pBt->btsFlags |= BTS_SECURE_DELETE; | 
| #endif | 
| pBt->pageSize = (zDbHeader[16]<<8) | (zDbHeader[17]<<16); | 
| if( pBt->pageSize<512 || pBt->pageSize>SQLITE_MAX_PAGE_SIZE | 
| @@ -1856,7 +1975,7 @@ int sqlite3BtreeOpen( | 
| nReserve = 0; | 
| }else{ | 
| nReserve = zDbHeader[20]; | 
| -      pBt->pageSizeFixed = 1; | 
| +      pBt->btsFlags |= BTS_PAGESIZE_FIXED; | 
| #ifndef SQLITE_OMIT_AUTOVACUUM | 
| pBt->autoVacuum = (get4byte(&zDbHeader[36 + 4*4])?1:0); | 
| pBt->incrVacuum = (get4byte(&zDbHeader[36 + 7*4])?1:0); | 
| @@ -1871,9 +1990,9 @@ int sqlite3BtreeOpen( | 
| /* Add the new BtShared object to the linked list sharable BtShareds. | 
| */ | 
| if( p->sharable ){ | 
| -      sqlite3_mutex *mutexShared; | 
| +      MUTEX_LOGIC( sqlite3_mutex *mutexShared; ) | 
| pBt->nRef = 1; | 
| -      mutexShared = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER); | 
| +      MUTEX_LOGIC( mutexShared = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER);) | 
| if( SQLITE_THREADSAFE && sqlite3GlobalConfig.bCoreMutex ){ | 
| pBt->mutex = sqlite3MutexAlloc(SQLITE_MUTEX_FAST); | 
| if( pBt->mutex==0 ){ | 
| @@ -1955,12 +2074,12 @@ btree_open_out: | 
| */ | 
| static int removeFromSharingList(BtShared *pBt){ | 
| #ifndef SQLITE_OMIT_SHARED_CACHE | 
| -  sqlite3_mutex *pMaster; | 
| +  MUTEX_LOGIC( sqlite3_mutex *pMaster; ) | 
| BtShared *pList; | 
| int removed = 0; | 
|  | 
| assert( sqlite3_mutex_notheld(pBt->mutex) ); | 
| -  pMaster = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER); | 
| +  MUTEX_LOGIC( pMaster = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER); ) | 
| sqlite3_mutex_enter(pMaster); | 
| pBt->nRef--; | 
| if( pBt->nRef<=0 ){ | 
| @@ -1989,11 +2108,32 @@ static int removeFromSharingList(BtShared *pBt){ | 
|  | 
| /* | 
| ** Make sure pBt->pTmpSpace points to an allocation of | 
| -** MX_CELL_SIZE(pBt) bytes. | 
| +** MX_CELL_SIZE(pBt) bytes with a 4-byte prefix for a left-child | 
| +** pointer. | 
| */ | 
| static void allocateTempSpace(BtShared *pBt){ | 
| if( !pBt->pTmpSpace ){ | 
| pBt->pTmpSpace = sqlite3PageMalloc( pBt->pageSize ); | 
| + | 
| +    /* One of the uses of pBt->pTmpSpace is to format cells before | 
| +    ** inserting them into a leaf page (function fillInCell()). If | 
| +    ** a cell is less than 4 bytes in size, it is rounded up to 4 bytes | 
| +    ** by the various routines that manipulate binary cells. Which | 
| +    ** can mean that fillInCell() only initializes the first 2 or 3 | 
| +    ** bytes of pTmpSpace, but that the first 4 bytes are copied from | 
| +    ** it into a database page. This is not actually a problem, but it | 
| +    ** does cause a valgrind error when the 1 or 2 bytes of uninitialized | 
| +    ** data is passed to system call write(). So to avoid this error, | 
| +    ** zero the first 4 bytes of temp space here. | 
| +    ** | 
| +    ** Also:  Provide four bytes of initialized space before the | 
| +    ** beginning of pTmpSpace as an area available to prepend the | 
| +    ** left-child pointer to the beginning of a cell. | 
| +    */ | 
| +    if( pBt->pTmpSpace ){ | 
| +      memset(pBt->pTmpSpace, 0, 8); | 
| +      pBt->pTmpSpace += 4; | 
| +    } | 
| } | 
| } | 
|  | 
| @@ -2001,8 +2141,11 @@ static void allocateTempSpace(BtShared *pBt){ | 
| ** Free the pBt->pTmpSpace allocation | 
| */ | 
| static void freeTempSpace(BtShared *pBt){ | 
| -  sqlite3PageFree( pBt->pTmpSpace); | 
| -  pBt->pTmpSpace = 0; | 
| +  if( pBt->pTmpSpace ){ | 
| +    pBt->pTmpSpace -= 4; | 
| +    sqlite3PageFree(pBt->pTmpSpace); | 
| +    pBt->pTmpSpace = 0; | 
| +  } | 
| } | 
|  | 
| /* | 
| @@ -2028,7 +2171,7 @@ int sqlite3BtreeClose(Btree *p){ | 
| ** The call to sqlite3BtreeRollback() drops any table-locks held by | 
| ** this handle. | 
| */ | 
| -  sqlite3BtreeRollback(p); | 
| +  sqlite3BtreeRollback(p, SQLITE_OK, 0); | 
| sqlite3BtreeLeave(p); | 
|  | 
| /* If there are still other outstanding references to the shared-btree | 
| @@ -2087,6 +2230,21 @@ int sqlite3BtreeSetCacheSize(Btree *p, int mxPage){ | 
| return SQLITE_OK; | 
| } | 
|  | 
| +#if SQLITE_MAX_MMAP_SIZE>0 | 
| +/* | 
| +** Change the limit on the amount of the database file that may be | 
| +** memory mapped. | 
| +*/ | 
| +int sqlite3BtreeSetMmapLimit(Btree *p, sqlite3_int64 szMmap){ | 
| +  BtShared *pBt = p->pBt; | 
| +  assert( sqlite3_mutex_held(p->db->mutex) ); | 
| +  sqlite3BtreeEnter(p); | 
| +  sqlite3PagerSetMmapLimit(pBt->pPager, szMmap); | 
| +  sqlite3BtreeLeave(p); | 
| +  return SQLITE_OK; | 
| +} | 
| +#endif /* SQLITE_MAX_MMAP_SIZE>0 */ | 
| + | 
| /* | 
| ** Change the way data is synced to disk in order to increase or decrease | 
| ** how well the database resists damage due to OS crashes and power | 
| @@ -2096,17 +2254,14 @@ int sqlite3BtreeSetCacheSize(Btree *p, int mxPage){ | 
| ** probability of damage to near zero but with a write performance reduction. | 
| */ | 
| #ifndef SQLITE_OMIT_PAGER_PRAGMAS | 
| -int sqlite3BtreeSetSafetyLevel( | 
| +int sqlite3BtreeSetPagerFlags( | 
| Btree *p,              /* The btree to set the safety level on */ | 
| -  int level,             /* PRAGMA synchronous.  1=OFF, 2=NORMAL, 3=FULL */ | 
| -  int fullSync,          /* PRAGMA fullfsync. */ | 
| -  int ckptFullSync       /* PRAGMA checkpoint_fullfync */ | 
| +  unsigned pgFlags       /* Various PAGER_* flags */ | 
| ){ | 
| BtShared *pBt = p->pBt; | 
| assert( sqlite3_mutex_held(p->db->mutex) ); | 
| -  assert( level>=1 && level<=3 ); | 
| sqlite3BtreeEnter(p); | 
| -  sqlite3PagerSetSafetyLevel(pBt->pPager, level, fullSync, ckptFullSync); | 
| +  sqlite3PagerSetFlags(pBt->pPager, pgFlags); | 
| sqlite3BtreeLeave(p); | 
| return SQLITE_OK; | 
| } | 
| @@ -2144,7 +2299,7 @@ int sqlite3BtreeSyncDisabled(Btree *p){ | 
| ** If parameter nReserve is less than zero, then the number of reserved | 
| ** bytes per page is left unchanged. | 
| ** | 
| -** If the iFix!=0 then the pageSizeFixed flag is set so that the page size | 
| +** If iFix!=0 then the BTS_PAGESIZE_FIXED flag is set so that the page size | 
| ** and autovacuum mode can no longer be changed. | 
| */ | 
| int sqlite3BtreeSetPageSize(Btree *p, int pageSize, int nReserve, int iFix){ | 
| @@ -2152,7 +2307,7 @@ int sqlite3BtreeSetPageSize(Btree *p, int pageSize, int nReserve, int iFix){ | 
| BtShared *pBt = p->pBt; | 
| assert( nReserve>=-1 && nReserve<=255 ); | 
| sqlite3BtreeEnter(p); | 
| -  if( pBt->pageSizeFixed ){ | 
| +  if( pBt->btsFlags & BTS_PAGESIZE_FIXED ){ | 
| sqlite3BtreeLeave(p); | 
| return SQLITE_READONLY; | 
| } | 
| @@ -2169,7 +2324,7 @@ int sqlite3BtreeSetPageSize(Btree *p, int pageSize, int nReserve, int iFix){ | 
| } | 
| rc = sqlite3PagerSetPagesize(pBt->pPager, &pBt->pageSize, nReserve); | 
| pBt->usableSize = pBt->pageSize - (u16)nReserve; | 
| -  if( iFix ) pBt->pageSizeFixed = 1; | 
| +  if( iFix ) pBt->btsFlags |= BTS_PAGESIZE_FIXED; | 
| sqlite3BtreeLeave(p); | 
| return rc; | 
| } | 
| @@ -2181,6 +2336,24 @@ int sqlite3BtreeGetPageSize(Btree *p){ | 
| return p->pBt->pageSize; | 
| } | 
|  | 
| +#if defined(SQLITE_HAS_CODEC) || defined(SQLITE_DEBUG) | 
| +/* | 
| +** This function is similar to sqlite3BtreeGetReserve(), except that it | 
| +** may only be called if it is guaranteed that the b-tree mutex is already | 
| +** held. | 
| +** | 
| +** This is useful in one special case in the backup API code where it is | 
| +** known that the shared b-tree mutex is held, but the mutex on the | 
| +** database handle that owns *p is not. In this case if sqlite3BtreeEnter() | 
| +** were to be called, it might collide with some other operation on the | 
| +** database handle that owns *p, causing undefined behavior. | 
| +*/ | 
| +int sqlite3BtreeGetReserveNoMutex(Btree *p){ | 
| +  assert( sqlite3_mutex_held(p->pBt->mutex) ); | 
| +  return p->pBt->pageSize - p->pBt->usableSize; | 
| +} | 
| +#endif /* SQLITE_HAS_CODEC || SQLITE_DEBUG */ | 
| + | 
| #if !defined(SQLITE_OMIT_PAGER_PRAGMAS) || !defined(SQLITE_OMIT_VACUUM) | 
| /* | 
| ** Return the number of bytes of space at the end of every page that | 
| @@ -2209,8 +2382,8 @@ int sqlite3BtreeMaxPageCount(Btree *p, int mxPage){ | 
| } | 
|  | 
| /* | 
| -** Set the secureDelete flag if newFlag is 0 or 1.  If newFlag is -1, | 
| -** then make no changes.  Always return the value of the secureDelete | 
| +** Set (newFlag>0) or clear (newFlag==0) the BTS_SECURE_DELETE flag.  If | 
| +** newFlag<0, make no changes.  Always return the BTS_SECURE_DELETE | 
| ** setting after the change. | 
| */ | 
| int sqlite3BtreeSecureDelete(Btree *p, int newFlag){ | 
| @@ -2218,9 +2391,10 @@ int sqlite3BtreeSecureDelete(Btree *p, int newFlag){ | 
| if( p==0 ) return 0; | 
| sqlite3BtreeEnter(p); | 
| if( newFlag>=0 ){ | 
| -    p->pBt->secureDelete = (newFlag!=0) ? 1 : 0; | 
| +    p->pBt->btsFlags &= ~BTS_SECURE_DELETE; | 
| +    if( newFlag ) p->pBt->btsFlags |= BTS_SECURE_DELETE; | 
| } | 
| -  b = p->pBt->secureDelete; | 
| +  b = (p->pBt->btsFlags & BTS_SECURE_DELETE)!=0; | 
| sqlite3BtreeLeave(p); | 
| return b; | 
| } | 
| @@ -2241,7 +2415,7 @@ int sqlite3BtreeSetAutoVacuum(Btree *p, int autoVacuum){ | 
| u8 av = (u8)autoVacuum; | 
|  | 
| sqlite3BtreeEnter(p); | 
| -  if( pBt->pageSizeFixed && (av ?1:0)!=pBt->autoVacuum ){ | 
| +  if( (pBt->btsFlags & BTS_PAGESIZE_FIXED)!=0 && (av ?1:0)!=pBt->autoVacuum ){ | 
| rc = SQLITE_READONLY; | 
| }else{ | 
| pBt->autoVacuum = av ?1:0; | 
| @@ -2315,14 +2489,14 @@ static int lockBtree(BtShared *pBt){ | 
|  | 
| #ifdef SQLITE_OMIT_WAL | 
| if( page1[18]>1 ){ | 
| -      pBt->readOnly = 1; | 
| +      pBt->btsFlags |= BTS_READ_ONLY; | 
| } | 
| if( page1[19]>1 ){ | 
| goto page1_init_failed; | 
| } | 
| #else | 
| if( page1[18]>2 ){ | 
| -      pBt->readOnly = 1; | 
| +      pBt->btsFlags |= BTS_READ_ONLY; | 
| } | 
| if( page1[19]>2 ){ | 
| goto page1_init_failed; | 
| @@ -2336,7 +2510,7 @@ static int lockBtree(BtShared *pBt){ | 
| ** may not be the latest version - there may be a newer one in the log | 
| ** file. | 
| */ | 
| -    if( page1[19]==2 && pBt->doNotUseWAL==0 ){ | 
| +    if( page1[19]==2 && (pBt->btsFlags & BTS_NO_WAL)==0 ){ | 
| int isOpen = 0; | 
| rc = sqlite3PagerOpenWal(pBt->pPager, &isOpen); | 
| if( rc!=SQLITE_OK ){ | 
| @@ -2413,6 +2587,11 @@ static int lockBtree(BtShared *pBt){ | 
| pBt->minLocal = (u16)((pBt->usableSize-12)*32/255 - 23); | 
| pBt->maxLeaf = (u16)(pBt->usableSize - 35); | 
| pBt->minLeaf = (u16)((pBt->usableSize-12)*32/255 - 23); | 
| +  if( pBt->maxLocal>127 ){ | 
| +    pBt->max1bytePayload = 127; | 
| +  }else{ | 
| +    pBt->max1bytePayload = (u8)pBt->maxLocal; | 
| +  } | 
| assert( pBt->maxLeaf + 23 <= MX_CELL_SIZE(pBt) ); | 
| pBt->pPage1 = pPage1; | 
| pBt->nPage = nPage; | 
| @@ -2424,6 +2603,30 @@ page1_init_failed: | 
| return rc; | 
| } | 
|  | 
| +#ifndef NDEBUG | 
| +/* | 
| +** Return the number of cursors open on pBt. This is for use | 
| +** in assert() expressions, so it is only compiled if NDEBUG is not | 
| +** defined. | 
| +** | 
| +** Only write cursors are counted if wrOnly is true.  If wrOnly is | 
| +** false then all cursors are counted. | 
| +** | 
| +** For the purposes of this routine, a cursor is any cursor that | 
| +** is capable of reading or writing to the database.  Cursors that | 
| +** have been tripped into the CURSOR_FAULT state are not counted. | 
| +*/ | 
| +static int countValidCursors(BtShared *pBt, int wrOnly){ | 
| +  BtCursor *pCur; | 
| +  int r = 0; | 
| +  for(pCur=pBt->pCursor; pCur; pCur=pCur->pNext){ | 
| +    if( (wrOnly==0 || (pCur->curFlags & BTCF_WriteFlag)!=0) | 
| +     && pCur->eState!=CURSOR_FAULT ) r++; | 
| +  } | 
| +  return r; | 
| +} | 
| +#endif | 
| + | 
| /* | 
| ** If there are no outstanding cursors and we are not in the middle | 
| ** of a transaction but there is a read lock on the database, then | 
| @@ -2434,13 +2637,13 @@ page1_init_failed: | 
| */ | 
| static void unlockBtreeIfUnused(BtShared *pBt){ | 
| assert( sqlite3_mutex_held(pBt->mutex) ); | 
| -  assert( pBt->pCursor==0 || pBt->inTransaction>TRANS_NONE ); | 
| +  assert( countValidCursors(pBt,0)==0 || pBt->inTransaction>TRANS_NONE ); | 
| if( pBt->inTransaction==TRANS_NONE && pBt->pPage1!=0 ){ | 
| -    assert( pBt->pPage1->aData ); | 
| +    MemPage *pPage1 = pBt->pPage1; | 
| +    assert( pPage1->aData ); | 
| assert( sqlite3PagerRefcount(pBt->pPager)==1 ); | 
| -    assert( pBt->pPage1->aData ); | 
| -    releasePage(pBt->pPage1); | 
| pBt->pPage1 = 0; | 
| +    releasePage(pPage1); | 
| } | 
| } | 
|  | 
| @@ -2476,7 +2679,7 @@ static int newDatabase(BtShared *pBt){ | 
| data[23] = 32; | 
| memset(&data[24], 0, 100-24); | 
| zeroPage(pP1, PTF_INTKEY|PTF_LEAF|PTF_LEAFDATA ); | 
| -  pBt->pageSizeFixed = 1; | 
| +  pBt->btsFlags |= BTS_PAGESIZE_FIXED; | 
| #ifndef SQLITE_OMIT_AUTOVACUUM | 
| assert( pBt->autoVacuum==1 || pBt->autoVacuum==0 ); | 
| assert( pBt->incrVacuum==1 || pBt->incrVacuum==0 ); | 
| @@ -2489,6 +2692,20 @@ static int newDatabase(BtShared *pBt){ | 
| } | 
|  | 
| /* | 
| +** Initialize the first page of the database file (creating a database | 
| +** consisting of a single page and no schema objects). Return SQLITE_OK | 
| +** if successful, or an SQLite error code otherwise. | 
| +*/ | 
| +int sqlite3BtreeNewDb(Btree *p){ | 
| +  int rc; | 
| +  sqlite3BtreeEnter(p); | 
| +  p->pBt->nPage = 0; | 
| +  rc = newDatabase(p->pBt); | 
| +  sqlite3BtreeLeave(p); | 
| +  return rc; | 
| +} | 
| + | 
| +/* | 
| ** Attempt to start a new transaction. A write-transaction | 
| ** is started if the second argument is nonzero, otherwise a read- | 
| ** transaction.  If the second argument is 2 or more and exclusive | 
| @@ -2538,9 +2755,10 @@ int sqlite3BtreeBeginTrans(Btree *p, int wrflag){ | 
| if( p->inTrans==TRANS_WRITE || (p->inTrans==TRANS_READ && !wrflag) ){ | 
| goto trans_begun; | 
| } | 
| +  assert( pBt->inTransaction==TRANS_WRITE || IfNotOmitAV(pBt->bDoTruncate)==0 ); | 
|  | 
| /* Write transactions are not possible on a read-only database */ | 
| -  if( pBt->readOnly && wrflag ){ | 
| +  if( (pBt->btsFlags & BTS_READ_ONLY)!=0 && wrflag ){ | 
| rc = SQLITE_READONLY; | 
| goto trans_begun; | 
| } | 
| @@ -2550,7 +2768,9 @@ int sqlite3BtreeBeginTrans(Btree *p, int wrflag){ | 
| ** on this shared-btree structure and a second write transaction is | 
| ** requested, return SQLITE_LOCKED. | 
| */ | 
| -  if( (wrflag && pBt->inTransaction==TRANS_WRITE) || pBt->isPending ){ | 
| +  if( (wrflag && pBt->inTransaction==TRANS_WRITE) | 
| +   || (pBt->btsFlags & BTS_PENDING)!=0 | 
| +  ){ | 
| pBlock = pBt->pWriter->db; | 
| }else if( wrflag>1 ){ | 
| BtLock *pIter; | 
| @@ -2574,7 +2794,8 @@ int sqlite3BtreeBeginTrans(Btree *p, int wrflag){ | 
| rc = querySharedCacheTableLock(p, MASTER_ROOT, READ_LOCK); | 
| if( SQLITE_OK!=rc ) goto trans_begun; | 
|  | 
| -  pBt->initiallyEmpty = (u8)(pBt->nPage==0); | 
| +  pBt->btsFlags &= ~BTS_INITIALLY_EMPTY; | 
| +  if( pBt->nPage==0 ) pBt->btsFlags |= BTS_INITIALLY_EMPTY; | 
| do { | 
| /* Call lockBtree() until either pBt->pPage1 is populated or | 
| ** lockBtree() returns something other than SQLITE_OK. lockBtree() | 
| @@ -2586,7 +2807,7 @@ int sqlite3BtreeBeginTrans(Btree *p, int wrflag){ | 
| while( pBt->pPage1==0 && SQLITE_OK==(rc = lockBtree(pBt)) ); | 
|  | 
| if( rc==SQLITE_OK && wrflag ){ | 
| -      if( pBt->readOnly ){ | 
| +      if( (pBt->btsFlags & BTS_READ_ONLY)!=0 ){ | 
| rc = SQLITE_READONLY; | 
| }else{ | 
| rc = sqlite3PagerBegin(pBt->pPager,wrflag>1,sqlite3TempInMemory(p->db)); | 
| @@ -2607,7 +2828,7 @@ int sqlite3BtreeBeginTrans(Btree *p, int wrflag){ | 
| pBt->nTransaction++; | 
| #ifndef SQLITE_OMIT_SHARED_CACHE | 
| if( p->sharable ){ | 
| -	assert( p->lock.pBtree==p && p->lock.iTable==1 ); | 
| +        assert( p->lock.pBtree==p && p->lock.iTable==1 ); | 
| p->lock.eLock = READ_LOCK; | 
| p->lock.pNext = pBt->pLock; | 
| pBt->pLock = &p->lock; | 
| @@ -2623,7 +2844,8 @@ int sqlite3BtreeBeginTrans(Btree *p, int wrflag){ | 
| #ifndef SQLITE_OMIT_SHARED_CACHE | 
| assert( !pBt->pWriter ); | 
| pBt->pWriter = p; | 
| -      pBt->isExclusive = (u8)(wrflag>1); | 
| +      pBt->btsFlags &= ~BTS_EXCLUSIVE; | 
| +      if( wrflag>1 ) pBt->btsFlags |= BTS_EXCLUSIVE; | 
| #endif | 
|  | 
| /* If the db-size header field is incorrect (as it may be if an old | 
| @@ -2735,11 +2957,12 @@ static int modifyPagePointer(MemPage *pPage, Pgno iFrom, Pgno iTo, u8 eType){ | 
| if( eType==PTRMAP_OVERFLOW1 ){ | 
| CellInfo info; | 
| btreeParseCellPtr(pPage, pCell, &info); | 
| -        if( info.iOverflow ){ | 
| -          if( iFrom==get4byte(&pCell[info.iOverflow]) ){ | 
| -            put4byte(&pCell[info.iOverflow], iTo); | 
| -            break; | 
| -          } | 
| +        if( info.iOverflow | 
| +         && pCell+info.iOverflow+3<=pPage->aData+pPage->maskPage | 
| +         && iFrom==get4byte(&pCell[info.iOverflow]) | 
| +        ){ | 
| +          put4byte(&pCell[info.iOverflow], iTo); | 
| +          break; | 
| } | 
| }else{ | 
| if( get4byte(pCell)==iFrom ){ | 
| @@ -2849,24 +3072,23 @@ static int relocatePage( | 
| static int allocateBtreePage(BtShared *, MemPage **, Pgno *, Pgno, u8); | 
|  | 
| /* | 
| -** Perform a single step of an incremental-vacuum. If successful, | 
| -** return SQLITE_OK. If there is no work to do (and therefore no | 
| -** point in calling this function again), return SQLITE_DONE. | 
| +** Perform a single step of an incremental-vacuum. If successful, return | 
| +** SQLITE_OK. If there is no work to do (and therefore no point in | 
| +** calling this function again), return SQLITE_DONE. Or, if an error | 
| +** occurs, return some other error code. | 
| +** | 
| +** More specifically, this function attempts to re-organize the database so | 
| +** that the last page of the file currently in use is no longer in use. | 
| ** | 
| -** More specificly, this function attempts to re-organize the | 
| -** database so that the last page of the file currently in use | 
| -** is no longer in use. | 
| +** Parameter nFin is the number of pages that this database would contain | 
| +** were this function called until it returns SQLITE_DONE. | 
| ** | 
| -** If the nFin parameter is non-zero, this function assumes | 
| -** that the caller will keep calling incrVacuumStep() until | 
| -** it returns SQLITE_DONE or an error, and that nFin is the | 
| -** number of pages the database file will contain after this | 
| -** process is complete.  If nFin is zero, it is assumed that | 
| -** incrVacuumStep() will be called a finite amount of times | 
| -** which may or may not empty the freelist.  A full autovacuum | 
| -** has nFin>0.  A "PRAGMA incremental_vacuum" has nFin==0. | 
| +** If the bCommit parameter is non-zero, this function assumes that the | 
| +** caller will keep calling incrVacuumStep() until it returns SQLITE_DONE | 
| +** or an error. bCommit is passed true for an auto-vacuum-on-commit | 
| +** operation, or false for an incremental vacuum. | 
| */ | 
| -static int incrVacuumStep(BtShared *pBt, Pgno nFin, Pgno iLastPg){ | 
| +static int incrVacuumStep(BtShared *pBt, Pgno nFin, Pgno iLastPg, int bCommit){ | 
| Pgno nFreeList;           /* Number of pages still on the free-list */ | 
| int rc; | 
|  | 
| @@ -2891,15 +3113,15 @@ static int incrVacuumStep(BtShared *pBt, Pgno nFin, Pgno iLastPg){ | 
| } | 
|  | 
| if( eType==PTRMAP_FREEPAGE ){ | 
| -      if( nFin==0 ){ | 
| +      if( bCommit==0 ){ | 
| /* Remove the page from the files free-list. This is not required | 
| -        ** if nFin is non-zero. In that case, the free-list will be | 
| +        ** if bCommit is non-zero. In that case, the free-list will be | 
| ** truncated to zero after this function returns, so it doesn't | 
| ** matter if it still contains some garbage entries. | 
| */ | 
| Pgno iFreePg; | 
| MemPage *pFreePg; | 
| -        rc = allocateBtreePage(pBt, &pFreePg, &iFreePg, iLastPg, 1); | 
| +        rc = allocateBtreePage(pBt, &pFreePg, &iFreePg, iLastPg, BTALLOC_EXACT); | 
| if( rc!=SQLITE_OK ){ | 
| return rc; | 
| } | 
| @@ -2909,34 +3131,37 @@ static int incrVacuumStep(BtShared *pBt, Pgno nFin, Pgno iLastPg){ | 
| } else { | 
| Pgno iFreePg;             /* Index of free page to move pLastPg to */ | 
| MemPage *pLastPg; | 
| +      u8 eMode = BTALLOC_ANY;   /* Mode parameter for allocateBtreePage() */ | 
| +      Pgno iNear = 0;           /* nearby parameter for allocateBtreePage() */ | 
|  | 
| rc = btreeGetPage(pBt, iLastPg, &pLastPg, 0); | 
| if( rc!=SQLITE_OK ){ | 
| return rc; | 
| } | 
|  | 
| -      /* If nFin is zero, this loop runs exactly once and page pLastPg | 
| +      /* If bCommit is zero, this loop runs exactly once and page pLastPg | 
| ** is swapped with the first free page pulled off the free list. | 
| ** | 
| -      ** On the other hand, if nFin is greater than zero, then keep | 
| +      ** On the other hand, if bCommit is non-zero, then keep | 
| ** looping until a free-page located within the first nFin pages | 
| ** of the file is found. | 
| */ | 
| +      if( bCommit==0 ){ | 
| +        eMode = BTALLOC_LE; | 
| +        iNear = nFin; | 
| +      } | 
| do { | 
| MemPage *pFreePg; | 
| -        rc = allocateBtreePage(pBt, &pFreePg, &iFreePg, 0, 0); | 
| +        rc = allocateBtreePage(pBt, &pFreePg, &iFreePg, iNear, eMode); | 
| if( rc!=SQLITE_OK ){ | 
| releasePage(pLastPg); | 
| return rc; | 
| } | 
| releasePage(pFreePg); | 
| -      }while( nFin!=0 && iFreePg>nFin ); | 
| +      }while( bCommit && iFreePg>nFin ); | 
| assert( iFreePg<iLastPg ); | 
|  | 
| -      rc = sqlite3PagerWrite(pLastPg->pDbPage); | 
| -      if( rc==SQLITE_OK ){ | 
| -        rc = relocatePage(pBt, pLastPg, eType, iPtrPage, iFreePg, nFin!=0); | 
| -      } | 
| +      rc = relocatePage(pBt, pLastPg, eType, iPtrPage, iFreePg, bCommit); | 
| releasePage(pLastPg); | 
| if( rc!=SQLITE_OK ){ | 
| return rc; | 
| @@ -2944,30 +3169,40 @@ static int incrVacuumStep(BtShared *pBt, Pgno nFin, Pgno iLastPg){ | 
| } | 
| } | 
|  | 
| -  if( nFin==0 ){ | 
| -    iLastPg--; | 
| -    while( iLastPg==PENDING_BYTE_PAGE(pBt)||PTRMAP_ISPAGE(pBt, iLastPg) ){ | 
| -      if( PTRMAP_ISPAGE(pBt, iLastPg) ){ | 
| -        MemPage *pPg; | 
| -        rc = btreeGetPage(pBt, iLastPg, &pPg, 0); | 
| -        if( rc!=SQLITE_OK ){ | 
| -          return rc; | 
| -        } | 
| -        rc = sqlite3PagerWrite(pPg->pDbPage); | 
| -        releasePage(pPg); | 
| -        if( rc!=SQLITE_OK ){ | 
| -          return rc; | 
| -        } | 
| -      } | 
| +  if( bCommit==0 ){ | 
| +    do { | 
| iLastPg--; | 
| -    } | 
| -    sqlite3PagerTruncateImage(pBt->pPager, iLastPg); | 
| +    }while( iLastPg==PENDING_BYTE_PAGE(pBt) || PTRMAP_ISPAGE(pBt, iLastPg) ); | 
| +    pBt->bDoTruncate = 1; | 
| pBt->nPage = iLastPg; | 
| } | 
| return SQLITE_OK; | 
| } | 
|  | 
| /* | 
| +** The database opened by the first argument is an auto-vacuum database | 
| +** nOrig pages in size containing nFree free pages. Return the expected | 
| +** size of the database in pages following an auto-vacuum operation. | 
| +*/ | 
| +static Pgno finalDbSize(BtShared *pBt, Pgno nOrig, Pgno nFree){ | 
| +  int nEntry;                     /* Number of entries on one ptrmap page */ | 
| +  Pgno nPtrmap;                   /* Number of PtrMap pages to be freed */ | 
| +  Pgno nFin;                      /* Return value */ | 
| + | 
| +  nEntry = pBt->usableSize/5; | 
| +  nPtrmap = (nFree-nOrig+PTRMAP_PAGENO(pBt, nOrig)+nEntry)/nEntry; | 
| +  nFin = nOrig - nFree - nPtrmap; | 
| +  if( nOrig>PENDING_BYTE_PAGE(pBt) && nFin<PENDING_BYTE_PAGE(pBt) ){ | 
| +    nFin--; | 
| +  } | 
| +  while( PTRMAP_ISPAGE(pBt, nFin) || nFin==PENDING_BYTE_PAGE(pBt) ){ | 
| +    nFin--; | 
| +  } | 
| + | 
| +  return nFin; | 
| +} | 
| + | 
| +/* | 
| ** A write-transaction must be opened before calling this function. | 
| ** It performs a single unit of work towards an incremental vacuum. | 
| ** | 
| @@ -2984,11 +3219,24 @@ int sqlite3BtreeIncrVacuum(Btree *p){ | 
| if( !pBt->autoVacuum ){ | 
| rc = SQLITE_DONE; | 
| }else{ | 
| -    invalidateAllOverflowCache(pBt); | 
| -    rc = incrVacuumStep(pBt, 0, btreePagecount(pBt)); | 
| -    if( rc==SQLITE_OK ){ | 
| -      rc = sqlite3PagerWrite(pBt->pPage1->pDbPage); | 
| -      put4byte(&pBt->pPage1->aData[28], pBt->nPage); | 
| +    Pgno nOrig = btreePagecount(pBt); | 
| +    Pgno nFree = get4byte(&pBt->pPage1->aData[36]); | 
| +    Pgno nFin = finalDbSize(pBt, nOrig, nFree); | 
| + | 
| +    if( nOrig<nFin ){ | 
| +      rc = SQLITE_CORRUPT_BKPT; | 
| +    }else if( nFree>0 ){ | 
| +      rc = saveAllCursors(pBt, 0, 0); | 
| +      if( rc==SQLITE_OK ){ | 
| +        invalidateAllOverflowCache(pBt); | 
| +        rc = incrVacuumStep(pBt, nFin, nOrig, 0); | 
| +      } | 
| +      if( rc==SQLITE_OK ){ | 
| +        rc = sqlite3PagerWrite(pBt->pPage1->pDbPage); | 
| +        put4byte(&pBt->pPage1->aData[28], pBt->nPage); | 
| +      } | 
| +    }else{ | 
| +      rc = SQLITE_DONE; | 
| } | 
| } | 
| sqlite3BtreeLeave(p); | 
| @@ -2997,7 +3245,7 @@ int sqlite3BtreeIncrVacuum(Btree *p){ | 
|  | 
| /* | 
| ** This routine is called prior to sqlite3PagerCommit when a transaction | 
| -** is commited for an auto-vacuum database. | 
| +** is committed for an auto-vacuum database. | 
| ** | 
| ** If SQLITE_OK is returned, then *pnTrunc is set to the number of pages | 
| ** the database file should be truncated to during the commit process. | 
| @@ -3015,9 +3263,7 @@ static int autoVacuumCommit(BtShared *pBt){ | 
| if( !pBt->incrVacuum ){ | 
| Pgno nFin;         /* Number of pages in database after autovacuuming */ | 
| Pgno nFree;        /* Number of pages on the freelist initially */ | 
| -    Pgno nPtrmap;      /* Number of PtrMap pages to be freed */ | 
| Pgno iFree;        /* The next page to be freed */ | 
| -    int nEntry;        /* Number of entries on one ptrmap page */ | 
| Pgno nOrig;        /* Database size before freeing */ | 
|  | 
| nOrig = btreePagecount(pBt); | 
| @@ -3030,26 +3276,20 @@ static int autoVacuumCommit(BtShared *pBt){ | 
| } | 
|  | 
| nFree = get4byte(&pBt->pPage1->aData[36]); | 
| -    nEntry = pBt->usableSize/5; | 
| -    nPtrmap = (nFree-nOrig+PTRMAP_PAGENO(pBt, nOrig)+nEntry)/nEntry; | 
| -    nFin = nOrig - nFree - nPtrmap; | 
| -    if( nOrig>PENDING_BYTE_PAGE(pBt) && nFin<PENDING_BYTE_PAGE(pBt) ){ | 
| -      nFin--; | 
| -    } | 
| -    while( PTRMAP_ISPAGE(pBt, nFin) || nFin==PENDING_BYTE_PAGE(pBt) ){ | 
| -      nFin--; | 
| -    } | 
| +    nFin = finalDbSize(pBt, nOrig, nFree); | 
| if( nFin>nOrig ) return SQLITE_CORRUPT_BKPT; | 
| - | 
| +    if( nFin<nOrig ){ | 
| +      rc = saveAllCursors(pBt, 0, 0); | 
| +    } | 
| for(iFree=nOrig; iFree>nFin && rc==SQLITE_OK; iFree--){ | 
| -      rc = incrVacuumStep(pBt, nFin, iFree); | 
| +      rc = incrVacuumStep(pBt, nFin, iFree, 1); | 
| } | 
| if( (rc==SQLITE_DONE || rc==SQLITE_OK) && nFree>0 ){ | 
| rc = sqlite3PagerWrite(pBt->pPage1->pDbPage); | 
| put4byte(&pBt->pPage1->aData[32], 0); | 
| put4byte(&pBt->pPage1->aData[36], 0); | 
| put4byte(&pBt->pPage1->aData[28], nFin); | 
| -      sqlite3PagerTruncateImage(pBt->pPager, nFin); | 
| +      pBt->bDoTruncate = 1; | 
| pBt->nPage = nFin; | 
| } | 
| if( rc!=SQLITE_OK ){ | 
| @@ -3057,7 +3297,7 @@ static int autoVacuumCommit(BtShared *pBt){ | 
| } | 
| } | 
|  | 
| -  assert( nRef==sqlite3PagerRefcount(pPager) ); | 
| +  assert( nRef>=sqlite3PagerRefcount(pPager) ); | 
| return rc; | 
| } | 
|  | 
| @@ -3104,6 +3344,9 @@ int sqlite3BtreeCommitPhaseOne(Btree *p, const char *zMaster){ | 
| return rc; | 
| } | 
| } | 
| +    if( pBt->bDoTruncate ){ | 
| +      sqlite3PagerTruncateImage(pBt->pPager, pBt->nPage); | 
| +    } | 
| #endif | 
| rc = sqlite3PagerCommitPhaseOne(pBt->pPager, zMaster, 0); | 
| sqlite3BtreeLeave(p); | 
| @@ -3117,10 +3360,13 @@ int sqlite3BtreeCommitPhaseOne(Btree *p, const char *zMaster){ | 
| */ | 
| static void btreeEndTransaction(Btree *p){ | 
| BtShared *pBt = p->pBt; | 
| +  sqlite3 *db = p->db; | 
| assert( sqlite3BtreeHoldsMutex(p) ); | 
|  | 
| -  btreeClearHasContent(pBt); | 
| -  if( p->inTrans>TRANS_NONE && p->db->activeVdbeCnt>1 ){ | 
| +#ifndef SQLITE_OMIT_AUTOVACUUM | 
| +  pBt->bDoTruncate = 0; | 
| +#endif | 
| +  if( p->inTrans>TRANS_NONE && db->nVdbeRead>1 ){ | 
| /* If there are other active statements that belong to this database | 
| ** handle, downgrade to a read-only transaction. The other statements | 
| ** may still be reading from the database.  */ | 
| @@ -3194,6 +3440,7 @@ int sqlite3BtreeCommitPhaseTwo(Btree *p, int bCleanup){ | 
| return rc; | 
| } | 
| pBt->inTransaction = TRANS_READ; | 
| +    btreeClearHasContent(pBt); | 
| } | 
|  | 
| btreeEndTransaction(p); | 
| @@ -3215,88 +3462,94 @@ int sqlite3BtreeCommit(Btree *p){ | 
| return rc; | 
| } | 
|  | 
| -#ifndef NDEBUG | 
| -/* | 
| -** Return the number of write-cursors open on this handle. This is for use | 
| -** in assert() expressions, so it is only compiled if NDEBUG is not | 
| -** defined. | 
| -** | 
| -** For the purposes of this routine, a write-cursor is any cursor that | 
| -** is capable of writing to the databse.  That means the cursor was | 
| -** originally opened for writing and the cursor has not be disabled | 
| -** by having its state changed to CURSOR_FAULT. | 
| -*/ | 
| -static int countWriteCursors(BtShared *pBt){ | 
| -  BtCursor *pCur; | 
| -  int r = 0; | 
| -  for(pCur=pBt->pCursor; pCur; pCur=pCur->pNext){ | 
| -    if( pCur->wrFlag && pCur->eState!=CURSOR_FAULT ) r++; | 
| -  } | 
| -  return r; | 
| -} | 
| -#endif | 
| - | 
| /* | 
| ** This routine sets the state to CURSOR_FAULT and the error | 
| -** code to errCode for every cursor on BtShared that pBtree | 
| -** references. | 
| -** | 
| -** Every cursor is tripped, including cursors that belong | 
| -** to other database connections that happen to be sharing | 
| -** the cache with pBtree. | 
| -** | 
| -** This routine gets called when a rollback occurs. | 
| -** All cursors using the same cache must be tripped | 
| -** to prevent them from trying to use the btree after | 
| -** the rollback.  The rollback may have deleted tables | 
| -** or moved root pages, so it is not sufficient to | 
| -** save the state of the cursor.  The cursor must be | 
| -** invalidated. | 
| -*/ | 
| -void sqlite3BtreeTripAllCursors(Btree *pBtree, int errCode){ | 
| +** code to errCode for every cursor on any BtShared that pBtree | 
| +** references.  Or if the writeOnly flag is set to 1, then only | 
| +** trip write cursors and leave read cursors unchanged. | 
| +** | 
| +** Every cursor is a candidate to be tripped, including cursors | 
| +** that belong to other database connections that happen to be | 
| +** sharing the cache with pBtree. | 
| +** | 
| +** This routine gets called when a rollback occurs. If the writeOnly | 
| +** flag is true, then only write-cursors need be tripped - read-only | 
| +** cursors save their current positions so that they may continue | 
| +** following the rollback. Or, if writeOnly is false, all cursors are | 
| +** tripped. In general, writeOnly is false if the transaction being | 
| +** rolled back modified the database schema. In this case b-tree root | 
| +** pages may be moved or deleted from the database altogether, making | 
| +** it unsafe for read cursors to continue. | 
| +** | 
| +** If the writeOnly flag is true and an error is encountered while | 
| +** saving the current position of a read-only cursor, all cursors, | 
| +** including all read-cursors, are tripped. | 
| +** | 
| +** SQLITE_OK is returned if successful. If an error occurs while | 
| +** saving a cursor position, an SQLite error code is returned instead. | 
| +*/ | 
| +int sqlite3BtreeTripAllCursors(Btree *pBtree, int errCode, int writeOnly){ | 
| BtCursor *p; | 
| -  sqlite3BtreeEnter(pBtree); | 
| -  for(p=pBtree->pBt->pCursor; p; p=p->pNext){ | 
| -    int i; | 
| -    sqlite3BtreeClearCursor(p); | 
| -    p->eState = CURSOR_FAULT; | 
| -    p->skipNext = errCode; | 
| -    for(i=0; i<=p->iPage; i++){ | 
| -      releasePage(p->apPage[i]); | 
| -      p->apPage[i] = 0; | 
| +  int rc = SQLITE_OK; | 
| + | 
| +  assert( (writeOnly==0 || writeOnly==1) && BTCF_WriteFlag==1 ); | 
| +  if( pBtree ){ | 
| +    sqlite3BtreeEnter(pBtree); | 
| +    for(p=pBtree->pBt->pCursor; p; p=p->pNext){ | 
| +      int i; | 
| +      if( writeOnly && (p->curFlags & BTCF_WriteFlag)==0 ){ | 
| +        if( p->eState==CURSOR_VALID ){ | 
| +          rc = saveCursorPosition(p); | 
| +          if( rc!=SQLITE_OK ){ | 
| +            (void)sqlite3BtreeTripAllCursors(pBtree, rc, 0); | 
| +            break; | 
| +          } | 
| +        } | 
| +      }else{ | 
| +        sqlite3BtreeClearCursor(p); | 
| +        p->eState = CURSOR_FAULT; | 
| +        p->skipNext = errCode; | 
| +      } | 
| +      for(i=0; i<=p->iPage; i++){ | 
| +        releasePage(p->apPage[i]); | 
| +        p->apPage[i] = 0; | 
| +      } | 
| } | 
| +    sqlite3BtreeLeave(pBtree); | 
| } | 
| -  sqlite3BtreeLeave(pBtree); | 
| +  return rc; | 
| } | 
|  | 
| /* | 
| -** Rollback the transaction in progress.  All cursors will be | 
| -** invalided by this operation.  Any attempt to use a cursor | 
| -** that was open at the beginning of this operation will result | 
| -** in an error. | 
| +** Rollback the transaction in progress. | 
| +** | 
| +** If tripCode is not SQLITE_OK then cursors will be invalidated (tripped). | 
| +** Only write cursors are tripped if writeOnly is true but all cursors are | 
| +** tripped if writeOnly is false.  Any attempt to use | 
| +** a tripped cursor will result in an error. | 
| ** | 
| ** This will release the write lock on the database file.  If there | 
| ** are no active cursors, it also releases the read lock. | 
| */ | 
| -int sqlite3BtreeRollback(Btree *p){ | 
| +int sqlite3BtreeRollback(Btree *p, int tripCode, int writeOnly){ | 
| int rc; | 
| BtShared *pBt = p->pBt; | 
| MemPage *pPage1; | 
|  | 
| +  assert( writeOnly==1 || writeOnly==0 ); | 
| +  assert( tripCode==SQLITE_ABORT_ROLLBACK || tripCode==SQLITE_OK ); | 
| sqlite3BtreeEnter(p); | 
| -  rc = saveAllCursors(pBt, 0, 0); | 
| -#ifndef SQLITE_OMIT_SHARED_CACHE | 
| -  if( rc!=SQLITE_OK ){ | 
| -    /* This is a horrible situation. An IO or malloc() error occurred whilst | 
| -    ** trying to save cursor positions. If this is an automatic rollback (as | 
| -    ** the result of a constraint, malloc() failure or IO error) then | 
| -    ** the cache may be internally inconsistent (not contain valid trees) so | 
| -    ** we cannot simply return the error to the caller. Instead, abort | 
| -    ** all queries that may be using any of the cursors that failed to save. | 
| -    */ | 
| -    sqlite3BtreeTripAllCursors(p, rc); | 
| +  if( tripCode==SQLITE_OK ){ | 
| +    rc = tripCode = saveAllCursors(pBt, 0, 0); | 
| +    if( rc ) writeOnly = 0; | 
| +  }else{ | 
| +    rc = SQLITE_OK; | 
| +  } | 
| +  if( tripCode ){ | 
| +    int rc2 = sqlite3BtreeTripAllCursors(p, tripCode, writeOnly); | 
| +    assert( rc==SQLITE_OK || (writeOnly==0 && rc2==SQLITE_OK) ); | 
| +    if( rc2!=SQLITE_OK ) rc = rc2; | 
| } | 
| -#endif | 
| btreeIntegrity(p); | 
|  | 
| if( p->inTrans==TRANS_WRITE ){ | 
| @@ -3319,8 +3572,9 @@ int sqlite3BtreeRollback(Btree *p){ | 
| pBt->nPage = nPage; | 
| releasePage(pPage1); | 
| } | 
| -    assert( countWriteCursors(pBt)==0 ); | 
| +    assert( countValidCursors(pBt, 1)==0 ); | 
| pBt->inTransaction = TRANS_READ; | 
| +    btreeClearHasContent(pBt); | 
| } | 
|  | 
| btreeEndTransaction(p); | 
| @@ -3329,7 +3583,7 @@ int sqlite3BtreeRollback(Btree *p){ | 
| } | 
|  | 
| /* | 
| -** Start a statement subtransaction. The subtransaction can can be rolled | 
| +** Start a statement subtransaction. The subtransaction can be rolled | 
| ** back independently of the main transaction. You must start a transaction | 
| ** before starting a subtransaction. The subtransaction is ended automatically | 
| ** if the main transaction commits or rolls back. | 
| @@ -3351,7 +3605,7 @@ int sqlite3BtreeBeginStmt(Btree *p, int iStatement){ | 
| BtShared *pBt = p->pBt; | 
| sqlite3BtreeEnter(p); | 
| assert( p->inTrans==TRANS_WRITE ); | 
| -  assert( pBt->readOnly==0 ); | 
| +  assert( (pBt->btsFlags & BTS_READ_ONLY)==0 ); | 
| assert( iStatement>0 ); | 
| assert( iStatement>p->db->nSavepoint ); | 
| assert( pBt->inTransaction==TRANS_WRITE ); | 
| @@ -3386,7 +3640,9 @@ int sqlite3BtreeSavepoint(Btree *p, int op, int iSavepoint){ | 
| sqlite3BtreeEnter(p); | 
| rc = sqlite3PagerSavepoint(pBt->pPager, op, iSavepoint); | 
| if( rc==SQLITE_OK ){ | 
| -      if( iSavepoint<0 && pBt->initiallyEmpty ) pBt->nPage = 0; | 
| +      if( iSavepoint<0 && (pBt->btsFlags & BTS_INITIALLY_EMPTY)!=0 ){ | 
| +        pBt->nPage = 0; | 
| +      } | 
| rc = newDatabase(pBt); | 
| pBt->nPage = get4byte(28 + pBt->pPage1->aData); | 
|  | 
| @@ -3456,11 +3712,16 @@ static int btreeCursor( | 
| assert( wrFlag==0 || p->inTrans==TRANS_WRITE ); | 
| assert( pBt->pPage1 && pBt->pPage1->aData ); | 
|  | 
| -  if( NEVER(wrFlag && pBt->readOnly) ){ | 
| +  if( NEVER(wrFlag && (pBt->btsFlags & BTS_READ_ONLY)!=0) ){ | 
| return SQLITE_READONLY; | 
| } | 
| +  if( wrFlag ){ | 
| +    allocateTempSpace(pBt); | 
| +    if( pBt->pTmpSpace==0 ) return SQLITE_NOMEM; | 
| +  } | 
| if( iTable==1 && btreePagecount(pBt)==0 ){ | 
| -    return SQLITE_EMPTY; | 
| +    assert( wrFlag==0 ); | 
| +    iTable = 0; | 
| } | 
|  | 
| /* Now that no other errors can occur, finish filling in the BtCursor | 
| @@ -3470,14 +3731,14 @@ static int btreeCursor( | 
| pCur->pKeyInfo = pKeyInfo; | 
| pCur->pBtree = p; | 
| pCur->pBt = pBt; | 
| -  pCur->wrFlag = (u8)wrFlag; | 
| +  assert( wrFlag==0 || wrFlag==BTCF_WriteFlag ); | 
| +  pCur->curFlags = wrFlag; | 
| pCur->pNext = pBt->pCursor; | 
| if( pCur->pNext ){ | 
| pCur->pNext->pPrev = pCur; | 
| } | 
| pBt->pCursor = pCur; | 
| pCur->eState = CURSOR_INVALID; | 
| -  pCur->cachedRowid = 0; | 
| return SQLITE_OK; | 
| } | 
| int sqlite3BtreeCursor( | 
| @@ -3519,36 +3780,6 @@ void sqlite3BtreeCursorZero(BtCursor *p){ | 
| } | 
|  | 
| /* | 
| -** Set the cached rowid value of every cursor in the same database file | 
| -** as pCur and having the same root page number as pCur.  The value is | 
| -** set to iRowid. | 
| -** | 
| -** Only positive rowid values are considered valid for this cache. | 
| -** The cache is initialized to zero, indicating an invalid cache. | 
| -** A btree will work fine with zero or negative rowids.  We just cannot | 
| -** cache zero or negative rowids, which means tables that use zero or | 
| -** negative rowids might run a little slower.  But in practice, zero | 
| -** or negative rowids are very uncommon so this should not be a problem. | 
| -*/ | 
| -void sqlite3BtreeSetCachedRowid(BtCursor *pCur, sqlite3_int64 iRowid){ | 
| -  BtCursor *p; | 
| -  for(p=pCur->pBt->pCursor; p; p=p->pNext){ | 
| -    if( p->pgnoRoot==pCur->pgnoRoot ) p->cachedRowid = iRowid; | 
| -  } | 
| -  assert( pCur->cachedRowid==iRowid ); | 
| -} | 
| - | 
| -/* | 
| -** Return the cached rowid for the given cursor.  A negative or zero | 
| -** return value indicates that the rowid cache is invalid and should be | 
| -** ignored.  If the rowid cache has never before been set, then a | 
| -** zero is returned. | 
| -*/ | 
| -sqlite3_int64 sqlite3BtreeGetCachedRowid(BtCursor *pCur){ | 
| -  return pCur->cachedRowid; | 
| -} | 
| - | 
| -/* | 
| ** Close a cursor.  The read lock on the database file is released | 
| ** when the last cursor is closed. | 
| */ | 
| @@ -3571,7 +3802,7 @@ int sqlite3BtreeCloseCursor(BtCursor *pCur){ | 
| releasePage(pCur->apPage[i]); | 
| } | 
| unlockBtreeIfUnused(pBt); | 
| -    invalidateOverflowCache(pCur); | 
| +    sqlite3DbFree(pBtree->db, pCur->aOverflow); | 
| /* sqlite3_free(pCur); */ | 
| sqlite3BtreeLeave(pBtree); | 
| } | 
| @@ -3590,7 +3821,7 @@ int sqlite3BtreeCloseCursor(BtCursor *pCur){ | 
| ** compiler to crash when getCellInfo() is implemented as a macro. | 
| ** But there is a measureable speed advantage to using the macro on gcc | 
| ** (when less compiler optimizations like -Os or -O0 are used and the | 
| -** compiler is not doing agressive inlining.)  So we use a real function | 
| +** compiler is not doing aggressive inlining.)  So we use a real function | 
| ** for MSVC and a macro for everything else.  Ticket #2457. | 
| */ | 
| #ifndef NDEBUG | 
| @@ -3599,7 +3830,7 @@ int sqlite3BtreeCloseCursor(BtCursor *pCur){ | 
| int iPage = pCur->iPage; | 
| memset(&info, 0, sizeof(info)); | 
| btreeParseCell(pCur->apPage[iPage], pCur->aiIdx[iPage], &info); | 
| -    assert( memcmp(&info, &pCur->info, sizeof(info))==0 ); | 
| +    assert( CORRUPT_DB || memcmp(&info, &pCur->info, sizeof(info))==0 ); | 
| } | 
| #else | 
| #define assertCellInfo(x) | 
| @@ -3610,7 +3841,7 @@ int sqlite3BtreeCloseCursor(BtCursor *pCur){ | 
| if( pCur->info.nSize==0 ){ | 
| int iPage = pCur->iPage; | 
| btreeParseCell(pCur->apPage[iPage],pCur->aiIdx[iPage],&pCur->info); | 
| -      pCur->validNKey = 1; | 
| +      pCur->curFlags |= BTCF_ValidNKey; | 
| }else{ | 
| assertCellInfo(pCur); | 
| } | 
| @@ -3620,8 +3851,8 @@ int sqlite3BtreeCloseCursor(BtCursor *pCur){ | 
| #define getCellInfo(pCur)                                                      \ | 
| if( pCur->info.nSize==0 ){                                                   \ | 
| int iPage = pCur->iPage;                                                   \ | 
| -    btreeParseCell(pCur->apPage[iPage],pCur->aiIdx[iPage],&pCur->info); \ | 
| -    pCur->validNKey = 1;                                                       \ | 
| +    btreeParseCell(pCur->apPage[iPage],pCur->aiIdx[iPage],&pCur->info);        \ | 
| +    pCur->curFlags |= BTCF_ValidNKey;                                          \ | 
| }else{                                                                       \ | 
| assertCellInfo(pCur);                                                      \ | 
| } | 
| @@ -3652,13 +3883,9 @@ int sqlite3BtreeCursorIsValid(BtCursor *pCur){ | 
| */ | 
| int sqlite3BtreeKeySize(BtCursor *pCur, i64 *pSize){ | 
| assert( cursorHoldsMutex(pCur) ); | 
| -  assert( pCur->eState==CURSOR_INVALID || pCur->eState==CURSOR_VALID ); | 
| -  if( pCur->eState!=CURSOR_VALID ){ | 
| -    *pSize = 0; | 
| -  }else{ | 
| -    getCellInfo(pCur); | 
| -    *pSize = pCur->info.nKey; | 
| -  } | 
| +  assert( pCur->eState==CURSOR_VALID ); | 
| +  getCellInfo(pCur); | 
| +  *pSize = pCur->info.nKey; | 
| return SQLITE_OK; | 
| } | 
|  | 
| @@ -3677,8 +3904,9 @@ int sqlite3BtreeKeySize(BtCursor *pCur, i64 *pSize){ | 
| int sqlite3BtreeDataSize(BtCursor *pCur, u32 *pSize){ | 
| assert( cursorHoldsMutex(pCur) ); | 
| assert( pCur->eState==CURSOR_VALID ); | 
| +  assert( pCur->apPage[pCur->iPage]->intKeyLeaf==1 ); | 
| getCellInfo(pCur); | 
| -  *pSize = pCur->info.nData; | 
| +  *pSize = pCur->info.nPayload; | 
| return SQLITE_OK; | 
| } | 
|  | 
| @@ -3742,7 +3970,7 @@ static int getOverflowPage( | 
|  | 
| assert( next==0 || rc==SQLITE_DONE ); | 
| if( rc==SQLITE_OK ){ | 
| -    rc = btreeGetPage(pBt, ovfl, &pPage, 0); | 
| +    rc = btreeGetPage(pBt, ovfl, &pPage, (ppPage==0) ? PAGER_GET_READONLY : 0); | 
| assert( rc==SQLITE_OK || pPage==0 ); | 
| if( rc==SQLITE_OK ){ | 
| next = get4byte(pPage->aData); | 
| @@ -3792,10 +4020,12 @@ static int copyPayload( | 
|  | 
| /* | 
| ** This function is used to read or overwrite payload information | 
| -** for the entry that the pCur cursor is pointing to. If the eOp | 
| -** parameter is 0, this is a read operation (data copied into | 
| -** buffer pBuf). If it is non-zero, a write (data copied from | 
| -** buffer pBuf). | 
| +** for the entry that the pCur cursor is pointing to. The eOp | 
| +** argument is interpreted as follows: | 
| +** | 
| +**   0: The operation is a read. Populate the overflow cache. | 
| +**   1: The operation is a write. Populate the overflow cache. | 
| +**   2: The operation is a read. Do not populate the overflow cache. | 
| ** | 
| ** A total of "amt" bytes are read or written beginning at "offset". | 
| ** Data is read to or from the buffer pBuf. | 
| @@ -3803,11 +4033,11 @@ static int copyPayload( | 
| ** The content being read or written might appear on the main page | 
| ** or be scattered out on multiple overflow pages. | 
| ** | 
| -** If the BtCursor.isIncrblobHandle flag is set, and the current | 
| -** cursor entry uses one or more overflow pages, this function | 
| -** allocates space for and lazily popluates the overflow page-list | 
| -** cache array (BtCursor.aOverflow). Subsequent calls use this | 
| -** cache to make seeking to the supplied offset more efficient. | 
| +** If the current cursor entry uses one or more overflow pages and the | 
| +** eOp argument is not 2, this function may allocate space for and lazily | 
| +** populate the overflow page-list cache array (BtCursor.aOverflow). | 
| +** Subsequent calls use this cache to make seeking to the supplied offset | 
| +** more efficient. | 
| ** | 
| ** Once an overflow page-list cache has been allocated, it may be | 
| ** invalidated if some other cursor writes to the same table, or if | 
| @@ -3827,23 +4057,28 @@ static int accessPayload( | 
| ){ | 
| unsigned char *aPayload; | 
| int rc = SQLITE_OK; | 
| -  u32 nKey; | 
| int iIdx = 0; | 
| MemPage *pPage = pCur->apPage[pCur->iPage]; /* Btree page of current entry */ | 
| BtShared *pBt = pCur->pBt;                  /* Btree this cursor belongs to */ | 
| +#ifdef SQLITE_DIRECT_OVERFLOW_READ | 
| +  unsigned char * const pBufStart = pBuf; | 
| +  int bEnd;                                 /* True if reading to end of data */ | 
| +#endif | 
|  | 
| assert( pPage ); | 
| assert( pCur->eState==CURSOR_VALID ); | 
| assert( pCur->aiIdx[pCur->iPage]<pPage->nCell ); | 
| assert( cursorHoldsMutex(pCur) ); | 
| +  assert( eOp!=2 || offset==0 );    /* Always start from beginning for eOp==2 */ | 
|  | 
| getCellInfo(pCur); | 
| -  aPayload = pCur->info.pCell + pCur->info.nHeader; | 
| -  nKey = (pPage->intKey ? 0 : (int)pCur->info.nKey); | 
| +  aPayload = pCur->info.pPayload; | 
| +#ifdef SQLITE_DIRECT_OVERFLOW_READ | 
| +  bEnd = offset+amt==pCur->info.nPayload; | 
| +#endif | 
| +  assert( offset+amt <= pCur->info.nPayload ); | 
|  | 
| -  if( NEVER(offset+amt > nKey+pCur->info.nData) | 
| -   || &aPayload[pCur->info.nLocal] > &pPage->aData[pBt->usableSize] | 
| -  ){ | 
| +  if( &aPayload[pCur->info.nLocal] > &pPage->aData[pBt->usableSize] ){ | 
| /* Trying to read or write past the end of the data is an error */ | 
| return SQLITE_CORRUPT_BKPT; | 
| } | 
| @@ -3854,7 +4089,7 @@ static int accessPayload( | 
| if( a+offset>pCur->info.nLocal ){ | 
| a = pCur->info.nLocal - offset; | 
| } | 
| -    rc = copyPayload(&aPayload[offset], pBuf, a, eOp, pPage->pDbPage); | 
| +    rc = copyPayload(&aPayload[offset], pBuf, a, (eOp & 0x01), pPage->pDbPage); | 
| offset = 0; | 
| pBuf += a; | 
| amt -= a; | 
| @@ -3868,21 +4103,30 @@ static int accessPayload( | 
|  | 
| nextPage = get4byte(&aPayload[pCur->info.nLocal]); | 
|  | 
| -#ifndef SQLITE_OMIT_INCRBLOB | 
| -    /* If the isIncrblobHandle flag is set and the BtCursor.aOverflow[] | 
| -    ** has not been allocated, allocate it now. The array is sized at | 
| -    ** one entry for each overflow page in the overflow chain. The | 
| -    ** page number of the first overflow page is stored in aOverflow[0], | 
| -    ** etc. A value of 0 in the aOverflow[] array means "not yet known" | 
| -    ** (the cache is lazily populated). | 
| +    /* If the BtCursor.aOverflow[] has not been allocated, allocate it now. | 
| +    ** Except, do not allocate aOverflow[] for eOp==2. | 
| +    ** | 
| +    ** The aOverflow[] array is sized at one entry for each overflow page | 
| +    ** in the overflow chain. The page number of the first overflow page is | 
| +    ** stored in aOverflow[0], etc. A value of 0 in the aOverflow[] array | 
| +    ** means "not yet known" (the cache is lazily populated). | 
| */ | 
| -    if( pCur->isIncrblobHandle && !pCur->aOverflow ){ | 
| +    if( eOp!=2 && (pCur->curFlags & BTCF_ValidOvfl)==0 ){ | 
| int nOvfl = (pCur->info.nPayload-pCur->info.nLocal+ovflSize-1)/ovflSize; | 
| -      pCur->aOverflow = (Pgno *)sqlite3MallocZero(sizeof(Pgno)*nOvfl); | 
| -      /* nOvfl is always positive.  If it were zero, fetchPayload would have | 
| -      ** been used instead of this routine. */ | 
| -      if( ALWAYS(nOvfl) && !pCur->aOverflow ){ | 
| -        rc = SQLITE_NOMEM; | 
| +      if( nOvfl>pCur->nOvflAlloc ){ | 
| +        Pgno *aNew = (Pgno*)sqlite3DbRealloc( | 
| +            pCur->pBtree->db, pCur->aOverflow, nOvfl*2*sizeof(Pgno) | 
| +        ); | 
| +        if( aNew==0 ){ | 
| +          rc = SQLITE_NOMEM; | 
| +        }else{ | 
| +          pCur->nOvflAlloc = nOvfl*2; | 
| +          pCur->aOverflow = aNew; | 
| +        } | 
| +      } | 
| +      if( rc==SQLITE_OK ){ | 
| +        memset(pCur->aOverflow, 0, nOvfl*sizeof(Pgno)); | 
| +        pCur->curFlags |= BTCF_ValidOvfl; | 
| } | 
| } | 
|  | 
| @@ -3890,22 +4134,21 @@ static int accessPayload( | 
| ** entry for the first required overflow page is valid, skip | 
| ** directly to it. | 
| */ | 
| -    if( pCur->aOverflow && pCur->aOverflow[offset/ovflSize] ){ | 
| +    if( (pCur->curFlags & BTCF_ValidOvfl)!=0 | 
| +     && pCur->aOverflow[offset/ovflSize] | 
| +    ){ | 
| iIdx = (offset/ovflSize); | 
| nextPage = pCur->aOverflow[iIdx]; | 
| offset = (offset%ovflSize); | 
| } | 
| -#endif | 
|  | 
| for( ; rc==SQLITE_OK && amt>0 && nextPage; iIdx++){ | 
|  | 
| -#ifndef SQLITE_OMIT_INCRBLOB | 
| /* If required, populate the overflow page-list cache. */ | 
| -      if( pCur->aOverflow ){ | 
| +      if( (pCur->curFlags & BTCF_ValidOvfl)!=0 ){ | 
| assert(!pCur->aOverflow[iIdx] || pCur->aOverflow[iIdx]==nextPage); | 
| pCur->aOverflow[iIdx] = nextPage; | 
| } | 
| -#endif | 
|  | 
| if( offset>=ovflSize ){ | 
| /* The only reason to read this page is to obtain the page | 
| @@ -3913,33 +4156,78 @@ static int accessPayload( | 
| ** data is not required. So first try to lookup the overflow | 
| ** page-list cache, if any, then fall back to the getOverflowPage() | 
| ** function. | 
| +        ** | 
| +        ** Note that the aOverflow[] array must be allocated because eOp!=2 | 
| +        ** here.  If eOp==2, then offset==0 and this branch is never taken. | 
| */ | 
| -#ifndef SQLITE_OMIT_INCRBLOB | 
| -        if( pCur->aOverflow && pCur->aOverflow[iIdx+1] ){ | 
| +        assert( eOp!=2 ); | 
| +        assert( pCur->curFlags & BTCF_ValidOvfl ); | 
| +        if( pCur->aOverflow[iIdx+1] ){ | 
| nextPage = pCur->aOverflow[iIdx+1]; | 
| -        } else | 
| -#endif | 
| +        }else{ | 
| rc = getOverflowPage(pBt, nextPage, 0, &nextPage); | 
| +        } | 
| offset -= ovflSize; | 
| }else{ | 
| /* Need to read this page properly. It contains some of the | 
| ** range of data that is being read (eOp==0) or written (eOp!=0). | 
| */ | 
| -        DbPage *pDbPage; | 
| +#ifdef SQLITE_DIRECT_OVERFLOW_READ | 
| +        sqlite3_file *fd; | 
| +#endif | 
| int a = amt; | 
| -        rc = sqlite3PagerGet(pBt->pPager, nextPage, &pDbPage); | 
| -        if( rc==SQLITE_OK ){ | 
| -          aPayload = sqlite3PagerGetData(pDbPage); | 
| -          nextPage = get4byte(aPayload); | 
| -          if( a + offset > ovflSize ){ | 
| -            a = ovflSize - offset; | 
| +        if( a + offset > ovflSize ){ | 
| +          a = ovflSize - offset; | 
| +        } | 
| + | 
| +#ifdef SQLITE_DIRECT_OVERFLOW_READ | 
| +        /* If all the following are true: | 
| +        ** | 
| +        **   1) this is a read operation, and | 
| +        **   2) data is required from the start of this overflow page, and | 
| +        **   3) the database is file-backed, and | 
| +        **   4) there is no open write-transaction, and | 
| +        **   5) the database is not a WAL database, and | 
| +        **   6) all data from the page is being read, and | 
| +        **   7) at least 4 bytes have already been read into the output buffer | 
| +        ** | 
| +        ** then data can be read directly from the database file into the | 
| +        ** output buffer, bypassing the page-cache altogether. This speeds | 
| +        ** up loading large records that span many overflow pages. | 
| +        */ | 
| +        if( (eOp&0x01)==0                                      /* (1) */ | 
| +         && offset==0                                          /* (2) */ | 
| +         && (bEnd || a==ovflSize)                              /* (6) */ | 
| +         && pBt->inTransaction==TRANS_READ                     /* (4) */ | 
| +         && (fd = sqlite3PagerFile(pBt->pPager))->pMethods     /* (3) */ | 
| +         && pBt->pPage1->aData[19]==0x01                       /* (5) */ | 
| +         && &pBuf[-4]>=pBufStart                               /* (7) */ | 
| +        ){ | 
| +          u8 aSave[4]; | 
| +          u8 *aWrite = &pBuf[-4]; | 
| +          assert( aWrite>=pBufStart );                         /* hence (7) */ | 
| +          memcpy(aSave, aWrite, 4); | 
| +          rc = sqlite3OsRead(fd, aWrite, a+4, (i64)pBt->pageSize*(nextPage-1)); | 
| +          nextPage = get4byte(aWrite); | 
| +          memcpy(aWrite, aSave, 4); | 
| +        }else | 
| +#endif | 
| + | 
| +        { | 
| +          DbPage *pDbPage; | 
| +          rc = sqlite3PagerAcquire(pBt->pPager, nextPage, &pDbPage, | 
| +              ((eOp&0x01)==0 ? PAGER_GET_READONLY : 0) | 
| +          ); | 
| +          if( rc==SQLITE_OK ){ | 
| +            aPayload = sqlite3PagerGetData(pDbPage); | 
| +            nextPage = get4byte(aPayload); | 
| +            rc = copyPayload(&aPayload[offset+4], pBuf, a, (eOp&0x01), pDbPage); | 
| +            sqlite3PagerUnref(pDbPage); | 
| +            offset = 0; | 
| } | 
| -          rc = copyPayload(&aPayload[offset+4], pBuf, a, eOp, pDbPage); | 
| -          sqlite3PagerUnref(pDbPage); | 
| -          offset = 0; | 
| -          amt -= a; | 
| -          pBuf += a; | 
| } | 
| +        amt -= a; | 
| +        pBuf += a; | 
| } | 
| } | 
| } | 
| @@ -3952,7 +4240,7 @@ static int accessPayload( | 
|  | 
| /* | 
| ** Read part of the key associated with cursor pCur.  Exactly | 
| -** "amt" bytes will be transfered into pBuf[].  The transfer | 
| +** "amt" bytes will be transferred into pBuf[].  The transfer | 
| ** begins at "offset". | 
| ** | 
| ** The caller must ensure that pCur is pointing to a valid row | 
| @@ -4002,10 +4290,10 @@ int sqlite3BtreeData(BtCursor *pCur, u32 offset, u32 amt, void *pBuf){ | 
| /* | 
| ** Return a pointer to payload information from the entry that the | 
| ** pCur cursor is pointing to.  The pointer is to the beginning of | 
| -** the key if skipKey==0 and it points to the beginning of data if | 
| -** skipKey==1.  The number of bytes of available key/data is written | 
| -** into *pAmt.  If *pAmt==0, then the value returned will not be | 
| -** a valid pointer. | 
| +** the key for index btrees (pPage->intKey==0) and of the data for | 
| +** table btrees (pPage->intKey==1). The number of bytes of available | 
| +** key/data is written into *pAmt.  If *pAmt==0, then the value | 
| +** returned will not be a valid pointer. | 
| ** | 
| ** This routine is an optimization.  It is common for the entire key | 
| ** and data to fit on the local page and for there to be no overflow | 
| @@ -4018,41 +4306,18 @@ int sqlite3BtreeData(BtCursor *pCur, u32 offset, u32 amt, void *pBuf){ | 
| ** page of the database.  The data might change or move the next time | 
| ** any btree routine is called. | 
| */ | 
| -static const unsigned char *fetchPayload( | 
| +static const void *fetchPayload( | 
| BtCursor *pCur,      /* Cursor pointing to entry to read from */ | 
| -  int *pAmt,           /* Write the number of available bytes here */ | 
| -  int skipKey          /* read beginning at data if this is true */ | 
| +  u32 *pAmt            /* Write the number of available bytes here */ | 
| ){ | 
| -  unsigned char *aPayload; | 
| -  MemPage *pPage; | 
| -  u32 nKey; | 
| -  u32 nLocal; | 
| - | 
| assert( pCur!=0 && pCur->iPage>=0 && pCur->apPage[pCur->iPage]); | 
| assert( pCur->eState==CURSOR_VALID ); | 
| +  assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) ); | 
| assert( cursorHoldsMutex(pCur) ); | 
| -  pPage = pCur->apPage[pCur->iPage]; | 
| -  assert( pCur->aiIdx[pCur->iPage]<pPage->nCell ); | 
| -  if( NEVER(pCur->info.nSize==0) ){ | 
| -    btreeParseCell(pCur->apPage[pCur->iPage], pCur->aiIdx[pCur->iPage], | 
| -                   &pCur->info); | 
| -  } | 
| -  aPayload = pCur->info.pCell; | 
| -  aPayload += pCur->info.nHeader; | 
| -  if( pPage->intKey ){ | 
| -    nKey = 0; | 
| -  }else{ | 
| -    nKey = (int)pCur->info.nKey; | 
| -  } | 
| -  if( skipKey ){ | 
| -    aPayload += nKey; | 
| -    nLocal = pCur->info.nLocal - nKey; | 
| -  }else{ | 
| -    nLocal = pCur->info.nLocal; | 
| -    assert( nLocal<=nKey ); | 
| -  } | 
| -  *pAmt = nLocal; | 
| -  return aPayload; | 
| +  assert( pCur->aiIdx[pCur->iPage]<pCur->apPage[pCur->iPage]->nCell ); | 
| +  assert( pCur->info.nSize>0 ); | 
| +  *pAmt = pCur->info.nLocal; | 
| +  return (void*)pCur->info.pPayload; | 
| } | 
|  | 
|  | 
| @@ -4070,23 +4335,11 @@ static const unsigned char *fetchPayload( | 
| ** These routines is used to get quick access to key and data | 
| ** in the common case where no overflow pages are used. | 
| */ | 
| -const void *sqlite3BtreeKeyFetch(BtCursor *pCur, int *pAmt){ | 
| -  const void *p = 0; | 
| -  assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) ); | 
| -  assert( cursorHoldsMutex(pCur) ); | 
| -  if( ALWAYS(pCur->eState==CURSOR_VALID) ){ | 
| -    p = (const void*)fetchPayload(pCur, pAmt, 0); | 
| -  } | 
| -  return p; | 
| +const void *sqlite3BtreeKeyFetch(BtCursor *pCur, u32 *pAmt){ | 
| +  return fetchPayload(pCur, pAmt); | 
| } | 
| -const void *sqlite3BtreeDataFetch(BtCursor *pCur, int *pAmt){ | 
| -  const void *p = 0; | 
| -  assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) ); | 
| -  assert( cursorHoldsMutex(pCur) ); | 
| -  if( ALWAYS(pCur->eState==CURSOR_VALID) ){ | 
| -    p = (const void*)fetchPayload(pCur, pAmt, 1); | 
| -  } | 
| -  return p; | 
| +const void *sqlite3BtreeDataFetch(BtCursor *pCur, u32 *pAmt){ | 
| +  return fetchPayload(pCur, pAmt); | 
| } | 
|  | 
|  | 
| @@ -4108,24 +4361,26 @@ static int moveToChild(BtCursor *pCur, u32 newPgno){ | 
| assert( cursorHoldsMutex(pCur) ); | 
| assert( pCur->eState==CURSOR_VALID ); | 
| assert( pCur->iPage<BTCURSOR_MAX_DEPTH ); | 
| +  assert( pCur->iPage>=0 ); | 
| if( pCur->iPage>=(BTCURSOR_MAX_DEPTH-1) ){ | 
| return SQLITE_CORRUPT_BKPT; | 
| } | 
| -  rc = getAndInitPage(pBt, newPgno, &pNewPage); | 
| +  rc = getAndInitPage(pBt, newPgno, &pNewPage, | 
| +               (pCur->curFlags & BTCF_WriteFlag)==0 ? PAGER_GET_READONLY : 0); | 
| if( rc ) return rc; | 
| pCur->apPage[i+1] = pNewPage; | 
| pCur->aiIdx[i+1] = 0; | 
| pCur->iPage++; | 
|  | 
| pCur->info.nSize = 0; | 
| -  pCur->validNKey = 0; | 
| +  pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl); | 
| if( pNewPage->nCell<1 || pNewPage->intKey!=pCur->apPage[i]->intKey ){ | 
| return SQLITE_CORRUPT_BKPT; | 
| } | 
| return SQLITE_OK; | 
| } | 
|  | 
| -#ifndef NDEBUG | 
| +#if 0 | 
| /* | 
| ** Page pParent is an internal (non-leaf) tree page. This function | 
| ** asserts that page number iChild is the left-child if the iIdx'th | 
| @@ -4158,15 +4413,25 @@ static void moveToParent(BtCursor *pCur){ | 
| assert( pCur->eState==CURSOR_VALID ); | 
| assert( pCur->iPage>0 ); | 
| assert( pCur->apPage[pCur->iPage] ); | 
| + | 
| +  /* UPDATE: It is actually possible for the condition tested by the assert | 
| +  ** below to be untrue if the database file is corrupt. This can occur if | 
| +  ** one cursor has modified page pParent while a reference to it is held | 
| +  ** by a second cursor, which can only happen if a single page is linked | 
| +  ** into more than one b-tree structure in a corrupt database.  */ | 
| +#if 0 | 
| assertParentIndex( | 
| pCur->apPage[pCur->iPage-1], | 
| pCur->aiIdx[pCur->iPage-1], | 
| pCur->apPage[pCur->iPage]->pgno | 
| ); | 
| +#endif | 
| +  testcase( pCur->aiIdx[pCur->iPage-1] > pCur->apPage[pCur->iPage-1]->nCell ); | 
| + | 
| releasePage(pCur->apPage[pCur->iPage]); | 
| pCur->iPage--; | 
| pCur->info.nSize = 0; | 
| -  pCur->validNKey = 0; | 
| +  pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl); | 
| } | 
|  | 
| /* | 
| @@ -4193,8 +4458,6 @@ static void moveToParent(BtCursor *pCur){ | 
| static int moveToRoot(BtCursor *pCur){ | 
| MemPage *pRoot; | 
| int rc = SQLITE_OK; | 
| -  Btree *p = pCur->pBtree; | 
| -  BtShared *pBt = p->pBt; | 
|  | 
| assert( cursorHoldsMutex(pCur) ); | 
| assert( CURSOR_INVALID < CURSOR_REQUIRESEEK ); | 
| @@ -4209,52 +4472,51 @@ static int moveToRoot(BtCursor *pCur){ | 
| } | 
|  | 
| if( pCur->iPage>=0 ){ | 
| -    int i; | 
| -    for(i=1; i<=pCur->iPage; i++){ | 
| -      releasePage(pCur->apPage[i]); | 
| -    } | 
| -    pCur->iPage = 0; | 
| +    while( pCur->iPage ) releasePage(pCur->apPage[pCur->iPage--]); | 
| +  }else if( pCur->pgnoRoot==0 ){ | 
| +    pCur->eState = CURSOR_INVALID; | 
| +    return SQLITE_OK; | 
| }else{ | 
| -    rc = getAndInitPage(pBt, pCur->pgnoRoot, &pCur->apPage[0]); | 
| +    rc = getAndInitPage(pCur->pBtree->pBt, pCur->pgnoRoot, &pCur->apPage[0], | 
| +                 (pCur->curFlags & BTCF_WriteFlag)==0 ? PAGER_GET_READONLY : 0); | 
| if( rc!=SQLITE_OK ){ | 
| pCur->eState = CURSOR_INVALID; | 
| -      return rc; | 
| -    } | 
| -    pCur->iPage = 0; | 
| - | 
| -    /* If pCur->pKeyInfo is not NULL, then the caller that opened this cursor | 
| -    ** expected to open it on an index b-tree. Otherwise, if pKeyInfo is | 
| -    ** NULL, the caller expects a table b-tree. If this is not the case, | 
| -    ** return an SQLITE_CORRUPT error.  */ | 
| -    assert( pCur->apPage[0]->intKey==1 || pCur->apPage[0]->intKey==0 ); | 
| -    if( (pCur->pKeyInfo==0)!=pCur->apPage[0]->intKey ){ | 
| -      return SQLITE_CORRUPT_BKPT; | 
| +      return rc; | 
| } | 
| +    pCur->iPage = 0; | 
| } | 
| - | 
| -  /* Assert that the root page is of the correct type. This must be the | 
| -  ** case as the call to this function that loaded the root-page (either | 
| -  ** this call or a previous invocation) would have detected corruption | 
| -  ** if the assumption were not true, and it is not possible for the flags | 
| -  ** byte to have been modified while this cursor is holding a reference | 
| -  ** to the page.  */ | 
| pRoot = pCur->apPage[0]; | 
| assert( pRoot->pgno==pCur->pgnoRoot ); | 
| -  assert( pRoot->isInit && (pCur->pKeyInfo==0)==pRoot->intKey ); | 
| + | 
| +  /* If pCur->pKeyInfo is not NULL, then the caller that opened this cursor | 
| +  ** expected to open it on an index b-tree. Otherwise, if pKeyInfo is | 
| +  ** NULL, the caller expects a table b-tree. If this is not the case, | 
| +  ** return an SQLITE_CORRUPT error. | 
| +  ** | 
| +  ** Earlier versions of SQLite assumed that this test could not fail | 
| +  ** if the root page was already loaded when this function was called (i.e. | 
| +  ** if pCur->iPage>=0). But this is not so if the database is corrupted | 
| +  ** in such a way that page pRoot is linked into a second b-tree table | 
| +  ** (or the freelist).  */ | 
| +  assert( pRoot->intKey==1 || pRoot->intKey==0 ); | 
| +  if( pRoot->isInit==0 || (pCur->pKeyInfo==0)!=pRoot->intKey ){ | 
| +    return SQLITE_CORRUPT_BKPT; | 
| +  } | 
|  | 
| pCur->aiIdx[0] = 0; | 
| pCur->info.nSize = 0; | 
| -  pCur->atLast = 0; | 
| -  pCur->validNKey = 0; | 
| +  pCur->curFlags &= ~(BTCF_AtLast|BTCF_ValidNKey|BTCF_ValidOvfl); | 
|  | 
| -  if( pRoot->nCell==0 && !pRoot->leaf ){ | 
| +  if( pRoot->nCell>0 ){ | 
| +    pCur->eState = CURSOR_VALID; | 
| +  }else if( !pRoot->leaf ){ | 
| Pgno subpage; | 
| if( pRoot->pgno!=1 ) return SQLITE_CORRUPT_BKPT; | 
| subpage = get4byte(&pRoot->aData[pRoot->hdrOffset+8]); | 
| pCur->eState = CURSOR_VALID; | 
| rc = moveToChild(pCur, subpage); | 
| }else{ | 
| -    pCur->eState = ((pRoot->nCell>0)?CURSOR_VALID:CURSOR_INVALID); | 
| +    pCur->eState = CURSOR_INVALID; | 
| } | 
| return rc; | 
| } | 
| @@ -4298,17 +4560,16 @@ static int moveToRightmost(BtCursor *pCur){ | 
|  | 
| assert( cursorHoldsMutex(pCur) ); | 
| assert( pCur->eState==CURSOR_VALID ); | 
| -  while( rc==SQLITE_OK && !(pPage = pCur->apPage[pCur->iPage])->leaf ){ | 
| +  while( !(pPage = pCur->apPage[pCur->iPage])->leaf ){ | 
| pgno = get4byte(&pPage->aData[pPage->hdrOffset+8]); | 
| pCur->aiIdx[pCur->iPage] = pPage->nCell; | 
| rc = moveToChild(pCur, pgno); | 
| +    if( rc ) return rc; | 
| } | 
| -  if( rc==SQLITE_OK ){ | 
| -    pCur->aiIdx[pCur->iPage] = pPage->nCell-1; | 
| -    pCur->info.nSize = 0; | 
| -    pCur->validNKey = 0; | 
| -  } | 
| -  return rc; | 
| +  pCur->aiIdx[pCur->iPage] = pPage->nCell-1; | 
| +  assert( pCur->info.nSize==0 ); | 
| +  assert( (pCur->curFlags & BTCF_ValidNKey)==0 ); | 
| +  return SQLITE_OK; | 
| } | 
|  | 
| /* Move the cursor to the first entry in the table.  Return SQLITE_OK | 
| @@ -4323,7 +4584,7 @@ int sqlite3BtreeFirst(BtCursor *pCur, int *pRes){ | 
| rc = moveToRoot(pCur); | 
| if( rc==SQLITE_OK ){ | 
| if( pCur->eState==CURSOR_INVALID ){ | 
| -      assert( pCur->apPage[pCur->iPage]->nCell==0 ); | 
| +      assert( pCur->pgnoRoot==0 || pCur->apPage[pCur->iPage]->nCell==0 ); | 
| *pRes = 1; | 
| }else{ | 
| assert( pCur->apPage[pCur->iPage]->nCell>0 ); | 
| @@ -4345,7 +4606,7 @@ int sqlite3BtreeLast(BtCursor *pCur, int *pRes){ | 
| assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) ); | 
|  | 
| /* If the cursor already points to the last entry, this is a no-op. */ | 
| -  if( CURSOR_VALID==pCur->eState && pCur->atLast ){ | 
| +  if( CURSOR_VALID==pCur->eState && (pCur->curFlags & BTCF_AtLast)!=0 ){ | 
| #ifdef SQLITE_DEBUG | 
| /* This block serves to assert() that the cursor really does point | 
| ** to the last entry in the b-tree. */ | 
| @@ -4362,13 +4623,18 @@ int sqlite3BtreeLast(BtCursor *pCur, int *pRes){ | 
| rc = moveToRoot(pCur); | 
| if( rc==SQLITE_OK ){ | 
| if( CURSOR_INVALID==pCur->eState ){ | 
| -      assert( pCur->apPage[pCur->iPage]->nCell==0 ); | 
| +      assert( pCur->pgnoRoot==0 || pCur->apPage[pCur->iPage]->nCell==0 ); | 
| *pRes = 1; | 
| }else{ | 
| assert( pCur->eState==CURSOR_VALID ); | 
| *pRes = 0; | 
| rc = moveToRightmost(pCur); | 
| -      pCur->atLast = rc==SQLITE_OK ?1:0; | 
| +      if( rc==SQLITE_OK ){ | 
| +        pCur->curFlags |= BTCF_AtLast; | 
| +      }else{ | 
| +        pCur->curFlags &= ~BTCF_AtLast; | 
| +      } | 
| + | 
| } | 
| } | 
| return rc; | 
| @@ -4410,6 +4676,7 @@ int sqlite3BtreeMovetoUnpacked( | 
| int *pRes                /* Write search results here */ | 
| ){ | 
| int rc; | 
| +  RecordCompare xRecordCompare; | 
|  | 
| assert( cursorHoldsMutex(pCur) ); | 
| assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) ); | 
| @@ -4418,37 +4685,48 @@ int sqlite3BtreeMovetoUnpacked( | 
|  | 
| /* If the cursor is already positioned at the point we are trying | 
| ** to move to, then just return without doing any work */ | 
| -  if( pCur->eState==CURSOR_VALID && pCur->validNKey | 
| +  if( pCur->eState==CURSOR_VALID && (pCur->curFlags & BTCF_ValidNKey)!=0 | 
| && pCur->apPage[0]->intKey | 
| ){ | 
| if( pCur->info.nKey==intKey ){ | 
| *pRes = 0; | 
| return SQLITE_OK; | 
| } | 
| -    if( pCur->atLast && pCur->info.nKey<intKey ){ | 
| +    if( (pCur->curFlags & BTCF_AtLast)!=0 && pCur->info.nKey<intKey ){ | 
| *pRes = -1; | 
| return SQLITE_OK; | 
| } | 
| } | 
|  | 
| +  if( pIdxKey ){ | 
| +    xRecordCompare = sqlite3VdbeFindCompare(pIdxKey); | 
| +    pIdxKey->errCode = 0; | 
| +    assert( pIdxKey->default_rc==1 | 
| +         || pIdxKey->default_rc==0 | 
| +         || pIdxKey->default_rc==-1 | 
| +    ); | 
| +  }else{ | 
| +    xRecordCompare = 0; /* All keys are integers */ | 
| +  } | 
| + | 
| rc = moveToRoot(pCur); | 
| if( rc ){ | 
| return rc; | 
| } | 
| -  assert( pCur->apPage[pCur->iPage] ); | 
| -  assert( pCur->apPage[pCur->iPage]->isInit ); | 
| -  assert( pCur->apPage[pCur->iPage]->nCell>0 || pCur->eState==CURSOR_INVALID ); | 
| +  assert( pCur->pgnoRoot==0 || pCur->apPage[pCur->iPage] ); | 
| +  assert( pCur->pgnoRoot==0 || pCur->apPage[pCur->iPage]->isInit ); | 
| +  assert( pCur->eState==CURSOR_INVALID || pCur->apPage[pCur->iPage]->nCell>0 ); | 
| if( pCur->eState==CURSOR_INVALID ){ | 
| *pRes = -1; | 
| -    assert( pCur->apPage[pCur->iPage]->nCell==0 ); | 
| +    assert( pCur->pgnoRoot==0 || pCur->apPage[pCur->iPage]->nCell==0 ); | 
| return SQLITE_OK; | 
| } | 
| assert( pCur->apPage[0]->intKey || pIdxKey ); | 
| for(;;){ | 
| -    int lwr, upr; | 
| +    int lwr, upr, idx, c; | 
| Pgno chldPg; | 
| MemPage *pPage = pCur->apPage[pCur->iPage]; | 
| -    int c; | 
| +    u8 *pCell;                          /* Pointer to current cell in pPage */ | 
|  | 
| /* pPage->nCell must be greater than zero. If this is the root-page | 
| ** the cursor would have been INVALID above and this for(;;) loop | 
| @@ -4460,35 +4738,47 @@ int sqlite3BtreeMovetoUnpacked( | 
| assert( pPage->intKey==(pIdxKey==0) ); | 
| lwr = 0; | 
| upr = pPage->nCell-1; | 
| -    if( biasRight ){ | 
| -      pCur->aiIdx[pCur->iPage] = (u16)upr; | 
| -    }else{ | 
| -      pCur->aiIdx[pCur->iPage] = (u16)((upr+lwr)/2); | 
| -    } | 
| -    for(;;){ | 
| -      int idx = pCur->aiIdx[pCur->iPage]; /* Index of current cell in pPage */ | 
| -      u8 *pCell;                          /* Pointer to current cell in pPage */ | 
| - | 
| -      pCur->info.nSize = 0; | 
| -      pCell = findCell(pPage, idx) + pPage->childPtrSize; | 
| -      if( pPage->intKey ){ | 
| +    assert( biasRight==0 || biasRight==1 ); | 
| +    idx = upr>>(1-biasRight); /* idx = biasRight ? upr : (lwr+upr)/2; */ | 
| +    pCur->aiIdx[pCur->iPage] = (u16)idx; | 
| +    if( xRecordCompare==0 ){ | 
| +      for(;;){ | 
| i64 nCellKey; | 
| -        if( pPage->hasData ){ | 
| -          u32 dummy; | 
| -          pCell += getVarint32(pCell, dummy); | 
| +        pCell = findCell(pPage, idx) + pPage->childPtrSize; | 
| +        if( pPage->intKeyLeaf ){ | 
| +          while( 0x80 <= *(pCell++) ){ | 
| +            if( pCell>=pPage->aDataEnd ) return SQLITE_CORRUPT_BKPT; | 
| +          } | 
| } | 
| getVarint(pCell, (u64*)&nCellKey); | 
| -        if( nCellKey==intKey ){ | 
| -          c = 0; | 
| -        }else if( nCellKey<intKey ){ | 
| -          c = -1; | 
| +        if( nCellKey<intKey ){ | 
| +          lwr = idx+1; | 
| +          if( lwr>upr ){ c = -1; break; } | 
| +        }else if( nCellKey>intKey ){ | 
| +          upr = idx-1; | 
| +          if( lwr>upr ){ c = +1; break; } | 
| }else{ | 
| -          assert( nCellKey>intKey ); | 
| -          c = +1; | 
| +          assert( nCellKey==intKey ); | 
| +          pCur->curFlags |= BTCF_ValidNKey; | 
| +          pCur->info.nKey = nCellKey; | 
| +          pCur->aiIdx[pCur->iPage] = (u16)idx; | 
| +          if( !pPage->leaf ){ | 
| +            lwr = idx; | 
| +            goto moveto_next_layer; | 
| +          }else{ | 
| +            *pRes = 0; | 
| +            rc = SQLITE_OK; | 
| +            goto moveto_finish; | 
| +          } | 
| } | 
| -        pCur->validNKey = 1; | 
| -        pCur->info.nKey = nCellKey; | 
| -      }else{ | 
| +        assert( lwr+upr>=0 ); | 
| +        idx = (lwr+upr)>>1;  /* idx = (lwr+upr)/2; */ | 
| +      } | 
| +    }else{ | 
| +      for(;;){ | 
| +        int nCell; | 
| +        pCell = findCell(pPage, idx) + pPage->childPtrSize; | 
| + | 
| /* The maximum supported page-size is 65536 bytes. This means that | 
| ** the maximum number of record bytes stored on an index B-Tree | 
| ** page is less than 16384 bytes and may be stored as a 2-byte | 
| @@ -4497,18 +4787,20 @@ int sqlite3BtreeMovetoUnpacked( | 
| ** stored entirely within the b-tree page by inspecting the first | 
| ** 2 bytes of the cell. | 
| */ | 
| -        int nCell = pCell[0]; | 
| -        if( !(nCell & 0x80) && nCell<=pPage->maxLocal ){ | 
| +        nCell = pCell[0]; | 
| +        if( nCell<=pPage->max1bytePayload ){ | 
| /* This branch runs if the record-size field of the cell is a | 
| ** single byte varint and the record fits entirely on the main | 
| ** b-tree page.  */ | 
| -          c = sqlite3VdbeRecordCompare(nCell, (void*)&pCell[1], pIdxKey); | 
| +          testcase( pCell+nCell+1==pPage->aDataEnd ); | 
| +          c = xRecordCompare(nCell, (void*)&pCell[1], pIdxKey); | 
| }else if( !(pCell[1] & 0x80) | 
| && (nCell = ((nCell&0x7f)<<7) + pCell[1])<=pPage->maxLocal | 
| ){ | 
| /* The record-size field is a 2 byte varint and the record | 
| ** fits entirely on the main b-tree page.  */ | 
| -          c = sqlite3VdbeRecordCompare(nCell, (void*)&pCell[2], pIdxKey); | 
| +          testcase( pCell+nCell+2==pPage->aDataEnd ); | 
| +          c = xRecordCompare(nCell, (void*)&pCell[2], pIdxKey); | 
| }else{ | 
| /* The record flows over onto one or more overflow pages. In | 
| ** this case the whole cell needs to be parsed, a buffer allocated | 
| @@ -4523,58 +4815,58 @@ int sqlite3BtreeMovetoUnpacked( | 
| rc = SQLITE_NOMEM; | 
| goto moveto_finish; | 
| } | 
| -          rc = accessPayload(pCur, 0, nCell, (unsigned char*)pCellKey, 0); | 
| +          pCur->aiIdx[pCur->iPage] = (u16)idx; | 
| +          rc = accessPayload(pCur, 0, nCell, (unsigned char*)pCellKey, 2); | 
| if( rc ){ | 
| sqlite3_free(pCellKey); | 
| goto moveto_finish; | 
| } | 
| -          c = sqlite3VdbeRecordCompare(nCell, pCellKey, pIdxKey); | 
| +          c = xRecordCompare(nCell, pCellKey, pIdxKey); | 
| sqlite3_free(pCellKey); | 
| } | 
| -      } | 
| -      if( c==0 ){ | 
| -        if( pPage->intKey && !pPage->leaf ){ | 
| -          lwr = idx; | 
| -          upr = lwr - 1; | 
| -          break; | 
| +        assert( | 
| +            (pIdxKey->errCode!=SQLITE_CORRUPT || c==0) | 
| +         && (pIdxKey->errCode!=SQLITE_NOMEM || pCur->pBtree->db->mallocFailed) | 
| +        ); | 
| +        if( c<0 ){ | 
| +          lwr = idx+1; | 
| +        }else if( c>0 ){ | 
| +          upr = idx-1; | 
| }else{ | 
| +          assert( c==0 ); | 
| *pRes = 0; | 
| rc = SQLITE_OK; | 
| +          pCur->aiIdx[pCur->iPage] = (u16)idx; | 
| +          if( pIdxKey->errCode ) rc = SQLITE_CORRUPT; | 
| goto moveto_finish; | 
| } | 
| +        if( lwr>upr ) break; | 
| +        assert( lwr+upr>=0 ); | 
| +        idx = (lwr+upr)>>1;  /* idx = (lwr+upr)/2 */ | 
| } | 
| -      if( c<0 ){ | 
| -        lwr = idx+1; | 
| -      }else{ | 
| -        upr = idx-1; | 
| -      } | 
| -      if( lwr>upr ){ | 
| -        break; | 
| -      } | 
| -      pCur->aiIdx[pCur->iPage] = (u16)((lwr+upr)/2); | 
| } | 
| -    assert( lwr==upr+1 ); | 
| +    assert( lwr==upr+1 || (pPage->intKey && !pPage->leaf) ); | 
| assert( pPage->isInit ); | 
| if( pPage->leaf ){ | 
| -      chldPg = 0; | 
| -    }else if( lwr>=pPage->nCell ){ | 
| -      chldPg = get4byte(&pPage->aData[pPage->hdrOffset+8]); | 
| -    }else{ | 
| -      chldPg = get4byte(findCell(pPage, lwr)); | 
| -    } | 
| -    if( chldPg==0 ){ | 
| assert( pCur->aiIdx[pCur->iPage]<pCur->apPage[pCur->iPage]->nCell ); | 
| +      pCur->aiIdx[pCur->iPage] = (u16)idx; | 
| *pRes = c; | 
| rc = SQLITE_OK; | 
| goto moveto_finish; | 
| } | 
| +moveto_next_layer: | 
| +    if( lwr>=pPage->nCell ){ | 
| +      chldPg = get4byte(&pPage->aData[pPage->hdrOffset+8]); | 
| +    }else{ | 
| +      chldPg = get4byte(findCell(pPage, lwr)); | 
| +    } | 
| pCur->aiIdx[pCur->iPage] = (u16)lwr; | 
| -    pCur->info.nSize = 0; | 
| -    pCur->validNKey = 0; | 
| rc = moveToChild(pCur, chldPg); | 
| -    if( rc ) goto moveto_finish; | 
| +    if( rc ) break; | 
| } | 
| moveto_finish: | 
| +  pCur->info.nSize = 0; | 
| +  pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl); | 
| return rc; | 
| } | 
|  | 
| @@ -4599,43 +4891,67 @@ int sqlite3BtreeEof(BtCursor *pCur){ | 
| ** successful then set *pRes=0.  If the cursor | 
| ** was already pointing to the last entry in the database before | 
| ** this routine was called, then set *pRes=1. | 
| -*/ | 
| -int sqlite3BtreeNext(BtCursor *pCur, int *pRes){ | 
| +** | 
| +** The main entry point is sqlite3BtreeNext().  That routine is optimized | 
| +** for the common case of merely incrementing the cell counter BtCursor.aiIdx | 
| +** to the next cell on the current page.  The (slower) btreeNext() helper | 
| +** routine is called when it is necessary to move to a different page or | 
| +** to restore the cursor. | 
| +** | 
| +** The calling function will set *pRes to 0 or 1.  The initial *pRes value | 
| +** will be 1 if the cursor being stepped corresponds to an SQL index and | 
| +** if this routine could have been skipped if that SQL index had been | 
| +** a unique index.  Otherwise the caller will have set *pRes to zero. | 
| +** Zero is the common case. The btree implementation is free to use the | 
| +** initial *pRes value as a hint to improve performance, but the current | 
| +** SQLite btree implementation does not. (Note that the comdb2 btree | 
| +** implementation does use this hint, however.) | 
| +*/ | 
| +static SQLITE_NOINLINE int btreeNext(BtCursor *pCur, int *pRes){ | 
| int rc; | 
| int idx; | 
| MemPage *pPage; | 
|  | 
| assert( cursorHoldsMutex(pCur) ); | 
| -  rc = restoreCursorPosition(pCur); | 
| -  if( rc!=SQLITE_OK ){ | 
| -    return rc; | 
| -  } | 
| -  assert( pRes!=0 ); | 
| -  if( CURSOR_INVALID==pCur->eState ){ | 
| -    *pRes = 1; | 
| -    return SQLITE_OK; | 
| -  } | 
| -  if( pCur->skipNext>0 ){ | 
| -    pCur->skipNext = 0; | 
| -    *pRes = 0; | 
| -    return SQLITE_OK; | 
| +  assert( pCur->skipNext==0 || pCur->eState!=CURSOR_VALID ); | 
| +  assert( *pRes==0 ); | 
| +  if( pCur->eState!=CURSOR_VALID ){ | 
| +    assert( (pCur->curFlags & BTCF_ValidOvfl)==0 ); | 
| +    rc = restoreCursorPosition(pCur); | 
| +    if( rc!=SQLITE_OK ){ | 
| +      return rc; | 
| +    } | 
| +    if( CURSOR_INVALID==pCur->eState ){ | 
| +      *pRes = 1; | 
| +      return SQLITE_OK; | 
| +    } | 
| +    if( pCur->skipNext ){ | 
| +      assert( pCur->eState==CURSOR_VALID || pCur->eState==CURSOR_SKIPNEXT ); | 
| +      pCur->eState = CURSOR_VALID; | 
| +      if( pCur->skipNext>0 ){ | 
| +        pCur->skipNext = 0; | 
| +        return SQLITE_OK; | 
| +      } | 
| +      pCur->skipNext = 0; | 
| +    } | 
| } | 
| -  pCur->skipNext = 0; | 
|  | 
| pPage = pCur->apPage[pCur->iPage]; | 
| idx = ++pCur->aiIdx[pCur->iPage]; | 
| assert( pPage->isInit ); | 
| -  assert( idx<=pPage->nCell ); | 
|  | 
| -  pCur->info.nSize = 0; | 
| -  pCur->validNKey = 0; | 
| +  /* If the database file is corrupt, it is possible for the value of idx | 
| +  ** to be invalid here. This can only occur if a second cursor modifies | 
| +  ** the page while cursor pCur is holding a reference to it, which can | 
| +  ** only happen if the database is corrupt in such a way as to link the | 
| +  ** page into more than one b-tree structure. */ | 
| +  testcase( idx>pPage->nCell ); | 
| + | 
| if( idx>=pPage->nCell ){ | 
| if( !pPage->leaf ){ | 
| rc = moveToChild(pCur, get4byte(&pPage->aData[pPage->hdrOffset+8])); | 
| if( rc ) return rc; | 
| -      rc = moveToLeftmost(pCur); | 
| -      *pRes = 0; | 
| -      return rc; | 
| +      return moveToLeftmost(pCur); | 
| } | 
| do{ | 
| if( pCur->iPage==0 ){ | 
| @@ -4646,58 +4962,97 @@ int sqlite3BtreeNext(BtCursor *pCur, int *pRes){ | 
| moveToParent(pCur); | 
| pPage = pCur->apPage[pCur->iPage]; | 
| }while( pCur->aiIdx[pCur->iPage]>=pPage->nCell ); | 
| -    *pRes = 0; | 
| if( pPage->intKey ){ | 
| -      rc = sqlite3BtreeNext(pCur, pRes); | 
| +      return sqlite3BtreeNext(pCur, pRes); | 
| }else{ | 
| -      rc = SQLITE_OK; | 
| +      return SQLITE_OK; | 
| } | 
| -    return rc; | 
| } | 
| +  if( pPage->leaf ){ | 
| +    return SQLITE_OK; | 
| +  }else{ | 
| +    return moveToLeftmost(pCur); | 
| +  } | 
| +} | 
| +int sqlite3BtreeNext(BtCursor *pCur, int *pRes){ | 
| +  MemPage *pPage; | 
| +  assert( cursorHoldsMutex(pCur) ); | 
| +  assert( pRes!=0 ); | 
| +  assert( *pRes==0 || *pRes==1 ); | 
| +  assert( pCur->skipNext==0 || pCur->eState!=CURSOR_VALID ); | 
| +  pCur->info.nSize = 0; | 
| +  pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl); | 
| *pRes = 0; | 
| +  if( pCur->eState!=CURSOR_VALID ) return btreeNext(pCur, pRes); | 
| +  pPage = pCur->apPage[pCur->iPage]; | 
| +  if( (++pCur->aiIdx[pCur->iPage])>=pPage->nCell ){ | 
| +    pCur->aiIdx[pCur->iPage]--; | 
| +    return btreeNext(pCur, pRes); | 
| +  } | 
| if( pPage->leaf ){ | 
| return SQLITE_OK; | 
| +  }else{ | 
| +    return moveToLeftmost(pCur); | 
| } | 
| -  rc = moveToLeftmost(pCur); | 
| -  return rc; | 
| } | 
|  | 
| - | 
| /* | 
| ** Step the cursor to the back to the previous entry in the database.  If | 
| ** successful then set *pRes=0.  If the cursor | 
| ** was already pointing to the first entry in the database before | 
| ** this routine was called, then set *pRes=1. | 
| -*/ | 
| -int sqlite3BtreePrevious(BtCursor *pCur, int *pRes){ | 
| +** | 
| +** The main entry point is sqlite3BtreePrevious().  That routine is optimized | 
| +** for the common case of merely decrementing the cell counter BtCursor.aiIdx | 
| +** to the previous cell on the current page.  The (slower) btreePrevious() | 
| +** helper routine is called when it is necessary to move to a different page | 
| +** or to restore the cursor. | 
| +** | 
| +** The calling function will set *pRes to 0 or 1.  The initial *pRes value | 
| +** will be 1 if the cursor being stepped corresponds to an SQL index and | 
| +** if this routine could have been skipped if that SQL index had been | 
| +** a unique index.  Otherwise the caller will have set *pRes to zero. | 
| +** Zero is the common case. The btree implementation is free to use the | 
| +** initial *pRes value as a hint to improve performance, but the current | 
| +** SQLite btree implementation does not. (Note that the comdb2 btree | 
| +** implementation does use this hint, however.) | 
| +*/ | 
| +static SQLITE_NOINLINE int btreePrevious(BtCursor *pCur, int *pRes){ | 
| int rc; | 
| MemPage *pPage; | 
|  | 
| assert( cursorHoldsMutex(pCur) ); | 
| -  rc = restoreCursorPosition(pCur); | 
| -  if( rc!=SQLITE_OK ){ | 
| -    return rc; | 
| -  } | 
| -  pCur->atLast = 0; | 
| -  if( CURSOR_INVALID==pCur->eState ){ | 
| -    *pRes = 1; | 
| -    return SQLITE_OK; | 
| -  } | 
| -  if( pCur->skipNext<0 ){ | 
| -    pCur->skipNext = 0; | 
| -    *pRes = 0; | 
| -    return SQLITE_OK; | 
| +  assert( pRes!=0 ); | 
| +  assert( *pRes==0 ); | 
| +  assert( pCur->skipNext==0 || pCur->eState!=CURSOR_VALID ); | 
| +  assert( (pCur->curFlags & (BTCF_AtLast|BTCF_ValidOvfl|BTCF_ValidNKey))==0 ); | 
| +  assert( pCur->info.nSize==0 ); | 
| +  if( pCur->eState!=CURSOR_VALID ){ | 
| +    rc = restoreCursorPosition(pCur); | 
| +    if( rc!=SQLITE_OK ){ | 
| +      return rc; | 
| +    } | 
| +    if( CURSOR_INVALID==pCur->eState ){ | 
| +      *pRes = 1; | 
| +      return SQLITE_OK; | 
| +    } | 
| +    if( pCur->skipNext ){ | 
| +      assert( pCur->eState==CURSOR_VALID || pCur->eState==CURSOR_SKIPNEXT ); | 
| +      pCur->eState = CURSOR_VALID; | 
| +      if( pCur->skipNext<0 ){ | 
| +        pCur->skipNext = 0; | 
| +        return SQLITE_OK; | 
| +      } | 
| +      pCur->skipNext = 0; | 
| +    } | 
| } | 
| -  pCur->skipNext = 0; | 
|  | 
| pPage = pCur->apPage[pCur->iPage]; | 
| assert( pPage->isInit ); | 
| if( !pPage->leaf ){ | 
| int idx = pCur->aiIdx[pCur->iPage]; | 
| rc = moveToChild(pCur, get4byte(findCell(pPage, idx))); | 
| -    if( rc ){ | 
| -      return rc; | 
| -    } | 
| +    if( rc ) return rc; | 
| rc = moveToRightmost(pCur); | 
| }else{ | 
| while( pCur->aiIdx[pCur->iPage]==0 ){ | 
| @@ -4708,8 +5063,8 @@ int sqlite3BtreePrevious(BtCursor *pCur, int *pRes){ | 
| } | 
| moveToParent(pCur); | 
| } | 
| -    pCur->info.nSize = 0; | 
| -    pCur->validNKey = 0; | 
| +    assert( pCur->info.nSize==0 ); | 
| +    assert( (pCur->curFlags & (BTCF_ValidNKey|BTCF_ValidOvfl))==0 ); | 
|  | 
| pCur->aiIdx[pCur->iPage]--; | 
| pPage = pCur->apPage[pCur->iPage]; | 
| @@ -4719,9 +5074,25 @@ int sqlite3BtreePrevious(BtCursor *pCur, int *pRes){ | 
| rc = SQLITE_OK; | 
| } | 
| } | 
| -  *pRes = 0; | 
| return rc; | 
| } | 
| +int sqlite3BtreePrevious(BtCursor *pCur, int *pRes){ | 
| +  assert( cursorHoldsMutex(pCur) ); | 
| +  assert( pRes!=0 ); | 
| +  assert( *pRes==0 || *pRes==1 ); | 
| +  assert( pCur->skipNext==0 || pCur->eState!=CURSOR_VALID ); | 
| +  *pRes = 0;                 /* btreePrevious() asserts *pRes==0 on entry */ | 
| +  pCur->curFlags &= ~(BTCF_AtLast|BTCF_ValidOvfl|BTCF_ValidNKey); | 
| +  pCur->info.nSize = 0;      /* btreePrevious() asserts this is already 0 */ | 
| +  if( pCur->eState!=CURSOR_VALID | 
| +   || pCur->aiIdx[pCur->iPage]==0 | 
| +   || pCur->apPage[pCur->iPage]->leaf==0 | 
| +  ){ | 
| +    return btreePrevious(pCur, pRes);   /* slow path: handled by the helper */ | 
| +  } | 
| +  pCur->aiIdx[pCur->iPage]--;  /* fast path: previous cell on this leaf page */ | 
| +  return SQLITE_OK; | 
| +} | 
|  | 
| /* | 
| ** Allocate a new page from the database file. | 
| @@ -4735,21 +5106,23 @@ int sqlite3BtreePrevious(BtCursor *pCur, int *pRes){ | 
| ** an error.  *ppPage and *pPgno are undefined in the event of an error. | 
| ** Do not invoke sqlite3PagerUnref() on *ppPage if an error is returned. | 
| ** | 
| -** If the "nearby" parameter is not 0, then a (feeble) effort is made to | 
| +** If the "nearby" parameter is not 0, then an effort is made to | 
| ** locate a page close to the page number "nearby".  This can be used in an | 
| ** attempt to keep related pages close to each other in the database file, | 
| ** which in turn can make database access faster. | 
| ** | 
| -** If the "exact" parameter is not 0, and the page-number nearby exists | 
| -** anywhere on the free-list, then it is guarenteed to be returned. This | 
| -** is only used by auto-vacuum databases when allocating a new table. | 
| +** If the eMode parameter is BTALLOC_EXACT and the nearby page exists | 
| +** anywhere on the free-list, then it is guaranteed to be returned.  If | 
| +** eMode is BTALLOC_LE then the page returned will be less than or equal | 
| +** to nearby if any such page exists.  If eMode is BTALLOC_ANY then there | 
| +** are no restrictions on which page is returned. | 
| */ | 
| static int allocateBtreePage( | 
| -  BtShared *pBt, | 
| -  MemPage **ppPage, | 
| -  Pgno *pPgno, | 
| -  Pgno nearby, | 
| -  u8 exact | 
| +  BtShared *pBt,         /* The btree */ | 
| +  MemPage **ppPage,      /* Store pointer to the allocated page here */ | 
| +  Pgno *pPgno,           /* Store the page number here */ | 
| +  Pgno nearby,           /* Search for a page near this one */ | 
| +  u8 eMode               /* BTALLOC_EXACT, BTALLOC_LE, or BTALLOC_ANY */ | 
| ){ | 
| MemPage *pPage1; | 
| int rc; | 
| @@ -4760,6 +5133,7 @@ static int allocateBtreePage( | 
| Pgno mxPage;     /* Total size of the database file */ | 
|  | 
| assert( sqlite3_mutex_held(pBt->mutex) ); | 
| +  assert( eMode==BTALLOC_ANY || (nearby>0 && IfNotOmitAV(pBt->autoVacuum)) ); | 
| pPage1 = pBt->pPage1; | 
| mxPage = btreePagecount(pBt); | 
| n = get4byte(&pPage1->aData[36]); | 
| @@ -4772,21 +5146,24 @@ static int allocateBtreePage( | 
| Pgno iTrunk; | 
| u8 searchList = 0; /* If the free-list must be searched for 'nearby' */ | 
|  | 
| -    /* If the 'exact' parameter was true and a query of the pointer-map | 
| +    /* If eMode==BTALLOC_EXACT and a query of the pointer-map | 
| ** shows that the page 'nearby' is somewhere on the free-list, then | 
| ** the entire-list will be searched for that page. | 
| */ | 
| #ifndef SQLITE_OMIT_AUTOVACUUM | 
| -    if( exact && nearby<=mxPage ){ | 
| -      u8 eType; | 
| -      assert( nearby>0 ); | 
| -      assert( pBt->autoVacuum ); | 
| -      rc = ptrmapGet(pBt, nearby, &eType, 0); | 
| -      if( rc ) return rc; | 
| -      if( eType==PTRMAP_FREEPAGE ){ | 
| -        searchList = 1; | 
| +    if( eMode==BTALLOC_EXACT ){ | 
| +      if( nearby<=mxPage ){ | 
| +        u8 eType; | 
| +        assert( nearby>0 ); | 
| +        assert( pBt->autoVacuum ); | 
| +        rc = ptrmapGet(pBt, nearby, &eType, 0); | 
| +        if( rc ) return rc; | 
| +        if( eType==PTRMAP_FREEPAGE ){ | 
| +          searchList = 1; | 
| +        } | 
| } | 
| -      *pPgno = nearby; | 
| +    }else if( eMode==BTALLOC_LE ){ | 
| +      searchList = 1; | 
| } | 
| #endif | 
|  | 
| @@ -4799,7 +5176,8 @@ static int allocateBtreePage( | 
|  | 
| /* The code within this loop is run only once if the 'searchList' variable | 
| ** is not true. Otherwise, it runs once for each trunk-page on the | 
| -    ** free-list until the page 'nearby' is located. | 
| +    ** free-list until the page 'nearby' is located (eMode==BTALLOC_EXACT) | 
| +    ** or until a page <= 'nearby' is located (eMode==BTALLOC_LE) | 
| */ | 
| do { | 
| pPrevTrunk = pTrunk; | 
| @@ -4818,6 +5196,8 @@ static int allocateBtreePage( | 
| pTrunk = 0; | 
| goto end_allocate_page; | 
| } | 
| +      assert( pTrunk!=0 ); | 
| +      assert( pTrunk->aData!=0 ); | 
|  | 
| k = get4byte(&pTrunk->aData[4]); /* # of leaves on this trunk page */ | 
| if( k==0 && !searchList ){ | 
| @@ -4839,11 +5219,13 @@ static int allocateBtreePage( | 
| rc = SQLITE_CORRUPT_BKPT; | 
| goto end_allocate_page; | 
| #ifndef SQLITE_OMIT_AUTOVACUUM | 
| -      }else if( searchList && nearby==iTrunk ){ | 
| +      }else if( searchList | 
| +            && (nearby==iTrunk || (iTrunk<nearby && eMode==BTALLOC_LE)) | 
| +      ){ | 
| /* The list is being searched and this trunk page is the page | 
| ** to allocate, regardless of whether it has leaves. | 
| */ | 
| -        assert( *pPgno==iTrunk ); | 
| +        *pPgno = iTrunk; | 
| *ppPage = pTrunk; | 
| searchList = 0; | 
| rc = sqlite3PagerWrite(pTrunk->pDbPage); | 
| @@ -4906,14 +5288,24 @@ static int allocateBtreePage( | 
| unsigned char *aData = pTrunk->aData; | 
| if( nearby>0 ){ | 
| u32 i; | 
| -          int dist; | 
| closest = 0; | 
| -          dist = sqlite3AbsInt32(get4byte(&aData[8]) - nearby); | 
| -          for(i=1; i<k; i++){ | 
| -            int d2 = sqlite3AbsInt32(get4byte(&aData[8+i*4]) - nearby); | 
| -            if( d2<dist ){ | 
| -              closest = i; | 
| -              dist = d2; | 
| +          if( eMode==BTALLOC_LE ){ | 
| +            for(i=0; i<k; i++){ | 
| +              iPage = get4byte(&aData[8+i*4]); | 
| +              if( iPage<=nearby ){ | 
| +                closest = i; | 
| +                break; | 
| +              } | 
| +            } | 
| +          }else{ | 
| +            int dist; | 
| +            dist = sqlite3AbsInt32(get4byte(&aData[8]) - nearby); | 
| +            for(i=1; i<k; i++){ | 
| +              int d2 = sqlite3AbsInt32(get4byte(&aData[8+i*4]) - nearby); | 
| +              if( d2<dist ){ | 
| +                closest = i; | 
| +                dist = d2; | 
| +              } | 
| } | 
| } | 
| }else{ | 
| @@ -4927,7 +5319,9 @@ static int allocateBtreePage( | 
| goto end_allocate_page; | 
| } | 
| testcase( iPage==mxPage ); | 
| -        if( !searchList || iPage==nearby ){ | 
| +        if( !searchList | 
| +         || (iPage==nearby || (iPage<nearby && eMode==BTALLOC_LE)) | 
| +        ){ | 
| int noContent; | 
| *pPgno = iPage; | 
| TRACE(("ALLOCATE: %d was leaf %d of %d on trunk %d" | 
| @@ -4939,7 +5333,7 @@ static int allocateBtreePage( | 
| memcpy(&aData[8+closest*4], &aData[4+k*4], 4); | 
| } | 
| put4byte(&aData[4], k-1); | 
| -          noContent = !btreeGetHasContent(pBt, *pPgno); | 
| +          noContent = !btreeGetHasContent(pBt, *pPgno)? PAGER_GET_NOCONTENT : 0; | 
| rc = btreeGetPage(pBt, *pPgno, ppPage, noContent); | 
| if( rc==SQLITE_OK ){ | 
| rc = sqlite3PagerWrite((*ppPage)->pDbPage); | 
| @@ -4954,8 +5348,26 @@ static int allocateBtreePage( | 
| pPrevTrunk = 0; | 
| }while( searchList ); | 
| }else{ | 
| -    /* There are no pages on the freelist, so create a new page at the | 
| -    ** end of the file */ | 
| +    /* There are no pages on the freelist, so append a new page to the | 
| +    ** database image. | 
| +    ** | 
| +    ** Normally, new pages allocated by this block can be requested from the | 
| +    ** pager layer with the 'no-content' flag set. This prevents the pager | 
| +    ** from trying to read the page's content from disk. However, if the | 
| +    ** current transaction has already run one or more incremental-vacuum | 
| +    ** steps, then the page we are about to allocate may contain content | 
| +    ** that is required in the event of a rollback. In this case, do | 
| +    ** not set the no-content flag. This causes the pager to load and journal | 
| +    ** the current page content before overwriting it. | 
| +    ** | 
| +    ** Note that the pager will not actually attempt to load or journal | 
| +    ** content for any page that really does lie past the end of the database | 
| +    ** file on disk. So the effects of disabling the no-content optimization | 
| +    ** here are confined to those pages that lie between the end of the | 
| +    ** database image and the end of the database file. | 
| +    */ | 
| +    int bNoContent = (0==IfNotOmitAV(pBt->bDoTruncate))? PAGER_GET_NOCONTENT:0; | 
| + | 
| rc = sqlite3PagerWrite(pBt->pPage1->pDbPage); | 
| if( rc ) return rc; | 
| pBt->nPage++; | 
| @@ -4970,7 +5382,7 @@ static int allocateBtreePage( | 
| MemPage *pPg = 0; | 
| TRACE(("ALLOCATE: %d from end of file (pointer-map page)\n", pBt->nPage)); | 
| assert( pBt->nPage!=PENDING_BYTE_PAGE(pBt) ); | 
| -      rc = btreeGetPage(pBt, pBt->nPage, &pPg, 1); | 
| +      rc = btreeGetPage(pBt, pBt->nPage, &pPg, bNoContent); | 
| if( rc==SQLITE_OK ){ | 
| rc = sqlite3PagerWrite(pPg->pDbPage); | 
| releasePage(pPg); | 
| @@ -4984,7 +5396,7 @@ static int allocateBtreePage( | 
| *pPgno = pBt->nPage; | 
|  | 
| assert( *pPgno!=PENDING_BYTE_PAGE(pBt) ); | 
| -    rc = btreeGetPage(pBt, *pPgno, ppPage, 1); | 
| +    rc = btreeGetPage(pBt, *pPgno, ppPage, bNoContent); | 
| if( rc ) return rc; | 
| rc = sqlite3PagerWrite((*ppPage)->pDbPage); | 
| if( rc!=SQLITE_OK ){ | 
| @@ -5001,6 +5413,7 @@ end_allocate_page: | 
| if( rc==SQLITE_OK ){ | 
| if( sqlite3PagerPageRefcount((*ppPage)->pDbPage)>1 ){ | 
| releasePage(*ppPage); | 
| +      *ppPage = 0; | 
| return SQLITE_CORRUPT_BKPT; | 
| } | 
| (*ppPage)->isInit = 0; | 
| @@ -5048,7 +5461,7 @@ static int freePage2(BtShared *pBt, MemPage *pMemPage, Pgno iPage){ | 
| nFree = get4byte(&pPage1->aData[36]); | 
| put4byte(&pPage1->aData[36], nFree+1); | 
|  | 
| -  if( pBt->secureDelete ){ | 
| +  if( pBt->btsFlags & BTS_SECURE_DELETE ){ | 
| /* If the secure_delete option is enabled, then | 
| ** always fully overwrite deleted information with zeros. | 
| */ | 
| @@ -5109,7 +5522,7 @@ static int freePage2(BtShared *pBt, MemPage *pMemPage, Pgno iPage){ | 
| if( rc==SQLITE_OK ){ | 
| put4byte(&pTrunk->aData[4], nLeaf+1); | 
| put4byte(&pTrunk->aData[8+nLeaf*4], iPage); | 
| -        if( pPage && !pBt->secureDelete ){ | 
| +        if( pPage && (pBt->btsFlags & BTS_SECURE_DELETE)==0 ){ | 
| sqlite3PagerDontWrite(pPage->pDbPage); | 
| } | 
| rc = btreeSetHasContent(pBt, iPage); | 
| @@ -5152,9 +5565,15 @@ static void freePage(MemPage *pPage, int *pRC){ | 
| } | 
|  | 
| /* | 
| -** Free any overflow pages associated with the given Cell. | 
| +** Free any overflow pages associated with the given Cell.  Write the | 
| +** local Cell size (the number of bytes on the original page, omitting | 
| +** overflow) into *pnSize. | 
| */ | 
| -static int clearCell(MemPage *pPage, unsigned char *pCell){ | 
| +static int clearCell( | 
| +  MemPage *pPage,          /* The page that contains the Cell */ | 
| +  unsigned char *pCell,    /* First byte of the Cell */ | 
| +  u16 *pnSize              /* Write the size of the Cell here */ | 
| +){ | 
| BtShared *pBt = pPage->pBt; | 
| CellInfo info; | 
| Pgno ovflPgno; | 
| @@ -5164,9 +5583,13 @@ static int clearCell(MemPage *pPage, unsigned char *pCell){ | 
|  | 
| assert( sqlite3_mutex_held(pPage->pBt->mutex) ); | 
| btreeParseCellPtr(pPage, pCell, &info); | 
| +  *pnSize = info.nSize; | 
| if( info.iOverflow==0 ){ | 
| return SQLITE_OK;  /* No overflow pages. Return without doing anything */ | 
| } | 
| +  if( pCell+info.iOverflow+3 > pPage->aData+pPage->maskPage ){ | 
| +    return SQLITE_CORRUPT_BKPT;  /* Cell extends past end of page */ | 
| +  } | 
| ovflPgno = get4byte(&pCell[info.iOverflow]); | 
| assert( pBt->usableSize > 4 ); | 
| ovflPageSize = pBt->usableSize - 4; | 
| @@ -5244,7 +5667,6 @@ static int fillInCell( | 
| BtShared *pBt = pPage->pBt; | 
| Pgno pgnoOvfl = 0; | 
| int nHeader; | 
| -  CellInfo info; | 
|  | 
| assert( sqlite3_mutex_held(pPage->pBt->mutex) ); | 
|  | 
| @@ -5254,23 +5676,17 @@ static int fillInCell( | 
| || sqlite3PagerIswriteable(pPage->pDbPage) ); | 
|  | 
| /* Fill in the header. */ | 
| -  nHeader = 0; | 
| -  if( !pPage->leaf ){ | 
| -    nHeader += 4; | 
| -  } | 
| -  if( pPage->hasData ){ | 
| -    nHeader += putVarint(&pCell[nHeader], nData+nZero); | 
| +  nHeader = pPage->childPtrSize; | 
| +  nPayload = nData + nZero; | 
| +  if( pPage->intKeyLeaf ){ | 
| +    nHeader += putVarint32(&pCell[nHeader], nPayload); | 
| }else{ | 
| -    nData = nZero = 0; | 
| +    assert( nData==0 ); | 
| +    assert( nZero==0 ); | 
| } | 
| nHeader += putVarint(&pCell[nHeader], *(u64*)&nKey); | 
| -  btreeParseCellPtr(pPage, pCell, &info); | 
| -  assert( info.nHeader==nHeader ); | 
| -  assert( info.nKey==nKey ); | 
| -  assert( info.nData==(u32)(nData+nZero) ); | 
|  | 
| -  /* Fill in the payload */ | 
| -  nPayload = nData + nZero; | 
| +  /* Fill in the payload size */ | 
| if( pPage->intKey ){ | 
| pSrc = pData; | 
| nSrc = nData; | 
| @@ -5279,15 +5695,55 @@ static int fillInCell( | 
| if( NEVER(nKey>0x7fffffff || pKey==0) ){ | 
| return SQLITE_CORRUPT_BKPT; | 
| } | 
| -    nPayload += (int)nKey; | 
| +    nPayload = (int)nKey; | 
| pSrc = pKey; | 
| nSrc = (int)nKey; | 
| } | 
| -  *pnSize = info.nSize; | 
| -  spaceLeft = info.nLocal; | 
| +  if( nPayload<=pPage->maxLocal ){ | 
| +    n = nHeader + nPayload; | 
| +    testcase( n==3 ); | 
| +    testcase( n==4 ); | 
| +    if( n<4 ) n = 4; | 
| +    *pnSize = n; | 
| +    spaceLeft = nPayload; | 
| +    pPrior = pCell; | 
| +  }else{ | 
| +    int mn = pPage->minLocal; | 
| +    n = mn + (nPayload - mn) % (pPage->pBt->usableSize - 4); | 
| +    testcase( n==pPage->maxLocal ); | 
| +    testcase( n==pPage->maxLocal+1 ); | 
| +    if( n > pPage->maxLocal ) n = mn; | 
| +    spaceLeft = n; | 
| +    *pnSize = n + nHeader + 4; | 
| +    pPrior = &pCell[nHeader+n]; | 
| +  } | 
| pPayload = &pCell[nHeader]; | 
| -  pPrior = &pCell[info.iOverflow]; | 
|  | 
| +  /* At this point variables should be set as follows: | 
| +  ** | 
| +  **   nPayload           Total payload size in bytes | 
| +  **   pPayload           Begin writing payload here | 
| +  **   spaceLeft          Space available at pPayload.  If nPayload>spaceLeft, | 
| +  **                      that means content must spill into overflow pages. | 
| +  **   *pnSize            Size of the local cell (not counting overflow pages) | 
| +  **   pPrior             Where to write the pgno of the first overflow page | 
| +  ** | 
| +  ** Use a call to btreeParseCellPtr() to verify that the values above | 
| +  ** were computed correctly. | 
| +  */ | 
| +#if SQLITE_DEBUG | 
| +  {  /* Debug-only cross-check of the values above against the cell parser */ | 
| +    CellInfo info; | 
| +    btreeParseCellPtr(pPage, pCell, &info); | 
| +    assert( nHeader==(int)(info.pPayload - pCell) ); | 
| +    assert( info.nKey==nKey ); | 
| +    assert( *pnSize == info.nSize ); | 
| +    assert( spaceLeft == info.nLocal ); | 
| +    assert( pPrior == &pCell[info.iOverflow] ); | 
| +  } | 
| +#endif | 
| + | 
| +  /* Write the payload into the local Cell and any extra into overflow pages */ | 
| while( nPayload>0 ){ | 
| if( spaceLeft==0 ){ | 
| #ifndef SQLITE_OMIT_AUTOVACUUM | 
| @@ -5309,7 +5765,7 @@ static int fillInCell( | 
| ** If this is the first overflow page, then write a partial entry | 
| ** to the pointer-map. If we write nothing to this pointer-map slot, | 
| ** then the optimistic overflow chain processing in clearCell() | 
| -      ** may misinterpret the uninitialised values and delete the | 
| +      ** may misinterpret the uninitialized values and delete the | 
| ** wrong pages from the database. | 
| */ | 
| if( pBt->autoVacuum && rc==SQLITE_OK ){ | 
| @@ -5384,7 +5840,6 @@ static int fillInCell( | 
| ** "sz" must be the number of bytes in the cell. | 
| */ | 
| static void dropCell(MemPage *pPage, int idx, int sz, int *pRC){ | 
| -  int i;          /* Loop counter */ | 
| u32 pc;         /* Offset to cell content of cell being deleted */ | 
| u8 *data;       /* pPage->aData */ | 
| u8 *ptr;        /* Used to move bytes around within data[] */ | 
| @@ -5398,7 +5853,7 @@ static void dropCell(MemPage *pPage, int idx, int sz, int *pRC){ | 
| assert( sqlite3PagerIswriteable(pPage->pDbPage) ); | 
| assert( sqlite3_mutex_held(pPage->pBt->mutex) ); | 
| data = pPage->aData; | 
| -  ptr = &data[pPage->cellOffset + 2*idx]; | 
| +  ptr = &pPage->aCellIdx[2*idx]; | 
| pc = get2byte(ptr); | 
| hdr = pPage->hdrOffset; | 
| testcase( pc==get2byte(&data[hdr+5]) ); | 
| @@ -5412,11 +5867,8 @@ static void dropCell(MemPage *pPage, int idx, int sz, int *pRC){ | 
| *pRC = rc; | 
| return; | 
| } | 
| -  for(i=idx+1; i<pPage->nCell; i++, ptr+=2){ | 
| -    ptr[0] = ptr[2]; | 
| -    ptr[1] = ptr[3]; | 
| -  } | 
| pPage->nCell--; | 
| +  memmove(ptr, ptr+2, 2*(pPage->nCell - idx)); | 
| put2byte(&data[hdr+3], pPage->nCell); | 
| pPage->nFree += 2; | 
| } | 
| @@ -5428,15 +5880,10 @@ static void dropCell(MemPage *pPage, int idx, int sz, int *pRC){ | 
| ** If the cell content will fit on the page, then put it there.  If it | 
| ** will not fit, then make a copy of the cell content into pTemp if | 
| ** pTemp is not null.  Regardless of pTemp, allocate a new entry | 
| -** in pPage->aOvfl[] and make it point to the cell content (either | 
| +** in pPage->apOvfl[] and make it point to the cell content (either | 
| ** in pTemp or the original pCell) and also record its index. | 
| ** Allocating a new entry in pPage->aCell[] implies that | 
| ** pPage->nOverflow is incremented. | 
| -** | 
| -** If nSkip is non-zero, then do not copy the first nSkip bytes of the | 
| -** cell. The caller will overwrite them after this function returns. If | 
| -** nSkip is non-zero, then pCell may not point to an invalid memory location | 
| -** (but pCell+nSkip is always valid). | 
| */ | 
| static void insertCell( | 
| MemPage *pPage,   /* Page into which we are copying */ | 
| @@ -5453,15 +5900,14 @@ static void insertCell( | 
| int ins;          /* Index in data[] where new cell pointer is inserted */ | 
| int cellOffset;   /* Address of first cell pointer in data[] */ | 
| u8 *data;         /* The content of the whole page */ | 
| -  u8 *ptr;          /* Used for moving information around in data[] */ | 
| - | 
| -  int nSkip = (iChild ? 4 : 0); | 
|  | 
| if( *pRC ) return; | 
|  | 
| assert( i>=0 && i<=pPage->nCell+pPage->nOverflow ); | 
| -  assert( pPage->nCell<=MX_CELL(pPage->pBt) && MX_CELL(pPage->pBt)<=10921 ); | 
| -  assert( pPage->nOverflow<=ArraySize(pPage->aOvfl) ); | 
| +  assert( MX_CELL(pPage->pBt)<=10921 ); | 
| +  assert( pPage->nCell<=MX_CELL(pPage->pBt) || CORRUPT_DB ); | 
| +  assert( pPage->nOverflow<=ArraySize(pPage->apOvfl) ); | 
| +  assert( ArraySize(pPage->apOvfl)==ArraySize(pPage->aiOvfl) ); | 
| assert( sqlite3_mutex_held(pPage->pBt->mutex) ); | 
| /* The cell should normally be sized correctly.  However, when moving a | 
| ** malformed cell from a leaf page to an interior page, if the cell size | 
| @@ -5471,16 +5917,16 @@ static void insertCell( | 
| assert( sz==cellSizePtr(pPage, pCell) || (sz==8 && iChild>0) ); | 
| if( pPage->nOverflow || sz+2>pPage->nFree ){ | 
| if( pTemp ){ | 
| -      memcpy(pTemp+nSkip, pCell+nSkip, sz-nSkip); | 
| +      memcpy(pTemp, pCell, sz); | 
| pCell = pTemp; | 
| } | 
| if( iChild ){ | 
| put4byte(pCell, iChild); | 
| } | 
| j = pPage->nOverflow++; | 
| -    assert( j<(int)(sizeof(pPage->aOvfl)/sizeof(pPage->aOvfl[0])) ); | 
| -    pPage->aOvfl[j].pCell = pCell; | 
| -    pPage->aOvfl[j].idx = (u16)i; | 
| +    assert( j<(int)(sizeof(pPage->apOvfl)/sizeof(pPage->apOvfl[0])) ); | 
| +    pPage->apOvfl[j] = pCell; | 
| +    pPage->aiOvfl[j] = (u16)i; | 
| }else{ | 
| int rc = sqlite3PagerWrite(pPage->pDbPage); | 
| if( rc!=SQLITE_OK ){ | 
| @@ -5500,14 +5946,11 @@ static void insertCell( | 
| assert( idx+sz <= (int)pPage->pBt->usableSize ); | 
| pPage->nCell++; | 
| pPage->nFree -= (u16)(2 + sz); | 
| -    memcpy(&data[idx+nSkip], pCell+nSkip, sz-nSkip); | 
| +    memcpy(&data[idx], pCell, sz); | 
| if( iChild ){ | 
| put4byte(&data[idx], iChild); | 
| } | 
| -    for(j=end, ptr=&data[j]; j>ins; j-=2, ptr-=2){ | 
| -      ptr[0] = ptr[-2]; | 
| -      ptr[1] = ptr[-1]; | 
| -    } | 
| +    memmove(&data[ins+2], &data[ins], end-ins); | 
| put2byte(&data[ins], idx); | 
| put2byte(&data[pPage->hdrOffset+3], pPage->nCell); | 
| #ifndef SQLITE_OMIT_AUTOVACUUM | 
| @@ -5526,7 +5969,7 @@ static void insertCell( | 
| ** The cells are guaranteed to fit on the page. | 
| */ | 
| static void assemblePage( | 
| -  MemPage *pPage,   /* The page to be assemblied */ | 
| +  MemPage *pPage,   /* The page to be assembled */ | 
| int nCell,        /* The number of cells to add to this page */ | 
| u8 **apCell,      /* Pointers to cell bodies */ | 
| u16 *aSize        /* Sizes of the cells */ | 
| @@ -5548,13 +5991,14 @@ static void assemblePage( | 
| assert( pPage->nCell==0 ); | 
| assert( get2byteNotZero(&data[hdr+5])==nUsable ); | 
|  | 
| -  pCellptr = &data[pPage->cellOffset + nCell*2]; | 
| +  pCellptr = &pPage->aCellIdx[nCell*2]; | 
| cellbody = nUsable; | 
| for(i=nCell-1; i>=0; i--){ | 
| +    u16 sz = aSize[i]; | 
| pCellptr -= 2; | 
| -    cellbody -= aSize[i]; | 
| +    cellbody -= sz; | 
| put2byte(pCellptr, cellbody); | 
| -    memcpy(&data[cellbody], apCell[i], aSize[i]); | 
| +    memcpy(&data[cellbody], apCell[i], sz); | 
| } | 
| put2byte(&data[hdr+3], nCell); | 
| put2byte(&data[hdr+5], cellbody); | 
| @@ -5613,7 +6057,7 @@ static int balance_quick(MemPage *pParent, MemPage *pPage, u8 *pSpace){ | 
| assert( pPage->nOverflow==1 ); | 
|  | 
| /* This error condition is now caught prior to reaching this function */ | 
| -  if( pPage->nCell<=0 ) return SQLITE_CORRUPT_BKPT; | 
| +  if( pPage->nCell==0 ) return SQLITE_CORRUPT_BKPT; | 
|  | 
| /* Allocate a new page. This page will become the right-sibling of | 
| ** pPage. Make the parent page writable, so that the new divider cell | 
| @@ -5624,7 +6068,7 @@ static int balance_quick(MemPage *pParent, MemPage *pPage, u8 *pSpace){ | 
| if( rc==SQLITE_OK ){ | 
|  | 
| u8 *pOut = &pSpace[4]; | 
| -    u8 *pCell = pPage->aOvfl[0].pCell; | 
| +    u8 *pCell = pPage->apOvfl[0]; | 
| u16 szCell = cellSizePtr(pPage, pCell); | 
| u8 *pStop; | 
|  | 
| @@ -5734,7 +6178,7 @@ static int ptrmapCheckPages(MemPage **apPage, int nPage){ | 
| ** map entries are also updated so that the parent page is page pTo. | 
| ** | 
| ** If pFrom is currently carrying any overflow cells (entries in the | 
| -** MemPage.aOvfl[] array), they are not copied to pTo. | 
| +** MemPage.apOvfl[] array), they are not copied to pTo. | 
| ** | 
| ** Before returning, page pTo is reinitialized using btreeInitPage(). | 
| ** | 
| @@ -5823,11 +6267,15 @@ static void copyNodeContent(MemPage *pFrom, MemPage *pTo, int *pRC){ | 
| ** If aOvflSpace is set to a null pointer, this function returns | 
| ** SQLITE_NOMEM. | 
| */ | 
| +#if defined(_MSC_VER) && _MSC_VER >= 1700 && defined(_M_ARM) | 
| +#pragma optimize("", off) | 
| +#endif | 
| static int balance_nonroot( | 
| MemPage *pParent,               /* Parent page of siblings being balanced */ | 
| int iParentIdx,                 /* Index of "the page" in pParent */ | 
| u8 *aOvflSpace,                 /* page-size bytes of space for parent ovfl */ | 
| -  int isRoot                      /* True if pParent is a root-page */ | 
| +  int isRoot,                     /* True if pParent is a root-page */ | 
| +  int bBulk                       /* True if this call is part of a bulk load */ | 
| ){ | 
| BtShared *pBt;               /* The whole database */ | 
| int nCell = 0;               /* Number of cells in apCell[] */ | 
| @@ -5871,7 +6319,7 @@ static int balance_nonroot( | 
| ** is called (indirectly) from sqlite3BtreeDelete(). | 
| */ | 
| assert( pParent->nOverflow==0 || pParent->nOverflow==1 ); | 
| -  assert( pParent->nOverflow==0 || pParent->aOvfl[0].idx==iParentIdx ); | 
| +  assert( pParent->nOverflow==0 || pParent->aiOvfl[0]==iParentIdx ); | 
|  | 
| if( !aOvflSpace ){ | 
| return SQLITE_NOMEM; | 
| @@ -5891,18 +6339,19 @@ static int balance_nonroot( | 
| i = pParent->nOverflow + pParent->nCell; | 
| if( i<2 ){ | 
| nxDiv = 0; | 
| -    nOld = i+1; | 
| }else{ | 
| -    nOld = 3; | 
| +    assert( bBulk==0 || bBulk==1 ); | 
| if( iParentIdx==0 ){ | 
| nxDiv = 0; | 
| }else if( iParentIdx==i ){ | 
| -      nxDiv = i-2; | 
| +      nxDiv = i-2+bBulk; | 
| }else{ | 
| +      assert( bBulk==0 ); | 
| nxDiv = iParentIdx-1; | 
| } | 
| -    i = 2; | 
| +    i = 2-bBulk; | 
| } | 
| +  nOld = i+1; | 
| if( (i+nxDiv-pParent->nOverflow)==pParent->nCell ){ | 
| pRight = &pParent->aData[pParent->hdrOffset+8]; | 
| }else{ | 
| @@ -5910,7 +6359,7 @@ static int balance_nonroot( | 
| } | 
| pgno = get4byte(pRight); | 
| while( 1 ){ | 
| -    rc = getAndInitPage(pBt, pgno, &apOld[i]); | 
| +    rc = getAndInitPage(pBt, pgno, &apOld[i], 0); | 
| if( rc ){ | 
| memset(apOld, 0, (i+1)*sizeof(MemPage*)); | 
| goto balance_cleanup; | 
| @@ -5918,8 +6367,8 @@ static int balance_nonroot( | 
| nMaxCells += 1+apOld[i]->nCell+apOld[i]->nOverflow; | 
| if( (i--)==0 ) break; | 
|  | 
| -    if( i+nxDiv==pParent->aOvfl[0].idx && pParent->nOverflow ){ | 
| -      apDiv[i] = pParent->aOvfl[0].pCell; | 
| +    if( i+nxDiv==pParent->aiOvfl[0] && pParent->nOverflow ){ | 
| +      apDiv[i] = pParent->apOvfl[0]; | 
| pgno = get4byte(apDiv[i]); | 
| szNew[i] = cellSizePtr(pParent, apDiv[i]); | 
| pParent->nOverflow = 0; | 
| @@ -5935,13 +6384,15 @@ static int balance_nonroot( | 
| ** four bytes of the divider cell. So the pointer is safe to use | 
| ** later on. | 
| ** | 
| -      ** Unless SQLite is compiled in secure-delete mode. In this case, | 
| +      ** But not if we are in secure-delete mode. In secure-delete mode, | 
| ** the dropCell() routine will overwrite the entire cell with zeroes. | 
| ** In this case, temporarily copy the cell into the aOvflSpace[] | 
| ** buffer. It will be copied out again as soon as the aSpace[] buffer | 
| ** is allocated.  */ | 
| -      if( pBt->secureDelete ){ | 
| -        int iOff = SQLITE_PTR_TO_INT(apDiv[i]) - SQLITE_PTR_TO_INT(pParent->aData); | 
| +      if( pBt->btsFlags & BTS_SECURE_DELETE ){ | 
| +        int iOff; | 
| + | 
| +        iOff = SQLITE_PTR_TO_INT(apDiv[i]) - SQLITE_PTR_TO_INT(pParent->aData); | 
| if( (iOff+szNew[i])>(int)pBt->usableSize ){ | 
| rc = SQLITE_CORRUPT_BKPT; | 
| memset(apOld, 0, (i+1)*sizeof(MemPage*)); | 
| @@ -5980,7 +6431,7 @@ static int balance_nonroot( | 
| /* | 
| ** Load pointers to all cells on sibling pages and the divider cells | 
| ** into the local apCell[] array.  Make copies of the divider cells | 
| -  ** into space obtained from aSpace1[] and remove the the divider Cells | 
| +  ** into space obtained from aSpace1[] and remove the divider cells | 
| ** from pParent. | 
| ** | 
| ** If the siblings are on leaf pages, then the child pointers of the | 
| @@ -5994,7 +6445,7 @@ static int balance_nonroot( | 
| **       leafData:  1 if pPage holds key+data and pParent holds only keys. | 
| */ | 
| leafCorrection = apOld[0]->leaf*4; | 
| -  leafData = apOld[0]->hasData; | 
| +  leafData = apOld[0]->intKeyLeaf; | 
| for(i=0; i<nOld; i++){ | 
| int limit; | 
|  | 
| @@ -6008,12 +6459,24 @@ static int balance_nonroot( | 
| memcpy(pOld->aData, apOld[i]->aData, pBt->pageSize); | 
|  | 
| limit = pOld->nCell+pOld->nOverflow; | 
| -    for(j=0; j<limit; j++){ | 
| -      assert( nCell<nMaxCells ); | 
| -      apCell[nCell] = findOverflowCell(pOld, j); | 
| -      szCell[nCell] = cellSizePtr(pOld, apCell[nCell]); | 
| -      nCell++; | 
| -    } | 
| +    if( pOld->nOverflow>0 ){ | 
| +      for(j=0; j<limit; j++){ | 
| +        assert( nCell<nMaxCells ); | 
| +        apCell[nCell] = findOverflowCell(pOld, j); | 
| +        szCell[nCell] = cellSizePtr(pOld, apCell[nCell]); | 
| +        nCell++; | 
| +      } | 
| +    }else{ | 
| +      u8 *aData = pOld->aData; | 
| +      u16 maskPage = pOld->maskPage; | 
| +      u16 cellOffset = pOld->cellOffset; | 
| +      for(j=0; j<limit; j++){ | 
| +        assert( nCell<nMaxCells ); | 
| +        apCell[nCell] = findCellv2(aData, maskPage, cellOffset, j); | 
| +        szCell[nCell] = cellSizePtr(pOld, apCell[nCell]); | 
| +        nCell++; | 
| +      } | 
| +    } | 
| if( i<nOld-1 && !leafData){ | 
| u16 sz = (u16)szNew[i]; | 
| u8 *pTemp; | 
| @@ -6097,7 +6560,9 @@ static int balance_nonroot( | 
| d = r + 1 - leafData; | 
| assert( d<nMaxCells ); | 
| assert( r<nMaxCells ); | 
| -    while( szRight==0 || szRight+szCell[d]+2<=szLeft-(szCell[r]+2) ){ | 
| +    while( szRight==0 | 
| +       || (!bBulk && szRight+szCell[d]+2<=szLeft-(szCell[r]+2)) | 
| +    ){ | 
| szRight += szCell[d] + 2; | 
| szLeft -= szCell[r] + 2; | 
| cntNew[i-1]--; | 
| @@ -6111,8 +6576,14 @@ static int balance_nonroot( | 
| /* Either we found one or more cells (cntnew[0])>0) or pPage is | 
| ** a virtual root page.  A virtual root page is when the real root | 
| ** page is page 1 and we are the only child of that page. | 
| +  ** | 
| +  ** UPDATE:  The assert() below is not necessarily true if the database | 
| +  ** file is corrupt.  The corruption will be detected and reported later | 
| +  ** in this procedure so there is no need to act upon it now. | 
| */ | 
| +#if 0 | 
| assert( cntNew[0]>0 || (pParent->pgno==1 && pParent->nCell==0) ); | 
| +#endif | 
|  | 
| TRACE(("BALANCE: old: %d %d %d  ", | 
| apOld[0]->pgno, | 
| @@ -6138,7 +6609,7 @@ static int balance_nonroot( | 
| if( rc ) goto balance_cleanup; | 
| }else{ | 
| assert( i>0 ); | 
| -      rc = allocateBtreePage(pBt, &pNew, &pgno, pgno, 0); | 
| +      rc = allocateBtreePage(pBt, &pNew, &pgno, (bBulk ? 1 : pgno), 0); | 
| if( rc ) goto balance_cleanup; | 
| apNew[i] = pNew; | 
| nNew++; | 
| @@ -6164,7 +6635,7 @@ static int balance_nonroot( | 
| } | 
|  | 
| /* | 
| -  ** Put the new pages in accending order.  This helps to | 
| +  ** Put the new pages in ascending order.  This helps to | 
| ** keep entries in the disk file in order so that a scan | 
| ** of the table is a linear scan through the file.  That | 
| ** in turn helps the operating system to deliver pages | 
| @@ -6340,7 +6811,7 @@ static int balance_nonroot( | 
| MemPage *pOld = apCopy[0]; | 
| int nOverflow = pOld->nOverflow; | 
| int iNextOld = pOld->nCell + nOverflow; | 
| -    int iOverflow = (nOverflow ? pOld->aOvfl[0].idx : -1); | 
| +    int iOverflow = (nOverflow ? pOld->aiOvfl[0] : -1); | 
| j = 0;                             /* Current 'old' sibling page */ | 
| k = 0;                             /* Current 'new' sibling page */ | 
| for(i=0; i<nCell; i++){ | 
| @@ -6349,18 +6820,20 @@ static int balance_nonroot( | 
| /* Cell i is the cell immediately following the last cell on old | 
| ** sibling page j. If the siblings are not leaf pages of an | 
| ** intkey b-tree, then cell i was a divider cell. */ | 
| +        assert( j+1 < ArraySize(apCopy) ); | 
| +        assert( j+1 < nOld ); | 
| pOld = apCopy[++j]; | 
| iNextOld = i + !leafData + pOld->nCell + pOld->nOverflow; | 
| if( pOld->nOverflow ){ | 
| nOverflow = pOld->nOverflow; | 
| -          iOverflow = i + !leafData + pOld->aOvfl[0].idx; | 
| +          iOverflow = i + !leafData + pOld->aiOvfl[0]; | 
| } | 
| isDivider = !leafData; | 
| } | 
|  | 
| assert(nOverflow>0 || iOverflow<i ); | 
| -      assert(nOverflow<2 || pOld->aOvfl[0].idx==pOld->aOvfl[1].idx-1); | 
| -      assert(nOverflow<3 || pOld->aOvfl[1].idx==pOld->aOvfl[2].idx-1); | 
| +      assert(nOverflow<2 || pOld->aiOvfl[0]==pOld->aiOvfl[1]-1); | 
| +      assert(nOverflow<3 || pOld->aiOvfl[1]==pOld->aiOvfl[2]-1); | 
| if( i==iOverflow ){ | 
| isDivider = 1; | 
| if( (--nOverflow)>0 ){ | 
| @@ -6427,6 +6900,9 @@ balance_cleanup: | 
|  | 
| return rc; | 
| } | 
| +#if defined(_MSC_VER) && _MSC_VER >= 1700 && defined(_M_ARM) | 
| +#pragma optimize("", on) | 
| +#endif | 
|  | 
|  | 
| /* | 
| @@ -6481,7 +6957,10 @@ static int balance_deeper(MemPage *pRoot, MemPage **ppChild){ | 
| TRACE(("BALANCE: copy root %d into %d\n", pRoot->pgno, pChild->pgno)); | 
|  | 
| /* Copy the overflow cells from pRoot to pChild */ | 
| -  memcpy(pChild->aOvfl, pRoot->aOvfl, pRoot->nOverflow*sizeof(pRoot->aOvfl[0])); | 
| +  memcpy(pChild->aiOvfl, pRoot->aiOvfl, | 
| +         pRoot->nOverflow*sizeof(pRoot->aiOvfl[0])); | 
| +  memcpy(pChild->apOvfl, pRoot->apOvfl, | 
| +         pRoot->nOverflow*sizeof(pRoot->apOvfl[0])); | 
| pChild->nOverflow = pRoot->nOverflow; | 
|  | 
| /* Zero the contents of pRoot. Then install pChild as the right-child. */ | 
| @@ -6542,16 +7021,16 @@ static int balance(BtCursor *pCur){ | 
| rc = sqlite3PagerWrite(pParent->pDbPage); | 
| if( rc==SQLITE_OK ){ | 
| #ifndef SQLITE_OMIT_QUICKBALANCE | 
| -        if( pPage->hasData | 
| +        if( pPage->intKeyLeaf | 
| && pPage->nOverflow==1 | 
| -         && pPage->aOvfl[0].idx==pPage->nCell | 
| +         && pPage->aiOvfl[0]==pPage->nCell | 
| && pParent->pgno!=1 | 
| && pParent->nCell==iIdx | 
| ){ | 
| /* Call balance_quick() to create a new sibling of pPage on which | 
| ** to store the overflow cell. balance_quick() inserts a new cell | 
| ** into pParent, which may cause pParent overflow. If this | 
| -          ** happens, the next interation of the do-loop will balance pParent | 
| +          ** happens, the next iteration of the do-loop will balance pParent | 
| ** use either balance_nonroot() or balance_deeper(). Until this | 
| ** happens, the overflow cell is stored in the aBalanceQuickSpace[] | 
| ** buffer. | 
| @@ -6584,7 +7063,7 @@ static int balance(BtCursor *pCur){ | 
| ** pSpace buffer passed to the latter call to balance_nonroot(). | 
| */ | 
| u8 *pSpace = sqlite3PageMalloc(pCur->pBt->pageSize); | 
| -          rc = balance_nonroot(pParent, iIdx, pSpace, iPage==1); | 
| +          rc = balance_nonroot(pParent, iIdx, pSpace, iPage==1, pCur->hints); | 
| if( pFree ){ | 
| /* If pFree is not NULL, it points to the pSpace buffer used | 
| ** by a previous call to balance_nonroot(). Its contents are | 
| @@ -6628,7 +7107,7 @@ static int balance(BtCursor *pCur){ | 
| ** MovetoUnpacked() to seek cursor pCur to (pKey, nKey) has already | 
| ** been performed. seekResult is the search result returned (a negative | 
| ** number if pCur points at an entry that is smaller than (pKey, nKey), or | 
| -** a positive value if pCur points at an etry that is larger than | 
| +** a positive value if pCur points at an entry that is larger than | 
| ** (pKey, nKey)). | 
| ** | 
| ** If the seekResult parameter is non-zero, then the caller guarantees that | 
| @@ -6661,7 +7140,9 @@ int sqlite3BtreeInsert( | 
| } | 
|  | 
| assert( cursorHoldsMutex(pCur) ); | 
| -  assert( pCur->wrFlag && pBt->inTransaction==TRANS_WRITE && !pBt->readOnly ); | 
| +  assert( (pCur->curFlags & BTCF_WriteFlag)!=0 | 
| +              && pBt->inTransaction==TRANS_WRITE | 
| +              && (pBt->btsFlags & BTS_READ_ONLY)==0 ); | 
| assert( hasSharedCacheTableLock(p, pCur->pgnoRoot, pCur->pKeyInfo!=0, 2) ); | 
|  | 
| /* Assert that the caller has been consistent. If this cursor was opened | 
| @@ -6671,13 +7152,6 @@ int sqlite3BtreeInsert( | 
| ** blob of associated data.  */ | 
| assert( (pKey==0)==(pCur->pKeyInfo==0) ); | 
|  | 
| -  /* If this is an insert into a table b-tree, invalidate any incrblob | 
| -  ** cursors open on the row being replaced (assuming this is a replace | 
| -  ** operation - if it is not, the following is a no-op).  */ | 
| -  if( pCur->pKeyInfo==0 ){ | 
| -    invalidateIncrblobCursors(p, nKey, 0); | 
| -  } | 
| - | 
| /* Save the positions of any other cursors open on this table. | 
| ** | 
| ** In some cases, the call to btreeMoveto() below is a no-op. For | 
| @@ -6691,6 +7165,21 @@ int sqlite3BtreeInsert( | 
| */ | 
| rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur); | 
| if( rc ) return rc; | 
| + | 
| +  if( pCur->pKeyInfo==0 ){ | 
| +    /* If this is an insert into a table b-tree, invalidate any incrblob | 
| +    ** cursors open on the row being replaced */ | 
| +    invalidateIncrblobCursors(p, nKey, 0); | 
| + | 
| +    /* If the cursor is currently on the last row and we are appending a | 
| +    ** new row onto the end, set the "loc" to avoid an unnecessary btreeMoveto() | 
| +    ** call */ | 
| +    if( (pCur->curFlags&BTCF_ValidNKey)!=0 && nKey>0 | 
| +      && pCur->info.nKey==nKey-1 ){ | 
| +      loc = -1; | 
| +    } | 
| +  } | 
| + | 
| if( !loc ){ | 
| rc = btreeMoveto(pCur, pKey, nKey, appendBias, &loc); | 
| if( rc ) return rc; | 
| @@ -6705,9 +7194,8 @@ int sqlite3BtreeInsert( | 
| pCur->pgnoRoot, nKey, nData, pPage->pgno, | 
| loc==0 ? "overwrite" : "new entry")); | 
| assert( pPage->isInit ); | 
| -  allocateTempSpace(pBt); | 
| newCell = pBt->pTmpSpace; | 
| -  if( newCell==0 ) return SQLITE_NOMEM; | 
| +  assert( newCell!=0 ); | 
| rc = fillInCell(pPage, newCell, pKey, nKey, pData, nData, nZero, &szNew); | 
| if( rc ) goto end_insert; | 
| assert( szNew==cellSizePtr(pPage, newCell) ); | 
| @@ -6724,8 +7212,7 @@ int sqlite3BtreeInsert( | 
| if( !pPage->leaf ){ | 
| memcpy(newCell, oldCell, 4); | 
| } | 
| -    szOld = cellSizePtr(pPage, oldCell); | 
| -    rc = clearCell(pPage, oldCell); | 
| +    rc = clearCell(pPage, oldCell, &szOld); | 
| dropCell(pPage, idx, szOld, &rc); | 
| if( rc ) goto end_insert; | 
| }else if( loc<0 && pPage->nCell>0 ){ | 
| @@ -6737,9 +7224,9 @@ int sqlite3BtreeInsert( | 
| insertCell(pPage, idx, newCell, szNew, 0, 0, &rc); | 
| assert( rc!=SQLITE_OK || pPage->nCell>0 || pPage->nOverflow>0 ); | 
|  | 
| -  /* If no error has occured and pPage has an overflow cell, call balance() | 
| +  /* If no error has occurred and pPage has an overflow cell, call balance() | 
| ** to redistribute the cells within the tree. Since balance() may move | 
| -  ** the cursor, zero the BtCursor.info.nSize and BtCursor.validNKey | 
| +  ** the cursor, zero the BtCursor.info.nSize and BTCF_ValidNKey | 
| ** variables. | 
| ** | 
| ** Previous versions of SQLite called moveToRoot() to move the cursor | 
| @@ -6758,8 +7245,8 @@ int sqlite3BtreeInsert( | 
| ** row without seeking the cursor. This can be a big performance boost. | 
| */ | 
| pCur->info.nSize = 0; | 
| -  pCur->validNKey = 0; | 
| if( rc==SQLITE_OK && pPage->nOverflow ){ | 
| +    pCur->curFlags &= ~(BTCF_ValidNKey); | 
| rc = balance(pCur); | 
|  | 
| /* Must make sure nOverflow is reset to zero even if the balance() | 
| @@ -6777,7 +7264,7 @@ end_insert: | 
|  | 
| /* | 
| ** Delete the entry that the cursor is pointing to.  The cursor | 
| -** is left pointing at a arbitrary location. | 
| +** is left pointing at an arbitrary location. | 
| */ | 
| int sqlite3BtreeDelete(BtCursor *pCur){ | 
| Btree *p = pCur->pBtree; | 
| @@ -6787,11 +7274,12 @@ int sqlite3BtreeDelete(BtCursor *pCur){ | 
| unsigned char *pCell;                /* Pointer to cell to delete */ | 
| int iCellIdx;                        /* Index of cell to delete */ | 
| int iCellDepth;                      /* Depth of node containing pCell */ | 
| +  u16 szCell;                          /* Size of the cell being deleted */ | 
|  | 
| assert( cursorHoldsMutex(pCur) ); | 
| assert( pBt->inTransaction==TRANS_WRITE ); | 
| -  assert( !pBt->readOnly ); | 
| -  assert( pCur->wrFlag ); | 
| +  assert( (pBt->btsFlags & BTS_READ_ONLY)==0 ); | 
| +  assert( pCur->curFlags & BTCF_WriteFlag ); | 
| assert( hasSharedCacheTableLock(p, pCur->pgnoRoot, pCur->pKeyInfo!=0, 2) ); | 
| assert( !hasReadConflicts(p, pCur->pgnoRoot) ); | 
|  | 
| @@ -6801,12 +7289,6 @@ int sqlite3BtreeDelete(BtCursor *pCur){ | 
| return SQLITE_ERROR;  /* Something has gone awry. */ | 
| } | 
|  | 
| -  /* If this is a delete operation to remove a row from a table b-tree, | 
| -  ** invalidate any incrblob cursors open on the row being deleted.  */ | 
| -  if( pCur->pKeyInfo==0 ){ | 
| -    invalidateIncrblobCursors(p, pCur->info.nKey, 0); | 
| -  } | 
| - | 
| iCellDepth = pCur->iPage; | 
| iCellIdx = pCur->aiIdx[iCellDepth]; | 
| pPage = pCur->apPage[iCellDepth]; | 
| @@ -6820,7 +7302,7 @@ int sqlite3BtreeDelete(BtCursor *pCur){ | 
| ** sub-tree headed by the child page of the cell being deleted. This makes | 
| ** balancing the tree following the delete operation easier.  */ | 
| if( !pPage->leaf ){ | 
| -    int notUsed; | 
| +    int notUsed = 0; | 
| rc = sqlite3BtreePrevious(pCur, ¬Used); | 
| if( rc ) return rc; | 
| } | 
| @@ -6832,10 +7314,17 @@ int sqlite3BtreeDelete(BtCursor *pCur){ | 
| */ | 
| rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur); | 
| if( rc ) return rc; | 
| + | 
| +  /* If this is a delete operation to remove a row from a table b-tree, | 
| +  ** invalidate any incrblob cursors open on the row being deleted.  */ | 
| +  if( pCur->pKeyInfo==0 ){ | 
| +    invalidateIncrblobCursors(p, pCur->info.nKey, 0); | 
| +  } | 
| + | 
| rc = sqlite3PagerWrite(pPage->pDbPage); | 
| if( rc ) return rc; | 
| -  rc = clearCell(pPage, pCell); | 
| -  dropCell(pPage, iCellIdx, cellSizePtr(pPage, pCell), &rc); | 
| +  rc = clearCell(pPage, pCell, &szCell); | 
| +  dropCell(pPage, iCellIdx, szCell, &rc); | 
| if( rc ) return rc; | 
|  | 
| /* If the cell deleted was not located on a leaf page, then the cursor | 
| @@ -6852,10 +7341,8 @@ int sqlite3BtreeDelete(BtCursor *pCur){ | 
| pCell = findCell(pLeaf, pLeaf->nCell-1); | 
| nCell = cellSizePtr(pLeaf, pCell); | 
| assert( MX_CELL_SIZE(pBt) >= nCell ); | 
| - | 
| -    allocateTempSpace(pBt); | 
| pTmp = pBt->pTmpSpace; | 
| - | 
| +    assert( pTmp!=0 ); | 
| rc = sqlite3PagerWrite(pLeaf->pDbPage); | 
| insertCell(pPage, iCellIdx, pCell-4, nCell+4, pTmp, n, &rc); | 
| dropCell(pLeaf, pLeaf->nCell-1, nCell, &rc); | 
| @@ -6911,7 +7398,7 @@ static int btreeCreateTable(Btree *p, int *piTable, int createTabFlags){ | 
|  | 
| assert( sqlite3BtreeHoldsMutex(p) ); | 
| assert( pBt->inTransaction==TRANS_WRITE ); | 
| -  assert( !pBt->readOnly ); | 
| +  assert( (pBt->btsFlags & BTS_READ_ONLY)==0 ); | 
|  | 
| #ifdef SQLITE_OMIT_AUTOVACUUM | 
| rc = allocateBtreePage(pBt, &pRoot, &pgnoRoot, 1, 0); | 
| @@ -6950,7 +7437,7 @@ static int btreeCreateTable(Btree *p, int *piTable, int createTabFlags){ | 
| ** be moved to the allocated page (unless the allocated page happens | 
| ** to reside at pgnoRoot). | 
| */ | 
| -    rc = allocateBtreePage(pBt, &pPageMove, &pgnoMove, pgnoRoot, 1); | 
| +    rc = allocateBtreePage(pBt, &pPageMove, &pgnoMove, pgnoRoot, BTALLOC_EXACT); | 
| if( rc!=SQLITE_OK ){ | 
| return rc; | 
| } | 
| @@ -6965,7 +7452,14 @@ static int btreeCreateTable(Btree *p, int *piTable, int createTabFlags){ | 
| u8 eType = 0; | 
| Pgno iPtrPage = 0; | 
|  | 
| +      /* Save the positions of any open cursors. This is required in | 
| +      ** case they are holding a reference to an xFetch reference | 
| +      ** corresponding to page pgnoRoot.  */ | 
| +      rc = saveAllCursors(pBt, 0, 0); | 
| releasePage(pPageMove); | 
| +      if( rc!=SQLITE_OK ){ | 
| +        return rc; | 
| +      } | 
|  | 
| /* Move the page currently at pgnoRoot to pgnoMove. */ | 
| rc = btreeGetPage(pBt, pgnoRoot, &pRoot, 0); | 
| @@ -7059,25 +7553,28 @@ static int clearDatabasePage( | 
| int rc; | 
| unsigned char *pCell; | 
| int i; | 
| +  int hdr; | 
| +  u16 szCell; | 
|  | 
| assert( sqlite3_mutex_held(pBt->mutex) ); | 
| if( pgno>btreePagecount(pBt) ){ | 
| return SQLITE_CORRUPT_BKPT; | 
| } | 
|  | 
| -  rc = getAndInitPage(pBt, pgno, &pPage); | 
| +  rc = getAndInitPage(pBt, pgno, &pPage, 0); | 
| if( rc ) return rc; | 
| +  hdr = pPage->hdrOffset; | 
| for(i=0; i<pPage->nCell; i++){ | 
| pCell = findCell(pPage, i); | 
| if( !pPage->leaf ){ | 
| rc = clearDatabasePage(pBt, get4byte(pCell), 1, pnChange); | 
| if( rc ) goto cleardatabasepage_out; | 
| } | 
| -    rc = clearCell(pPage, pCell); | 
| +    rc = clearCell(pPage, pCell, &szCell); | 
| if( rc ) goto cleardatabasepage_out; | 
| } | 
| if( !pPage->leaf ){ | 
| -    rc = clearDatabasePage(pBt, get4byte(&pPage->aData[8]), 1, pnChange); | 
| +    rc = clearDatabasePage(pBt, get4byte(&pPage->aData[hdr+8]), 1, pnChange); | 
| if( rc ) goto cleardatabasepage_out; | 
| }else if( pnChange ){ | 
| assert( pPage->intKey ); | 
| @@ -7086,7 +7583,7 @@ static int clearDatabasePage( | 
| if( freePageFlag ){ | 
| freePage(pPage, &rc); | 
| }else if( (rc = sqlite3PagerWrite(pPage->pDbPage))==0 ){ | 
| -    zeroPage(pPage, pPage->aData[0] | PTF_LEAF); | 
| +    zeroPage(pPage, pPage->aData[hdr] | PTF_LEAF); | 
| } | 
|  | 
| cleardatabasepage_out: | 
| @@ -7113,13 +7610,13 @@ int sqlite3BtreeClearTable(Btree *p, int iTable, int *pnChange){ | 
| sqlite3BtreeEnter(p); | 
| assert( p->inTrans==TRANS_WRITE ); | 
|  | 
| -  /* Invalidate all incrblob cursors open on table iTable (assuming iTable | 
| -  ** is the root of a table b-tree - if it is not, the following call is | 
| -  ** a no-op).  */ | 
| -  invalidateIncrblobCursors(p, 0, 1); | 
| - | 
| rc = saveAllCursors(pBt, (Pgno)iTable, 0); | 
| + | 
| if( SQLITE_OK==rc ){ | 
| +    /* Invalidate all incrblob cursors open on table iTable (assuming iTable | 
| +    ** is the root of a table b-tree - if it is not, the following call is | 
| +    ** a no-op).  */ | 
| +    invalidateIncrblobCursors(p, 0, 1); | 
| rc = clearDatabasePage(pBt, (Pgno)iTable, 0, pnChange); | 
| } | 
| sqlite3BtreeLeave(p); | 
| @@ -7127,6 +7624,15 @@ int sqlite3BtreeClearTable(Btree *p, int iTable, int *pnChange){ | 
| } | 
|  | 
| /* | 
| +** Delete all information from the single table that pCur is open on. | 
| +** | 
| +** This routine only works for pCur on an ephemeral table. | 
| +*/ | 
| +int sqlite3BtreeClearTableOfCursor(BtCursor *pCur){ | 
| +  return sqlite3BtreeClearTable(pCur->pBtree, pCur->pgnoRoot, 0); | 
| +} | 
| + | 
| +/* | 
| ** Erase all information in a table and add the root of the table to | 
| ** the freelist.  Except, the root of the principle table (the one on | 
| ** page 1) is never added to the freelist. | 
| @@ -7285,7 +7791,9 @@ void sqlite3BtreeGetMeta(Btree *p, int idx, u32 *pMeta){ | 
| /* If auto-vacuum is disabled in this build and this is an auto-vacuum | 
| ** database, mark the database as read-only.  */ | 
| #ifdef SQLITE_OMIT_AUTOVACUUM | 
| -  if( idx==BTREE_LARGEST_ROOT_PAGE && *pMeta>0 ) pBt->readOnly = 1; | 
| +  if( idx==BTREE_LARGEST_ROOT_PAGE && *pMeta>0 ){ | 
| +    pBt->btsFlags |= BTS_READ_ONLY; | 
| +  } | 
| #endif | 
|  | 
| sqlite3BtreeLeave(p); | 
| @@ -7331,6 +7839,11 @@ int sqlite3BtreeUpdateMeta(Btree *p, int idx, u32 iMeta){ | 
| int sqlite3BtreeCount(BtCursor *pCur, i64 *pnEntry){ | 
| i64 nEntry = 0;                      /* Value to return in *pnEntry */ | 
| int rc;                              /* Return code */ | 
| + | 
| +  if( pCur->pgnoRoot==0 ){ | 
| +    *pnEntry = 0; | 
| +    return SQLITE_OK; | 
| +  } | 
| rc = moveToRoot(pCur); | 
|  | 
| /* Unless an error occurs, the following loop runs one iteration for each | 
| @@ -7403,11 +7916,11 @@ Pager *sqlite3BtreePager(Btree *p){ | 
| */ | 
| static void checkAppendMsg( | 
| IntegrityCk *pCheck, | 
| -  char *zMsg1, | 
| const char *zFormat, | 
| ... | 
| ){ | 
| va_list ap; | 
| +  char zBuf[200]; | 
| if( !pCheck->mxErr ) return; | 
| pCheck->mxErr--; | 
| pCheck->nErr++; | 
| @@ -7415,37 +7928,58 @@ static void checkAppendMsg( | 
| if( pCheck->errMsg.nChar ){ | 
| sqlite3StrAccumAppend(&pCheck->errMsg, "\n", 1); | 
| } | 
| -  if( zMsg1 ){ | 
| -    sqlite3StrAccumAppend(&pCheck->errMsg, zMsg1, -1); | 
| +  if( pCheck->zPfx ){ | 
| +    sqlite3_snprintf(sizeof(zBuf), zBuf, pCheck->zPfx, pCheck->v1, pCheck->v2); | 
| +    sqlite3StrAccumAppendAll(&pCheck->errMsg, zBuf); | 
| } | 
| sqlite3VXPrintf(&pCheck->errMsg, 1, zFormat, ap); | 
| va_end(ap); | 
| -  if( pCheck->errMsg.mallocFailed ){ | 
| +  if( pCheck->errMsg.accError==STRACCUM_NOMEM ){ | 
| pCheck->mallocFailed = 1; | 
| } | 
| } | 
| #endif /* SQLITE_OMIT_INTEGRITY_CHECK */ | 
|  | 
| #ifndef SQLITE_OMIT_INTEGRITY_CHECK | 
| + | 
| +/* | 
| +** Return non-zero if the bit in the IntegrityCk.aPgRef[] array that | 
| +** corresponds to page iPg is already set. | 
| +*/ | 
| +static int getPageReferenced(IntegrityCk *pCheck, Pgno iPg){ | 
| +  assert( iPg<=pCheck->nPage && sizeof(pCheck->aPgRef[0])==1 ); | 
| +  return (pCheck->aPgRef[iPg/8] & (1 << (iPg & 0x07))); | 
| +} | 
| + | 
| +/* | 
| +** Set the bit in the IntegrityCk.aPgRef[] array that corresponds to page iPg. | 
| +*/ | 
| +static void setPageReferenced(IntegrityCk *pCheck, Pgno iPg){ | 
| +  assert( iPg<=pCheck->nPage && sizeof(pCheck->aPgRef[0])==1 ); | 
| +  pCheck->aPgRef[iPg/8] |= (1 << (iPg & 0x07)); | 
| +} | 
| + | 
| + | 
| /* | 
| ** Add 1 to the reference count for page iPage.  If this is the second | 
| ** reference to the page, add an error message to pCheck->zErrMsg. | 
| -** Return 1 if there are 2 ore more references to the page and 0 if | 
| +** Return 1 if there are 2 or more references to the page and 0 | 
| ** if this is the first reference to the page. | 
| ** | 
| ** Also check that the page number is in bounds. | 
| */ | 
| -static int checkRef(IntegrityCk *pCheck, Pgno iPage, char *zContext){ | 
| +static int checkRef(IntegrityCk *pCheck, Pgno iPage){ | 
| if( iPage==0 ) return 1; | 
| if( iPage>pCheck->nPage ){ | 
| -    checkAppendMsg(pCheck, zContext, "invalid page number %d", iPage); | 
| +    checkAppendMsg(pCheck, "invalid page number %d", iPage); | 
| return 1; | 
| } | 
| -  if( pCheck->anRef[iPage]==1 ){ | 
| -    checkAppendMsg(pCheck, zContext, "2nd reference to page %d", iPage); | 
| +  if( getPageReferenced(pCheck, iPage) ){ | 
| +    checkAppendMsg(pCheck, "2nd reference to page %d", iPage); | 
| return 1; | 
| } | 
| -  return  (pCheck->anRef[iPage]++)>1; | 
| +  setPageReferenced(pCheck, iPage); | 
| +  return 0; | 
| } | 
|  | 
| #ifndef SQLITE_OMIT_AUTOVACUUM | 
| @@ -7458,8 +7992,7 @@ static void checkPtrmap( | 
| IntegrityCk *pCheck,   /* Integrity check context */ | 
| Pgno iChild,           /* Child page number */ | 
| u8 eType,              /* Expected pointer map type */ | 
| -  Pgno iParent,          /* Expected pointer map parent page number */ | 
| -  char *zContext         /* Context description (used for error msg) */ | 
| +  Pgno iParent           /* Expected pointer map parent page number */ | 
| ){ | 
| int rc; | 
| u8 ePtrmapType; | 
| @@ -7468,12 +8001,12 @@ static void checkPtrmap( | 
| rc = ptrmapGet(pCheck->pBt, iChild, &ePtrmapType, &iPtrmapParent); | 
| if( rc!=SQLITE_OK ){ | 
| if( rc==SQLITE_NOMEM || rc==SQLITE_IOERR_NOMEM ) pCheck->mallocFailed = 1; | 
| -    checkAppendMsg(pCheck, zContext, "Failed to read ptrmap key=%d", iChild); | 
| +    checkAppendMsg(pCheck, "Failed to read ptrmap key=%d", iChild); | 
| return; | 
| } | 
|  | 
| if( ePtrmapType!=eType || iPtrmapParent!=iParent ){ | 
| -    checkAppendMsg(pCheck, zContext, | 
| +    checkAppendMsg(pCheck, | 
| "Bad ptr map entry key=%d expected=(%d,%d) got=(%d,%d)", | 
| iChild, eType, iParent, ePtrmapType, iPtrmapParent); | 
| } | 
| @@ -7488,8 +8021,7 @@ static void checkList( | 
| IntegrityCk *pCheck,  /* Integrity checking context */ | 
| int isFreeList,       /* True for a freelist.  False for overflow page list */ | 
| int iPage,            /* Page number for first page in the list */ | 
| -  int N,                /* Expected number of pages in the list */ | 
| -  char *zContext        /* Context for error messages */ | 
| +  int N                 /* Expected number of pages in the list */ | 
| ){ | 
| int i; | 
| int expected = N; | 
| @@ -7498,14 +8030,14 @@ static void checkList( | 
| DbPage *pOvflPage; | 
| unsigned char *pOvflData; | 
| if( iPage<1 ){ | 
| -      checkAppendMsg(pCheck, zContext, | 
| +      checkAppendMsg(pCheck, | 
| "%d of %d pages missing from overflow list starting at %d", | 
| N+1, expected, iFirst); | 
| break; | 
| } | 
| -    if( checkRef(pCheck, iPage, zContext) ) break; | 
| +    if( checkRef(pCheck, iPage) ) break; | 
| if( sqlite3PagerGet(pCheck->pPager, (Pgno)iPage, &pOvflPage) ){ | 
| -      checkAppendMsg(pCheck, zContext, "failed to get page %d", iPage); | 
| +      checkAppendMsg(pCheck, "failed to get page %d", iPage); | 
| break; | 
| } | 
| pOvflData = (unsigned char *)sqlite3PagerGetData(pOvflPage); | 
| @@ -7513,11 +8045,11 @@ static void checkList( | 
| int n = get4byte(&pOvflData[4]); | 
| #ifndef SQLITE_OMIT_AUTOVACUUM | 
| if( pCheck->pBt->autoVacuum ){ | 
| -        checkPtrmap(pCheck, iPage, PTRMAP_FREEPAGE, 0, zContext); | 
| +        checkPtrmap(pCheck, iPage, PTRMAP_FREEPAGE, 0); | 
| } | 
| #endif | 
| if( n>(int)pCheck->pBt->usableSize/4-2 ){ | 
| -        checkAppendMsg(pCheck, zContext, | 
| +        checkAppendMsg(pCheck, | 
| "freelist leaf count too big on page %d", iPage); | 
| N--; | 
| }else{ | 
| @@ -7525,10 +8057,10 @@ static void checkList( | 
| Pgno iFreePage = get4byte(&pOvflData[8+i*4]); | 
| #ifndef SQLITE_OMIT_AUTOVACUUM | 
| if( pCheck->pBt->autoVacuum ){ | 
| -            checkPtrmap(pCheck, iFreePage, PTRMAP_FREEPAGE, 0, zContext); | 
| +            checkPtrmap(pCheck, iFreePage, PTRMAP_FREEPAGE, 0); | 
| } | 
| #endif | 
| -          checkRef(pCheck, iFreePage, zContext); | 
| +          checkRef(pCheck, iFreePage); | 
| } | 
| N -= n; | 
| } | 
| @@ -7541,7 +8073,7 @@ static void checkList( | 
| */ | 
| if( pCheck->pBt->autoVacuum && N>0 ){ | 
| i = get4byte(pOvflData); | 
| -        checkPtrmap(pCheck, i, PTRMAP_OVERFLOW2, iPage, zContext); | 
| +        checkPtrmap(pCheck, i, PTRMAP_OVERFLOW2, iPage); | 
| } | 
| } | 
| #endif | 
| @@ -7573,7 +8105,6 @@ static void checkList( | 
| static int checkTreePage( | 
| IntegrityCk *pCheck,  /* Context for the sanity check */ | 
| int iPage,            /* Page number of the page to check */ | 
| -  char *zParentContext, /* Parent context */ | 
| i64 *pnParentMinKey, | 
| i64 *pnParentMaxKey | 
| ){ | 
| @@ -7584,23 +8115,26 @@ static int checkTreePage( | 
| u8 *data; | 
| BtShared *pBt; | 
| int usableSize; | 
| -  char zContext[100]; | 
| char *hit = 0; | 
| i64 nMinKey = 0; | 
| i64 nMaxKey = 0; | 
| - | 
| -  sqlite3_snprintf(sizeof(zContext), zContext, "Page %d: ", iPage); | 
| +  const char *saved_zPfx = pCheck->zPfx; | 
| +  int saved_v1 = pCheck->v1; | 
| +  int saved_v2 = pCheck->v2; | 
|  | 
| /* Check that the page exists | 
| */ | 
| pBt = pCheck->pBt; | 
| usableSize = pBt->usableSize; | 
| if( iPage==0 ) return 0; | 
| -  if( checkRef(pCheck, iPage, zParentContext) ) return 0; | 
| +  if( checkRef(pCheck, iPage) ) return 0; | 
| +  pCheck->zPfx = "Page %d: "; | 
| +  pCheck->v1 = iPage; | 
| if( (rc = btreeGetPage(pBt, (Pgno)iPage, &pPage, 0))!=0 ){ | 
| -    checkAppendMsg(pCheck, zContext, | 
| +    checkAppendMsg(pCheck, | 
| "unable to get the page. error code=%d", rc); | 
| -    return 0; | 
| +    depth = -1; | 
| +    goto end_of_check; | 
| } | 
|  | 
| /* Clear MemPage.isInit to make sure the corruption detection code in | 
| @@ -7608,10 +8142,11 @@ static int checkTreePage( | 
| pPage->isInit = 0; | 
| if( (rc = btreeInitPage(pPage))!=0 ){ | 
| assert( rc==SQLITE_CORRUPT );  /* The only possible error from InitPage */ | 
| -    checkAppendMsg(pCheck, zContext, | 
| +    checkAppendMsg(pCheck, | 
| "btreeInitPage() returns error code %d", rc); | 
| releasePage(pPage); | 
| -    return 0; | 
| +    depth = -1; | 
| +    goto end_of_check; | 
| } | 
|  | 
| /* Check out all the cells. | 
| @@ -7624,23 +8159,23 @@ static int checkTreePage( | 
|  | 
| /* Check payload overflow pages | 
| */ | 
| -    sqlite3_snprintf(sizeof(zContext), zContext, | 
| -             "On tree page %d cell %d: ", iPage, i); | 
| +    pCheck->zPfx = "On tree page %d cell %d: "; | 
| +    pCheck->v1 = iPage; | 
| +    pCheck->v2 = i; | 
| pCell = findCell(pPage,i); | 
| btreeParseCellPtr(pPage, pCell, &info); | 
| -    sz = info.nData; | 
| -    if( !pPage->intKey ) sz += (int)info.nKey; | 
| +    sz = info.nPayload; | 
| /* For intKey pages, check that the keys are in order. | 
| */ | 
| -    else if( i==0 ) nMinKey = nMaxKey = info.nKey; | 
| -    else{ | 
| -      if( info.nKey <= nMaxKey ){ | 
| -        checkAppendMsg(pCheck, zContext, | 
| -            "Rowid %lld out of order (previous was %lld)", info.nKey, nMaxKey); | 
| +    if( pPage->intKey ){ | 
| +      if( i==0 ){ | 
| +        nMinKey = nMaxKey = info.nKey; | 
| +      }else if( info.nKey <= nMaxKey ){ | 
| +        checkAppendMsg(pCheck, | 
| +           "Rowid %lld out of order (previous was %lld)", info.nKey, nMaxKey); | 
| } | 
| nMaxKey = info.nKey; | 
| } | 
| -    assert( sz==info.nPayload ); | 
| if( (sz>info.nLocal) | 
| && (&pCell[info.iOverflow]<=&pPage->aData[pBt->usableSize]) | 
| ){ | 
| @@ -7648,10 +8183,10 @@ static int checkTreePage( | 
| Pgno pgnoOvfl = get4byte(&pCell[info.iOverflow]); | 
| #ifndef SQLITE_OMIT_AUTOVACUUM | 
| if( pBt->autoVacuum ){ | 
| -        checkPtrmap(pCheck, pgnoOvfl, PTRMAP_OVERFLOW1, iPage, zContext); | 
| +        checkPtrmap(pCheck, pgnoOvfl, PTRMAP_OVERFLOW1, iPage); | 
| } | 
| #endif | 
| -      checkList(pCheck, 0, pgnoOvfl, nPage, zContext); | 
| +      checkList(pCheck, 0, pgnoOvfl, nPage); | 
| } | 
|  | 
| /* Check sanity of left child page. | 
| @@ -7660,12 +8195,12 @@ static int checkTreePage( | 
| pgno = get4byte(pCell); | 
| #ifndef SQLITE_OMIT_AUTOVACUUM | 
| if( pBt->autoVacuum ){ | 
| -        checkPtrmap(pCheck, pgno, PTRMAP_BTREE, iPage, zContext); | 
| +        checkPtrmap(pCheck, pgno, PTRMAP_BTREE, iPage); | 
| } | 
| #endif | 
| -      d2 = checkTreePage(pCheck, pgno, zContext, &nMinKey, i==0 ? NULL : &nMaxKey); | 
| +      d2 = checkTreePage(pCheck, pgno, &nMinKey, i==0?NULL:&nMaxKey); | 
| if( i>0 && d2!=depth ){ | 
| -        checkAppendMsg(pCheck, zContext, "Child page depth differs"); | 
| +        checkAppendMsg(pCheck, "Child page depth differs"); | 
| } | 
| depth = d2; | 
| } | 
| @@ -7673,37 +8208,39 @@ static int checkTreePage( | 
|  | 
| if( !pPage->leaf ){ | 
| pgno = get4byte(&pPage->aData[pPage->hdrOffset+8]); | 
| -    sqlite3_snprintf(sizeof(zContext), zContext, | 
| -                     "On page %d at right child: ", iPage); | 
| +    pCheck->zPfx = "On page %d at right child: "; | 
| +    pCheck->v1 = iPage; | 
| #ifndef SQLITE_OMIT_AUTOVACUUM | 
| if( pBt->autoVacuum ){ | 
| -      checkPtrmap(pCheck, pgno, PTRMAP_BTREE, iPage, zContext); | 
| +      checkPtrmap(pCheck, pgno, PTRMAP_BTREE, iPage); | 
| } | 
| #endif | 
| -    checkTreePage(pCheck, pgno, zContext, NULL, !pPage->nCell ? NULL : &nMaxKey); | 
| +    checkTreePage(pCheck, pgno, NULL, !pPage->nCell?NULL:&nMaxKey); | 
| } | 
|  | 
| /* For intKey leaf pages, check that the min/max keys are in order | 
| ** with any left/parent/right pages. | 
| */ | 
| +  pCheck->zPfx = "Page %d: "; | 
| +  pCheck->v1 = iPage; | 
| if( pPage->leaf && pPage->intKey ){ | 
| /* if we are a left child page */ | 
| if( pnParentMinKey ){ | 
| /* if we are the left most child page */ | 
| if( !pnParentMaxKey ){ | 
| if( nMaxKey > *pnParentMinKey ){ | 
| -          checkAppendMsg(pCheck, zContext, | 
| +          checkAppendMsg(pCheck, | 
| "Rowid %lld out of order (max larger than parent min of %lld)", | 
| nMaxKey, *pnParentMinKey); | 
| } | 
| }else{ | 
| if( nMinKey <= *pnParentMinKey ){ | 
| -          checkAppendMsg(pCheck, zContext, | 
| +          checkAppendMsg(pCheck, | 
| "Rowid %lld out of order (min less than parent min of %lld)", | 
| nMinKey, *pnParentMinKey); | 
| } | 
| if( nMaxKey > *pnParentMaxKey ){ | 
| -          checkAppendMsg(pCheck, zContext, | 
| +          checkAppendMsg(pCheck, | 
| "Rowid %lld out of order (max larger than parent max of %lld)", | 
| nMaxKey, *pnParentMaxKey); | 
| } | 
| @@ -7712,7 +8249,7 @@ static int checkTreePage( | 
| /* else if we're a right child page */ | 
| } else if( pnParentMaxKey ){ | 
| if( nMinKey <= *pnParentMaxKey ){ | 
| -        checkAppendMsg(pCheck, zContext, | 
| +        checkAppendMsg(pCheck, | 
| "Rowid %lld out of order (min less than parent max of %lld)", | 
| nMinKey, *pnParentMaxKey); | 
| } | 
| @@ -7724,6 +8261,7 @@ static int checkTreePage( | 
| data = pPage->aData; | 
| hdr = pPage->hdrOffset; | 
| hit = sqlite3PageMalloc( pBt->pageSize ); | 
| +  pCheck->zPfx = 0; | 
| if( hit==0 ){ | 
| pCheck->mallocFailed = 1; | 
| }else{ | 
| @@ -7741,7 +8279,8 @@ static int checkTreePage( | 
| size = cellSizePtr(pPage, &data[pc]); | 
| } | 
| if( (int)(pc+size-1)>=usableSize ){ | 
| -        checkAppendMsg(pCheck, 0, | 
| +        pCheck->zPfx = 0; | 
| +        checkAppendMsg(pCheck, | 
| "Corruption detected in cell %d on page %d",i,iPage); | 
| }else{ | 
| for(j=pc+size-1; j>=pc; j--) hit[j]++; | 
| @@ -7763,19 +8302,24 @@ static int checkTreePage( | 
| if( hit[i]==0 ){ | 
| cnt++; | 
| }else if( hit[i]>1 ){ | 
| -        checkAppendMsg(pCheck, 0, | 
| +        checkAppendMsg(pCheck, | 
| "Multiple uses for byte %d of page %d", i, iPage); | 
| break; | 
| } | 
| } | 
| if( cnt!=data[hdr+7] ){ | 
| -      checkAppendMsg(pCheck, 0, | 
| +      checkAppendMsg(pCheck, | 
| "Fragmentation of %d bytes reported as %d on page %d", | 
| cnt, data[hdr+7], iPage); | 
| } | 
| } | 
| sqlite3PageFree(hit); | 
| releasePage(pPage); | 
| + | 
| +end_of_check: | 
| +  pCheck->zPfx = saved_zPfx; | 
| +  pCheck->v1 = saved_v1; | 
| +  pCheck->v2 = saved_v2; | 
| return depth+1; | 
| } | 
| #endif /* SQLITE_OMIT_INTEGRITY_CHECK */ | 
| @@ -7816,29 +8360,32 @@ char *sqlite3BtreeIntegrityCheck( | 
| sCheck.mxErr = mxErr; | 
| sCheck.nErr = 0; | 
| sCheck.mallocFailed = 0; | 
| +  sCheck.zPfx = 0; | 
| +  sCheck.v1 = 0; | 
| +  sCheck.v2 = 0; | 
| *pnErr = 0; | 
| if( sCheck.nPage==0 ){ | 
| sqlite3BtreeLeave(p); | 
| return 0; | 
| } | 
| -  sCheck.anRef = sqlite3Malloc( (sCheck.nPage+1)*sizeof(sCheck.anRef[0]) ); | 
| -  if( !sCheck.anRef ){ | 
| + | 
| +  sCheck.aPgRef = sqlite3MallocZero((sCheck.nPage / 8)+ 1); | 
| +  if( !sCheck.aPgRef ){ | 
| *pnErr = 1; | 
| sqlite3BtreeLeave(p); | 
| return 0; | 
| } | 
| -  for(i=0; i<=sCheck.nPage; i++){ sCheck.anRef[i] = 0; } | 
| i = PENDING_BYTE_PAGE(pBt); | 
| -  if( i<=sCheck.nPage ){ | 
| -    sCheck.anRef[i] = 1; | 
| -  } | 
| -  sqlite3StrAccumInit(&sCheck.errMsg, zErr, sizeof(zErr), 20000); | 
| +  if( i<=sCheck.nPage ) setPageReferenced(&sCheck, i); | 
| +  sqlite3StrAccumInit(&sCheck.errMsg, zErr, sizeof(zErr), SQLITE_MAX_LENGTH); | 
| sCheck.errMsg.useMalloc = 2; | 
|  | 
| /* Check the integrity of the freelist | 
| */ | 
| +  sCheck.zPfx = "Main freelist: "; | 
| checkList(&sCheck, 1, get4byte(&pBt->pPage1->aData[32]), | 
| -            get4byte(&pBt->pPage1->aData[36]), "Main freelist: "); | 
| +            get4byte(&pBt->pPage1->aData[36])); | 
| +  sCheck.zPfx = 0; | 
|  | 
| /* Check all the tables. | 
| */ | 
| @@ -7846,30 +8393,32 @@ char *sqlite3BtreeIntegrityCheck( | 
| if( aRoot[i]==0 ) continue; | 
| #ifndef SQLITE_OMIT_AUTOVACUUM | 
| if( pBt->autoVacuum && aRoot[i]>1 ){ | 
| -      checkPtrmap(&sCheck, aRoot[i], PTRMAP_ROOTPAGE, 0, 0); | 
| +      checkPtrmap(&sCheck, aRoot[i], PTRMAP_ROOTPAGE, 0); | 
| } | 
| #endif | 
| -    checkTreePage(&sCheck, aRoot[i], "List of tree roots: ", NULL, NULL); | 
| +    sCheck.zPfx = "List of tree roots: "; | 
| +    checkTreePage(&sCheck, aRoot[i], NULL, NULL); | 
| +    sCheck.zPfx = 0; | 
| } | 
|  | 
| /* Make sure every page in the file is referenced | 
| */ | 
| for(i=1; i<=sCheck.nPage && sCheck.mxErr; i++){ | 
| #ifdef SQLITE_OMIT_AUTOVACUUM | 
| -    if( sCheck.anRef[i]==0 ){ | 
| -      checkAppendMsg(&sCheck, 0, "Page %d is never used", i); | 
| +    if( getPageReferenced(&sCheck, i)==0 ){ | 
| +      checkAppendMsg(&sCheck, "Page %d is never used", i); | 
| } | 
| #else | 
| /* If the database supports auto-vacuum, make sure no tables contain | 
| ** references to pointer-map pages. | 
| */ | 
| -    if( sCheck.anRef[i]==0 && | 
| +    if( getPageReferenced(&sCheck, i)==0 && | 
| (PTRMAP_PAGENO(pBt, i)!=i || !pBt->autoVacuum) ){ | 
| -      checkAppendMsg(&sCheck, 0, "Page %d is never used", i); | 
| +      checkAppendMsg(&sCheck, "Page %d is never used", i); | 
| } | 
| -    if( sCheck.anRef[i]!=0 && | 
| +    if( getPageReferenced(&sCheck, i)!=0 && | 
| (PTRMAP_PAGENO(pBt, i)==i && pBt->autoVacuum) ){ | 
| -      checkAppendMsg(&sCheck, 0, "Pointer map page %d is referenced", i); | 
| +      checkAppendMsg(&sCheck, "Pointer map page %d is referenced", i); | 
| } | 
| #endif | 
| } | 
| @@ -7879,7 +8428,7 @@ char *sqlite3BtreeIntegrityCheck( | 
| ** of the integrity check. | 
| */ | 
| if( NEVER(nRef != sqlite3PagerRefcount(pBt->pPager)) ){ | 
| -    checkAppendMsg(&sCheck, 0, | 
| +    checkAppendMsg(&sCheck, | 
| "Outstanding page count goes from %d to %d during this analysis", | 
| nRef, sqlite3PagerRefcount(pBt->pPager) | 
| ); | 
| @@ -7888,7 +8437,7 @@ char *sqlite3BtreeIntegrityCheck( | 
| /* Clean  up and report errors. | 
| */ | 
| sqlite3BtreeLeave(p); | 
| -  sqlite3_free(sCheck.anRef); | 
| +  sqlite3_free(sCheck.aPgRef); | 
| if( sCheck.mallocFailed ){ | 
| sqlite3StrAccumReset(&sCheck.errMsg); | 
| *pnErr = sCheck.nErr+1; | 
| @@ -7901,14 +8450,15 @@ char *sqlite3BtreeIntegrityCheck( | 
| #endif /* SQLITE_OMIT_INTEGRITY_CHECK */ | 
|  | 
| /* | 
| -** Return the full pathname of the underlying database file. | 
| +** Return the full pathname of the underlying database file.  Return | 
| +** an empty string if the database is in-memory or a TEMP database. | 
| ** | 
| ** The pager filename is invariant as long as the pager is | 
| ** open so it is safe to access without the BtShared mutex. | 
| */ | 
| const char *sqlite3BtreeGetFilename(Btree *p){ | 
| assert( p->pBt->pPager!=0 ); | 
| -  return sqlite3PagerFilename(p->pBt->pPager); | 
| +  return sqlite3PagerFilename(p->pBt->pPager, 1); | 
| } | 
|  | 
| /* | 
| @@ -8059,7 +8609,7 @@ int sqlite3BtreePutData(BtCursor *pCsr, u32 offset, u32 amt, void *z){ | 
| int rc; | 
| assert( cursorHoldsMutex(pCsr) ); | 
| assert( sqlite3_mutex_held(pCsr->pBtree->db->mutex) ); | 
| -  assert( pCsr->isIncrblobHandle ); | 
| +  assert( pCsr->curFlags & BTCF_Incrblob ); | 
|  | 
| rc = restoreCursorPosition(pCsr); | 
| if( rc!=SQLITE_OK ){ | 
| @@ -8070,6 +8620,17 @@ int sqlite3BtreePutData(BtCursor *pCsr, u32 offset, u32 amt, void *z){ | 
| return SQLITE_ABORT; | 
| } | 
|  | 
| +  /* Save the positions of all other cursors open on this table. This is | 
| +  ** required in case any of them are holding references to an xFetch | 
| +  ** version of the b-tree page modified by the accessPayload call below. | 
| +  ** | 
| +  ** Note that pCsr must be open on an INTKEY table. saveCursorPosition() | 
| +  ** (and hence saveAllCursors()) cannot fail on a BTREE_INTKEY table, so | 
| +  ** saveAllCursors() can only return SQLITE_OK here. | 
| +  */ | 
| +  VVA_ONLY(rc =) saveAllCursors(pCsr->pBt, pCsr->pgnoRoot, pCsr); | 
| +  assert( rc==SQLITE_OK ); | 
| + | 
| /* Check some assumptions: | 
| **   (a) the cursor is open for writing, | 
| **   (b) there is a read/write transaction open, | 
| @@ -8077,10 +8638,11 @@ int sqlite3BtreePutData(BtCursor *pCsr, u32 offset, u32 amt, void *z){ | 
| **   (d) there are no conflicting read-locks, and | 
| **   (e) the cursor points at a valid row of an intKey table. | 
| */ | 
| -  if( !pCsr->wrFlag ){ | 
| +  if( (pCsr->curFlags & BTCF_WriteFlag)==0 ){ | 
| return SQLITE_READONLY; | 
| } | 
| -  assert( !pCsr->pBt->readOnly && pCsr->pBt->inTransaction==TRANS_WRITE ); | 
| +  assert( (pCsr->pBt->btsFlags & BTS_READ_ONLY)==0 | 
| +              && pCsr->pBt->inTransaction==TRANS_WRITE ); | 
| assert( hasSharedCacheTableLock(pCsr->pBtree, pCsr->pgnoRoot, 0, 2) ); | 
| assert( !hasReadConflicts(pCsr->pBtree, pCsr->pgnoRoot) ); | 
| assert( pCsr->apPage[pCsr->iPage]->intKey ); | 
| @@ -8089,20 +8651,10 @@ int sqlite3BtreePutData(BtCursor *pCsr, u32 offset, u32 amt, void *z){ | 
| } | 
|  | 
| /* | 
| -** Set a flag on this cursor to cache the locations of pages from the | 
| -** overflow list for the current row. This is used by cursors opened | 
| -** for incremental blob IO only. | 
| -** | 
| -** This function sets a flag only. The actual page location cache | 
| -** (stored in BtCursor.aOverflow[]) is allocated and used by function | 
| -** accessPayload() (the worker function for sqlite3BtreeData() and | 
| -** sqlite3BtreePutData()). | 
| +** Mark this cursor as an incremental blob cursor. | 
| */ | 
| -void sqlite3BtreeCacheOverflow(BtCursor *pCur){ | 
| -  assert( cursorHoldsMutex(pCur) ); | 
| -  assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) ); | 
| -  invalidateOverflowCache(pCur); | 
| -  pCur->isIncrblobHandle = 1; | 
| +void sqlite3BtreeIncrblobCursor(BtCursor *pCur){ | 
| +  pCur->curFlags |= BTCF_Incrblob; | 
| } | 
| #endif | 
|  | 
| @@ -8115,13 +8667,13 @@ int sqlite3BtreeSetVersion(Btree *pBtree, int iVersion){ | 
| BtShared *pBt = pBtree->pBt; | 
| int rc;                         /* Return code */ | 
|  | 
| -  assert( pBtree->inTrans==TRANS_NONE ); | 
| assert( iVersion==1 || iVersion==2 ); | 
|  | 
| /* If setting the version fields to 1, do not automatically open the | 
| ** WAL connection, even if the version fields are currently set to 2. | 
| */ | 
| -  pBt->doNotUseWAL = (u8)(iVersion==1); | 
| +  pBt->btsFlags &= ~BTS_NO_WAL; | 
| +  if( iVersion==1 ) pBt->btsFlags |= BTS_NO_WAL; | 
|  | 
| rc = sqlite3BtreeBeginTrans(pBtree, 0); | 
| if( rc==SQLITE_OK ){ | 
| @@ -8138,6 +8690,22 @@ int sqlite3BtreeSetVersion(Btree *pBtree, int iVersion){ | 
| } | 
| } | 
|  | 
| -  pBt->doNotUseWAL = 0; | 
| +  pBt->btsFlags &= ~BTS_NO_WAL; | 
| return rc; | 
| } | 
| + | 
| +/* | 
| +** Set the mask of hint flags for cursor pCsr (stored in pCsr->hints). | 
| +** Currently the only valid values are 0 and BTREE_BULKLOAD. | 
| +*/ | 
| +void sqlite3BtreeCursorHints(BtCursor *pCsr, unsigned int mask){ | 
| +  assert( mask==BTREE_BULKLOAD || mask==0 ); | 
| +  pCsr->hints = mask; | 
| +} | 
| + | 
| +/* | 
| +** Return 1 if the given Btree is read-only (BTS_READ_ONLY is set), else 0. | 
| +*/ | 
| +int sqlite3BtreeIsReadonly(Btree *p){ | 
| +  return (p->pBt->btsFlags & BTS_READ_ONLY)!=0; | 
| +} | 
|  |