| Index: third_party/sqlite/src/src/wal.c | 
| diff --git a/third_party/sqlite/src/src/wal.c b/third_party/sqlite/src/src/wal.c | 
| index 51ea18fb21677476c9f4ac626c41c1b1a8ef6267..d134a8b52a31089b555ada725aa259c94646286c 100644 | 
| --- a/third_party/sqlite/src/src/wal.c | 
| +++ b/third_party/sqlite/src/src/wal.c | 
| @@ -142,14 +142,15 @@ | 
| ** byte order of the host computer. | 
| ** | 
| ** The purpose of the wal-index is to answer this question quickly:  Given | 
| -** a page number P, return the index of the last frame for page P in the WAL, | 
| -** or return NULL if there are no frames for page P in the WAL. | 
| +** a page number P and a maximum frame index M, return the index of the | 
| +** last frame in the wal before frame M for page P in the WAL, or return | 
| +** NULL if there are no frames for page P in the WAL prior to M. | 
| ** | 
| ** The wal-index consists of a header region, followed by an one or | 
| ** more index blocks. | 
| ** | 
| ** The wal-index header contains the total number of frames within the WAL | 
| -** in the the mxFrame field. | 
| +** in the mxFrame field. | 
| ** | 
| ** Each index block except for the first contains information on | 
| ** HASHTABLE_NPAGE frames. The first index block contains information on | 
| @@ -412,14 +413,20 @@ struct Wal { | 
| sqlite3_file *pDbFd;       /* File handle for the database file */ | 
| sqlite3_file *pWalFd;      /* File handle for WAL file */ | 
| u32 iCallback;             /* Value to pass to log callback (or 0) */ | 
| +  i64 mxWalSize;             /* Truncate WAL to this size upon reset */ | 
| int nWiData;               /* Size of array apWiData */ | 
| +  int szFirstBlock;          /* Size of first block written to WAL file */ | 
| volatile u32 **apWiData;   /* Pointer to wal-index content in memory */ | 
| u32 szPage;                /* Database page size */ | 
| i16 readLock;              /* Which read lock is being held.  -1 for none */ | 
| +  u8 syncFlags;              /* Flags to use to sync header writes */ | 
| u8 exclusiveMode;          /* Non-zero if connection is in exclusive mode */ | 
| u8 writeLock;              /* True if in a write transaction */ | 
| u8 ckptLock;               /* True if holding a checkpoint lock */ | 
| -  u8 readOnly;               /* True if the WAL file is open read-only */ | 
| +  u8 readOnly;               /* WAL_RDWR, WAL_RDONLY, or WAL_SHM_RDONLY */ | 
| +  u8 truncateOnCommit;       /* True to truncate WAL file on commit */ | 
| +  u8 syncHeader;             /* Fsync the WAL header if true */ | 
| +  u8 padToSectorBoundary;    /* Pad transactions out to the next sector */ | 
| WalIndexHdr hdr;           /* Wal-index header for current transaction */ | 
| const char *zWalName;      /* Name of WAL file */ | 
| u32 nCkpt;                 /* Checkpoint sequence counter in the wal-header */ | 
| @@ -436,6 +443,13 @@ struct Wal { | 
| #define WAL_HEAPMEMORY_MODE 2 | 
|  | 
| /* | 
| +** Possible values for WAL.readOnly | 
| +*/ | 
| +#define WAL_RDWR        0    /* Normal read/write connection */ | 
| +#define WAL_RDONLY      1    /* The WAL file is readonly */ | 
| +#define WAL_SHM_RDONLY  2    /* The SHM file is readonly */ | 
| + | 
| +/* | 
| ** Each page of the wal-index mapping contains a hash-table made up of | 
| ** an array of HASHTABLE_NSLOT elements of the following type. | 
| */ | 
| @@ -528,6 +542,10 @@ static int walIndexPage(Wal *pWal, int iPage, volatile u32 **ppPage){ | 
| rc = sqlite3OsShmMap(pWal->pDbFd, iPage, WALINDEX_PGSZ, | 
| pWal->writeLock, (void volatile **)&pWal->apWiData[iPage] | 
| ); | 
| +      if( rc==SQLITE_READONLY ){ | 
| +        pWal->readOnly |= WAL_SHM_RDONLY; | 
| +        rc = SQLITE_OK; | 
| +      } | 
| } | 
| } | 
|  | 
| @@ -556,7 +574,7 @@ static volatile WalIndexHdr *walIndexHdr(Wal *pWal){ | 
| ** The argument to this macro must be of type u32. On a little-endian | 
| ** architecture, it returns the u32 value that results from interpreting | 
| ** the 4 bytes as a big-endian value. On a big-endian architecture, it | 
| -** returns the value that would be produced by intepreting the 4 bytes | 
| +** returns the value that would be produced by interpreting the 4 bytes | 
| ** of the input value as a little-endian integer. | 
| */ | 
| #define BYTESWAP32(x) ( \ | 
| @@ -970,7 +988,7 @@ static int walIndexAppend(Wal *pWal, u32 iFrame, u32 iPage){ | 
| assert( idx <= HASHTABLE_NSLOT/2 + 1 ); | 
|  | 
| /* If this is the first entry to be added to this hash-table, zero the | 
| -    ** entire hash table and aPgno[] array before proceding. | 
| +    ** entire hash table and aPgno[] array before proceeding. | 
| */ | 
| if( idx==1 ){ | 
| int nByte = (int)((u8 *)&aHash[HASHTABLE_NSLOT] - (u8 *)&aPgno[1]); | 
| @@ -1081,6 +1099,7 @@ static int walIndexRecover(Wal *pWal){ | 
| int szPage;                   /* Page size according to the log */ | 
| u32 magic;                    /* Magic value read from WAL header */ | 
| u32 version;                  /* Magic value read from WAL header */ | 
| +    int isValid;                  /* True if this frame is valid */ | 
|  | 
| /* Read in the WAL header. */ | 
| rc = sqlite3OsRead(pWal->pWalFd, aBuf, WAL_HDRSIZE, 0); | 
| @@ -1139,14 +1158,14 @@ static int walIndexRecover(Wal *pWal){ | 
| for(iOffset=WAL_HDRSIZE; (iOffset+szFrame)<=nSize; iOffset+=szFrame){ | 
| u32 pgno;                   /* Database page number for frame */ | 
| u32 nTruncate;              /* dbsize field from frame header */ | 
| -      int isValid;                /* True if this frame is valid */ | 
|  | 
| /* Read and decode the next log frame. */ | 
| +      iFrame++; | 
| rc = sqlite3OsRead(pWal->pWalFd, aFrame, szFrame, iOffset); | 
| if( rc!=SQLITE_OK ) break; | 
| isValid = walDecodeFrame(pWal, &pgno, &nTruncate, aData, aFrame); | 
| if( !isValid ) break; | 
| -      rc = walIndexAppend(pWal, ++iFrame, pgno); | 
| +      rc = walIndexAppend(pWal, iFrame, pgno); | 
| if( rc!=SQLITE_OK ) break; | 
|  | 
| /* If nTruncate is non-zero, this is a commit record. */ | 
| @@ -1180,6 +1199,7 @@ finished: | 
| pInfo->nBackfill = 0; | 
| pInfo->aReadMark[0] = 0; | 
| for(i=1; i<WAL_NREADER; i++) pInfo->aReadMark[i] = READMARK_NOT_USED; | 
| +    if( pWal->hdr.mxFrame ) pInfo->aReadMark[1] = pWal->hdr.mxFrame; | 
|  | 
| /* If more than one frame was recovered from the log file, report an | 
| ** event via sqlite3_log(). This is to help with identifying performance | 
| @@ -1187,8 +1207,9 @@ finished: | 
| ** checkpointing the log file. | 
| */ | 
| if( pWal->hdr.nPage ){ | 
| -      sqlite3_log(SQLITE_OK, "Recovered %d frames from WAL file %s", | 
| -          pWal->hdr.nPage, pWal->zWalName | 
| +      sqlite3_log(SQLITE_NOTICE_RECOVER_WAL, | 
| +          "recovered %d frames from WAL file %s", | 
| +          pWal->hdr.mxFrame, pWal->zWalName | 
| ); | 
| } | 
| } | 
| @@ -1234,6 +1255,7 @@ int sqlite3WalOpen( | 
| sqlite3_file *pDbFd,            /* The open database file */ | 
| const char *zWalName,           /* Name of the WAL file */ | 
| int bNoShm,                     /* True to run in heap-memory mode */ | 
| +  i64 mxWalSize,                  /* Truncate WAL to this size on reset */ | 
| Wal **ppWal                     /* OUT: Allocated Wal handle */ | 
| ){ | 
| int rc;                         /* Return Code */ | 
| @@ -1266,14 +1288,17 @@ int sqlite3WalOpen( | 
| pRet->pWalFd = (sqlite3_file *)&pRet[1]; | 
| pRet->pDbFd = pDbFd; | 
| pRet->readLock = -1; | 
| +  pRet->mxWalSize = mxWalSize; | 
| pRet->zWalName = zWalName; | 
| +  pRet->syncHeader = 1; | 
| +  pRet->padToSectorBoundary = 1; | 
| pRet->exclusiveMode = (bNoShm ? WAL_HEAPMEMORY_MODE: WAL_NORMAL_MODE); | 
|  | 
| /* Open file handle on the write-ahead log file. */ | 
| flags = (SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE|SQLITE_OPEN_WAL); | 
| rc = sqlite3OsOpen(pVfs, zWalName, pRet->pWalFd, flags, &flags); | 
| if( rc==SQLITE_OK && flags&SQLITE_OPEN_READONLY ){ | 
| -    pRet->readOnly = 1; | 
| +    pRet->readOnly = WAL_RDONLY; | 
| } | 
|  | 
| if( rc!=SQLITE_OK ){ | 
| @@ -1281,6 +1306,11 @@ int sqlite3WalOpen( | 
| sqlite3OsClose(pRet->pWalFd); | 
| sqlite3_free(pRet); | 
| }else{ | 
| +    int iDC = sqlite3OsDeviceCharacteristics(pDbFd); | 
| +    if( iDC & SQLITE_IOCAP_SEQUENTIAL ){ pRet->syncHeader = 0; } | 
| +    if( iDC & SQLITE_IOCAP_POWERSAFE_OVERWRITE ){ | 
| +      pRet->padToSectorBoundary = 0; | 
| +    } | 
| *ppWal = pRet; | 
| WALTRACE(("WAL%d: opened\n", pRet)); | 
| } | 
| @@ -1288,6 +1318,13 @@ int sqlite3WalOpen( | 
| } | 
|  | 
| /* | 
| +** Change the size to which the WAL file is trucated on each reset. | 
| +*/ | 
| +void sqlite3WalLimit(Wal *pWal, i64 iLimit){ | 
| +  if( pWal ) pWal->mxWalSize = iLimit; | 
| +} | 
| + | 
| +/* | 
| ** Find the smallest page number out of all pages held in the WAL that | 
| ** has not been returned by any prior invocation of this method on the | 
| ** same WalIterator object.   Write into *piFrame the frame index where | 
| @@ -1609,7 +1646,7 @@ static int walPagesize(Wal *pWal){ | 
| ** database file. | 
| ** | 
| ** This routine uses and updates the nBackfill field of the wal-index header. | 
| -** This is the only routine tha will increase the value of nBackfill. | 
| +** This is the only routine that will increase the value of nBackfill. | 
| ** (A WAL reset or recovery will revert nBackfill to zero, but not increase | 
| ** its value.) | 
| ** | 
| @@ -1664,7 +1701,7 @@ static int walCheckpoint( | 
| assert( y<=pWal->hdr.mxFrame ); | 
| rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(i), 1); | 
| if( rc==SQLITE_OK ){ | 
| -        pInfo->aReadMark[i] = READMARK_NOT_USED; | 
| +        pInfo->aReadMark[i] = (i==1 ? mxSafeFrame : READMARK_NOT_USED); | 
| walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1); | 
| }else if( rc==SQLITE_BUSY ){ | 
| mxSafeFrame = y; | 
| @@ -1686,17 +1723,18 @@ static int walCheckpoint( | 
| rc = sqlite3OsSync(pWal->pWalFd, sync_flags); | 
| } | 
|  | 
| -    /* If the database file may grow as a result of this checkpoint, hint | 
| -    ** about the eventual size of the db file to the VFS layer. | 
| +    /* If the database may grow as a result of this checkpoint, hint | 
| +    ** about the eventual size of the db file to the VFS layer. | 
| */ | 
| if( rc==SQLITE_OK ){ | 
| i64 nReq = ((i64)mxPage * szPage); | 
| rc = sqlite3OsFileSize(pWal->pDbFd, &nSize); | 
| if( rc==SQLITE_OK && nSize<nReq ){ | 
| -        sqlite3OsFileControl(pWal->pDbFd, SQLITE_FCNTL_SIZE_HINT, &nReq); | 
| +        sqlite3OsFileControlHint(pWal->pDbFd, SQLITE_FCNTL_SIZE_HINT, &nReq); | 
| } | 
| } | 
|  | 
| + | 
| /* Iterate through the contents of the WAL, copying data to the db file. */ | 
| while( rc==SQLITE_OK && 0==walIteratorNext(pIter, &iDbpage, &iFrame) ){ | 
| i64 iOffset; | 
| @@ -1761,6 +1799,24 @@ static int walCheckpoint( | 
| } | 
|  | 
| /* | 
| +** If the WAL file is currently larger than nMax bytes in size, truncate | 
| +** it to exactly nMax bytes. If an error occurs while doing so, ignore it. | 
| +*/ | 
| +static void walLimitSize(Wal *pWal, i64 nMax){ | 
| +  i64 sz; | 
| +  int rx; | 
| +  sqlite3BeginBenignMalloc(); | 
| +  rx = sqlite3OsFileSize(pWal->pWalFd, &sz); | 
| +  if( rx==SQLITE_OK && (sz > nMax ) ){ | 
| +    rx = sqlite3OsTruncate(pWal->pWalFd, nMax); | 
| +  } | 
| +  sqlite3EndBenignMalloc(); | 
| +  if( rx ){ | 
| +    sqlite3_log(rx, "cannot limit WAL size: %s", pWal->zWalName); | 
| +  } | 
| +} | 
| + | 
| +/* | 
| ** Close a connection to a log file. | 
| */ | 
| int sqlite3WalClose( | 
| @@ -1790,14 +1846,33 @@ int sqlite3WalClose( | 
| pWal, SQLITE_CHECKPOINT_PASSIVE, 0, 0, sync_flags, nBuf, zBuf, 0, 0 | 
| ); | 
| if( rc==SQLITE_OK ){ | 
| -        isDelete = 1; | 
| +        int bPersist = -1; | 
| +        sqlite3OsFileControlHint( | 
| +            pWal->pDbFd, SQLITE_FCNTL_PERSIST_WAL, &bPersist | 
| +        ); | 
| +        if( bPersist!=1 ){ | 
| +          /* Try to delete the WAL file if the checkpoint completed and | 
| +          ** fsyned (rc==SQLITE_OK) and if we are not in persistent-wal | 
| +          ** mode (!bPersist) */ | 
| +          isDelete = 1; | 
| +        }else if( pWal->mxWalSize>=0 ){ | 
| +          /* Try to truncate the WAL file to zero bytes if the checkpoint | 
| +          ** completed and fsynced (rc==SQLITE_OK) and we are in persistent | 
| +          ** WAL mode (bPersist) and if the PRAGMA journal_size_limit is a | 
| +          ** non-negative value (pWal->mxWalSize>=0).  Note that we truncate | 
| +          ** to zero bytes as truncating to the journal_size_limit might | 
| +          ** leave a corrupt WAL file on disk. */ | 
| +          walLimitSize(pWal, 0); | 
| +        } | 
| } | 
| } | 
|  | 
| walIndexClose(pWal, isDelete); | 
| sqlite3OsClose(pWal->pWalFd); | 
| if( isDelete ){ | 
| +      sqlite3BeginBenignMalloc(); | 
| sqlite3OsDelete(pWal->pVfs, pWal->zWalName, 0); | 
| +      sqlite3EndBenignMalloc(); | 
| } | 
| WALTRACE(("WAL%p: closed\n", pWal)); | 
| sqlite3_free((void *)pWal->apWiData); | 
| @@ -1875,7 +1950,7 @@ static int walIndexTryHdr(Wal *pWal, int *pChanged){ | 
| ** wal-index from the WAL before returning. | 
| ** | 
| ** Set *pChanged to 1 if the wal-index header value in pWal->hdr is | 
| -** changed by this opertion.  If pWal->hdr is unchanged, set *pChanged | 
| +** changed by this operation.  If pWal->hdr is unchanged, set *pChanged | 
| ** to 0. | 
| ** | 
| ** If the wal-index header is successfully read, return SQLITE_OK. | 
| @@ -1907,21 +1982,28 @@ static int walIndexReadHdr(Wal *pWal, int *pChanged){ | 
| ** with a writer.  So get a WRITE lock and try again. | 
| */ | 
| assert( badHdr==0 || pWal->writeLock==0 ); | 
| -  if( badHdr && SQLITE_OK==(rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1)) ){ | 
| -    pWal->writeLock = 1; | 
| -    if( SQLITE_OK==(rc = walIndexPage(pWal, 0, &page0)) ){ | 
| -      badHdr = walIndexTryHdr(pWal, pChanged); | 
| -      if( badHdr ){ | 
| -        /* If the wal-index header is still malformed even while holding | 
| -        ** a WRITE lock, it can only mean that the header is corrupted and | 
| -        ** needs to be reconstructed.  So run recovery to do exactly that. | 
| -        */ | 
| -        rc = walIndexRecover(pWal); | 
| -        *pChanged = 1; | 
| +  if( badHdr ){ | 
| +    if( pWal->readOnly & WAL_SHM_RDONLY ){ | 
| +      if( SQLITE_OK==(rc = walLockShared(pWal, WAL_WRITE_LOCK)) ){ | 
| +        walUnlockShared(pWal, WAL_WRITE_LOCK); | 
| +        rc = SQLITE_READONLY_RECOVERY; | 
| +      } | 
| +    }else if( SQLITE_OK==(rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1)) ){ | 
| +      pWal->writeLock = 1; | 
| +      if( SQLITE_OK==(rc = walIndexPage(pWal, 0, &page0)) ){ | 
| +        badHdr = walIndexTryHdr(pWal, pChanged); | 
| +        if( badHdr ){ | 
| +          /* If the wal-index header is still malformed even while holding | 
| +          ** a WRITE lock, it can only mean that the header is corrupted and | 
| +          ** needs to be reconstructed.  So run recovery to do exactly that. | 
| +          */ | 
| +          rc = walIndexRecover(pWal); | 
| +          *pChanged = 1; | 
| +        } | 
| } | 
| +      pWal->writeLock = 0; | 
| +      walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1); | 
| } | 
| -    pWal->writeLock = 0; | 
| -    walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1); | 
| } | 
|  | 
| /* If the header is read successfully, check the version number to make | 
| @@ -2014,8 +2096,8 @@ static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){ | 
| ** calls to sqlite3OsSleep() have a delay of 1 microsecond.  Really this | 
| ** is more of a scheduler yield than an actual delay.  But on the 10th | 
| ** an subsequent retries, the delays start becoming longer and longer, | 
| -  ** so that on the 100th (and last) RETRY we delay for 21 milliseconds. | 
| -  ** The total delay time before giving up is less than 1 second. | 
| +  ** so that on the 100th (and last) RETRY we delay for 323 milliseconds. | 
| +  ** The total delay time before giving up is less than 10 seconds. | 
| */ | 
| if( cnt>5 ){ | 
| int nDelay = 1;                      /* Pause time in microseconds */ | 
| @@ -2023,7 +2105,7 @@ static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){ | 
| VVA_ONLY( pWal->lockError = 1; ) | 
| return SQLITE_PROTOCOL; | 
| } | 
| -    if( cnt>=10 ) nDelay = (cnt-9)*238;  /* Max delay 21ms. Total delay 996ms */ | 
| +    if( cnt>=10 ) nDelay = (cnt-9)*(cnt-9)*39; | 
| sqlite3OsSleep(pWal->pVfs, nDelay); | 
| } | 
|  | 
| @@ -2072,7 +2154,7 @@ static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){ | 
| ** may have been appended to the log before READ_LOCK(0) was obtained. | 
| ** When holding READ_LOCK(0), the reader ignores the entire log file, | 
| ** which implies that the database file contains a trustworthy | 
| -        ** snapshoT. Since holding READ_LOCK(0) prevents a checkpoint from | 
| +        ** snapshot. Since holding READ_LOCK(0) prevents a checkpoint from | 
| ** happening, this is usually correct. | 
| ** | 
| ** However, if frames have been appended to the log (or if the log | 
| @@ -2108,7 +2190,9 @@ static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){ | 
| } | 
| /* There was once an "if" here. The extra "{" is to preserve indentation. */ | 
| { | 
| -    if( mxReadMark < pWal->hdr.mxFrame || mxI==0 ){ | 
| +    if( (pWal->readOnly & WAL_SHM_RDONLY)==0 | 
| +     && (mxReadMark<pWal->hdr.mxFrame || mxI==0) | 
| +    ){ | 
| for(i=1; i<WAL_NREADER; i++){ | 
| rc = walLockExclusive(pWal, WAL_READ_LOCK(i), 1); | 
| if( rc==SQLITE_OK ){ | 
| @@ -2122,8 +2206,8 @@ static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){ | 
| } | 
| } | 
| if( mxI==0 ){ | 
| -      assert( rc==SQLITE_BUSY ); | 
| -      return WAL_RETRY; | 
| +      assert( rc==SQLITE_BUSY || (pWal->readOnly & WAL_SHM_RDONLY)!=0 ); | 
| +      return rc==SQLITE_BUSY ? WAL_RETRY : SQLITE_READONLY_CANTLOCK; | 
| } | 
|  | 
| rc = walLockShared(pWal, WAL_READ_LOCK(mxI)); | 
| @@ -2205,19 +2289,17 @@ void sqlite3WalEndReadTransaction(Wal *pWal){ | 
| } | 
|  | 
| /* | 
| -** Read a page from the WAL, if it is present in the WAL and if the | 
| -** current read transaction is configured to use the WAL. | 
| +** Search the wal file for page pgno. If found, set *piRead to the frame that | 
| +** contains the page. Otherwise, if pgno is not in the wal file, set *piRead | 
| +** to zero. | 
| ** | 
| -** The *pInWal is set to 1 if the requested page is in the WAL and | 
| -** has been loaded.  Or *pInWal is set to 0 if the page was not in | 
| -** the WAL and needs to be read out of the database. | 
| +** Return SQLITE_OK if successful, or an error code if an error occurs. If an | 
| +** error does occur, the final value of *piRead is undefined. | 
| */ | 
| -int sqlite3WalRead( | 
| +int sqlite3WalFindFrame( | 
| Wal *pWal,                      /* WAL handle */ | 
| Pgno pgno,                      /* Database page number to read data for */ | 
| -  int *pInWal,                    /* OUT: True if data is read from WAL */ | 
| -  int nOut,                       /* Size of buffer pOut in bytes */ | 
| -  u8 *pOut                        /* Buffer to write page data to */ | 
| +  u32 *piRead                     /* OUT: Frame number (or zero) */ | 
| ){ | 
| u32 iRead = 0;                  /* If !=0, WAL frame to return data from */ | 
| u32 iLast = pWal->hdr.mxFrame;  /* Last page in WAL for this reader */ | 
| @@ -2233,7 +2315,7 @@ int sqlite3WalRead( | 
| ** WAL were empty. | 
| */ | 
| if( iLast==0 || pWal->readLock==0 ){ | 
| -    *pInWal = 0; | 
| +    *piRead = 0; | 
| return SQLITE_OK; | 
| } | 
|  | 
| @@ -2278,7 +2360,7 @@ int sqlite3WalRead( | 
| for(iKey=walHash(pgno); aHash[iKey]; iKey=walNextHash(iKey)){ | 
| u32 iFrame = aHash[iKey] + iZero; | 
| if( iFrame<=iLast && aPgno[aHash[iKey]]==pgno ){ | 
| -        assert( iFrame>iRead ); | 
| +        /* assert( iFrame>iRead ); -- not true if there is corruption */ | 
| iRead = iFrame; | 
| } | 
| if( (nCollide--)==0 ){ | 
| @@ -2304,26 +2386,31 @@ int sqlite3WalRead( | 
| } | 
| #endif | 
|  | 
| -  /* If iRead is non-zero, then it is the log frame number that contains the | 
| -  ** required page. Read and return data from the log file. | 
| -  */ | 
| -  if( iRead ){ | 
| -    int sz; | 
| -    i64 iOffset; | 
| -    sz = pWal->hdr.szPage; | 
| -    sz = (pWal->hdr.szPage&0xfe00) + ((pWal->hdr.szPage&0x0001)<<16); | 
| -    testcase( sz<=32768 ); | 
| -    testcase( sz>=65536 ); | 
| -    iOffset = walFrameOffset(iRead, sz) + WAL_FRAME_HDRSIZE; | 
| -    *pInWal = 1; | 
| -    /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL */ | 
| -    return sqlite3OsRead(pWal->pWalFd, pOut, nOut, iOffset); | 
| -  } | 
| - | 
| -  *pInWal = 0; | 
| +  *piRead = iRead; | 
| return SQLITE_OK; | 
| } | 
|  | 
| +/* | 
| +** Read the contents of frame iRead from the wal file into buffer pOut | 
| +** (which is nOut bytes in size). Return SQLITE_OK if successful, or an | 
| +** error code otherwise. | 
| +*/ | 
| +int sqlite3WalReadFrame( | 
| +  Wal *pWal,                      /* WAL handle */ | 
| +  u32 iRead,                      /* Frame to read */ | 
| +  int nOut,                       /* Size of buffer pOut in bytes */ | 
| +  u8 *pOut                        /* Buffer to write page data to */ | 
| +){ | 
| +  int sz; | 
| +  i64 iOffset; | 
| +  sz = pWal->hdr.szPage; | 
| +  sz = (sz&0xfe00) + ((sz&0x0001)<<16); | 
| +  testcase( sz<=32768 ); | 
| +  testcase( sz>=65536 ); | 
| +  iOffset = walFrameOffset(iRead, sz) + WAL_FRAME_HDRSIZE; | 
| +  /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL */ | 
| +  return sqlite3OsRead(pWal->pWalFd, pOut, (nOut>sz ? sz : nOut), iOffset); | 
| +} | 
|  | 
| /* | 
| ** Return the size of the database in pages (or zero, if unknown). | 
| @@ -2376,7 +2463,7 @@ int sqlite3WalBeginWriteTransaction(Wal *pWal){ | 
| if( memcmp(&pWal->hdr, (void *)walIndexHdr(pWal), sizeof(WalIndexHdr))!=0 ){ | 
| walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1); | 
| pWal->writeLock = 0; | 
| -    rc = SQLITE_BUSY; | 
| +    rc = SQLITE_BUSY_SNAPSHOT; | 
| } | 
|  | 
| return rc; | 
| @@ -2390,6 +2477,7 @@ int sqlite3WalEndWriteTransaction(Wal *pWal){ | 
| if( pWal->writeLock ){ | 
| walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1); | 
| pWal->writeLock = 0; | 
| +    pWal->truncateOnCommit = 0; | 
| } | 
| return SQLITE_OK; | 
| } | 
| @@ -2435,9 +2523,8 @@ int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx){ | 
| assert( walFramePgno(pWal, iFrame)!=1 ); | 
| rc = xUndo(pUndoCtx, walFramePgno(pWal, iFrame)); | 
| } | 
| -    walCleanupHash(pWal); | 
| +    if( iMax!=pWal->hdr.mxFrame ) walCleanupHash(pWal); | 
| } | 
| -  assert( rc==SQLITE_OK ); | 
| return rc; | 
| } | 
|  | 
| @@ -2486,6 +2573,7 @@ int sqlite3WalSavepointUndo(Wal *pWal, u32 *aWalData){ | 
| return rc; | 
| } | 
|  | 
| + | 
| /* | 
| ** This function is called just before writing a set of frames to the log | 
| ** file (see sqlite3WalFrames()). It checks to see if, instead of appending | 
| @@ -2522,13 +2610,15 @@ static int walRestartLog(Wal *pWal){ | 
| */ | 
| int i;                    /* Loop counter */ | 
| u32 *aSalt = pWal->hdr.aSalt;       /* Big-endian salt values */ | 
| + | 
| pWal->nCkpt++; | 
| pWal->hdr.mxFrame = 0; | 
| sqlite3Put4byte((u8*)&aSalt[0], 1 + sqlite3Get4byte((u8*)&aSalt[0])); | 
| aSalt[1] = salt1; | 
| walIndexWriteHdr(pWal); | 
| pInfo->nBackfill = 0; | 
| -        for(i=1; i<WAL_NREADER; i++) pInfo->aReadMark[i] = READMARK_NOT_USED; | 
| +        pInfo->aReadMark[1] = 0; | 
| +        for(i=2; i<WAL_NREADER; i++) pInfo->aReadMark[i] = READMARK_NOT_USED; | 
| assert( pInfo->aReadMark[0]==0 ); | 
| walUnlockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1); | 
| }else if( rc!=SQLITE_BUSY ){ | 
| @@ -2550,6 +2640,74 @@ static int walRestartLog(Wal *pWal){ | 
| return rc; | 
| } | 
|  | 
| +/* | 
| +** Information about the current state of the WAL file and where | 
| +** the next fsync should occur - passed from sqlite3WalFrames() into | 
| +** walWriteToLog(). | 
| +*/ | 
| +typedef struct WalWriter { | 
| +  Wal *pWal;                   /* The complete WAL information */ | 
| +  sqlite3_file *pFd;           /* The WAL file to which we write */ | 
| +  sqlite3_int64 iSyncPoint;    /* Fsync at this offset */ | 
| +  int syncFlags;               /* Flags for the fsync */ | 
| +  int szPage;                  /* Size of one page */ | 
| +} WalWriter; | 
| + | 
| +/* | 
| +** Write iAmt bytes of content into the WAL file beginning at iOffset. | 
| +** Do a sync when crossing the p->iSyncPoint boundary. | 
| +** | 
| +** In other words, if iSyncPoint is in between iOffset and iOffset+iAmt, | 
| +** first write the part before iSyncPoint, then sync, then write the | 
| +** rest. | 
| +*/ | 
| +static int walWriteToLog( | 
| +  WalWriter *p,              /* WAL to write to */ | 
| +  void *pContent,            /* Content to be written */ | 
| +  int iAmt,                  /* Number of bytes to write */ | 
| +  sqlite3_int64 iOffset      /* Start writing at this offset */ | 
| +){ | 
| +  int rc; | 
| +  if( iOffset<p->iSyncPoint && iOffset+iAmt>=p->iSyncPoint ){ | 
| +    int iFirstAmt = (int)(p->iSyncPoint - iOffset); | 
| +    rc = sqlite3OsWrite(p->pFd, pContent, iFirstAmt, iOffset); | 
| +    if( rc ) return rc; | 
| +    iOffset += iFirstAmt; | 
| +    iAmt -= iFirstAmt; | 
| +    pContent = (void*)(iFirstAmt + (char*)pContent); | 
| +    assert( p->syncFlags & (SQLITE_SYNC_NORMAL|SQLITE_SYNC_FULL) ); | 
| +    rc = sqlite3OsSync(p->pFd, p->syncFlags & SQLITE_SYNC_MASK); | 
| +    if( iAmt==0 || rc ) return rc; | 
| +  } | 
| +  rc = sqlite3OsWrite(p->pFd, pContent, iAmt, iOffset); | 
| +  return rc; | 
| +} | 
| + | 
| +/* | 
| +** Write out a single frame of the WAL | 
| +*/ | 
| +static int walWriteOneFrame( | 
| +  WalWriter *p,               /* Where to write the frame */ | 
| +  PgHdr *pPage,               /* The page of the frame to be written */ | 
| +  int nTruncate,              /* The commit flag.  Usually 0.  >0 for commit */ | 
| +  sqlite3_int64 iOffset       /* Byte offset at which to write */ | 
| +){ | 
| +  int rc;                         /* Result code from subfunctions */ | 
| +  void *pData;                    /* Data actually written */ | 
| +  u8 aFrame[WAL_FRAME_HDRSIZE];   /* Buffer to assemble frame-header in */ | 
| +#if defined(SQLITE_HAS_CODEC) | 
| +  if( (pData = sqlite3PagerCodec(pPage))==0 ) return SQLITE_NOMEM; | 
| +#else | 
| +  pData = pPage->pData; | 
| +#endif | 
| +  walEncodeFrame(p->pWal, pPage->pgno, nTruncate, pData, aFrame); | 
| +  rc = walWriteToLog(p, aFrame, sizeof(aFrame), iOffset); | 
| +  if( rc ) return rc; | 
| +  /* Write the page data */ | 
| +  rc = walWriteToLog(p, pData, p->szPage, iOffset+sizeof(aFrame)); | 
| +  return rc; | 
| +} | 
| + | 
| /* | 
| ** Write a set of frames to the log. The caller must hold the write-lock | 
| ** on the log file (obtained using sqlite3WalBeginWriteTransaction()). | 
| @@ -2564,14 +2722,20 @@ int sqlite3WalFrames( | 
| ){ | 
| int rc;                         /* Used to catch return codes */ | 
| u32 iFrame;                     /* Next frame address */ | 
| -  u8 aFrame[WAL_FRAME_HDRSIZE];   /* Buffer to assemble frame-header in */ | 
| PgHdr *p;                       /* Iterator to run through pList with. */ | 
| PgHdr *pLast = 0;               /* Last frame in list */ | 
| -  int nLast = 0;                  /* Number of extra copies of last page */ | 
| +  int nExtra = 0;                 /* Number of extra copies of last page */ | 
| +  int szFrame;                    /* The size of a single frame */ | 
| +  i64 iOffset;                    /* Next byte to write in WAL file */ | 
| +  WalWriter w;                    /* The writer */ | 
|  | 
| assert( pList ); | 
| assert( pWal->writeLock ); | 
|  | 
| +  /* If this frame set completes a transaction, then nTruncate>0.  If | 
| +  ** nTruncate==0 then this frame set does not complete the transaction. */ | 
| +  assert( (isCommit!=0)==(nTruncate!=0) ); | 
| + | 
| #if defined(SQLITE_TEST) && defined(SQLITE_DEBUG) | 
| { int cnt; for(cnt=0, p=pList; p; p=p->pDirty, cnt++){} | 
| WALTRACE(("WAL%p: frame write begin. %d frames. mxFrame=%d. %s\n", | 
| @@ -2599,7 +2763,7 @@ int sqlite3WalFrames( | 
| sqlite3Put4byte(&aWalHdr[4], WAL_MAX_VERSION); | 
| sqlite3Put4byte(&aWalHdr[8], szPage); | 
| sqlite3Put4byte(&aWalHdr[12], pWal->nCkpt); | 
| -    sqlite3_randomness(8, pWal->hdr.aSalt); | 
| +    if( pWal->nCkpt==0 ) sqlite3_randomness(8, pWal->hdr.aSalt); | 
| memcpy(&aWalHdr[16], pWal->hdr.aSalt, 8); | 
| walChecksumBytes(1, aWalHdr, WAL_HDRSIZE-2*4, 0, aCksum); | 
| sqlite3Put4byte(&aWalHdr[24], aCksum[0]); | 
| @@ -2609,77 +2773,89 @@ int sqlite3WalFrames( | 
| pWal->hdr.bigEndCksum = SQLITE_BIGENDIAN; | 
| pWal->hdr.aFrameCksum[0] = aCksum[0]; | 
| pWal->hdr.aFrameCksum[1] = aCksum[1]; | 
| +    pWal->truncateOnCommit = 1; | 
|  | 
| rc = sqlite3OsWrite(pWal->pWalFd, aWalHdr, sizeof(aWalHdr), 0); | 
| WALTRACE(("WAL%p: wal-header write %s\n", pWal, rc ? "failed" : "ok")); | 
| if( rc!=SQLITE_OK ){ | 
| return rc; | 
| } | 
| + | 
| +    /* Sync the header (unless SQLITE_IOCAP_SEQUENTIAL is true or unless | 
| +    ** all syncing is turned off by PRAGMA synchronous=OFF).  Otherwise | 
| +    ** an out-of-order write following a WAL restart could result in | 
| +    ** database corruption.  See the ticket: | 
| +    ** | 
| +    **     http://localhost:591/sqlite/info/ff5be73dee | 
| +    */ | 
| +    if( pWal->syncHeader && sync_flags ){ | 
| +      rc = sqlite3OsSync(pWal->pWalFd, sync_flags & SQLITE_SYNC_MASK); | 
| +      if( rc ) return rc; | 
| +    } | 
| } | 
| assert( (int)pWal->szPage==szPage ); | 
|  | 
| -  /* Write the log file. */ | 
| -  for(p=pList; p; p=p->pDirty){ | 
| -    u32 nDbsize;                  /* Db-size field for frame header */ | 
| -    i64 iOffset;                  /* Write offset in log file */ | 
| -    void *pData; | 
| - | 
| -    iOffset = walFrameOffset(++iFrame, szPage); | 
| -    /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL */ | 
| - | 
| -    /* Populate and write the frame header */ | 
| -    nDbsize = (isCommit && p->pDirty==0) ? nTruncate : 0; | 
| -#if defined(SQLITE_HAS_CODEC) | 
| -    if( (pData = sqlite3PagerCodec(p))==0 ) return SQLITE_NOMEM; | 
| -#else | 
| -    pData = p->pData; | 
| -#endif | 
| -    walEncodeFrame(pWal, p->pgno, nDbsize, pData, aFrame); | 
| -    rc = sqlite3OsWrite(pWal->pWalFd, aFrame, sizeof(aFrame), iOffset); | 
| -    if( rc!=SQLITE_OK ){ | 
| -      return rc; | 
| -    } | 
| +  /* Setup information needed to write frames into the WAL */ | 
| +  w.pWal = pWal; | 
| +  w.pFd = pWal->pWalFd; | 
| +  w.iSyncPoint = 0; | 
| +  w.syncFlags = sync_flags; | 
| +  w.szPage = szPage; | 
| +  iOffset = walFrameOffset(iFrame+1, szPage); | 
| +  szFrame = szPage + WAL_FRAME_HDRSIZE; | 
|  | 
| -    /* Write the page data */ | 
| -    rc = sqlite3OsWrite(pWal->pWalFd, pData, szPage, iOffset+sizeof(aFrame)); | 
| -    if( rc!=SQLITE_OK ){ | 
| -      return rc; | 
| -    } | 
| +  /* Write all frames into the log file exactly once */ | 
| +  for(p=pList; p; p=p->pDirty){ | 
| +    int nDbSize;   /* 0 normally.  Positive == commit flag */ | 
| +    iFrame++; | 
| +    assert( iOffset==walFrameOffset(iFrame, szPage) ); | 
| +    nDbSize = (isCommit && p->pDirty==0) ? nTruncate : 0; | 
| +    rc = walWriteOneFrame(&w, p, nDbSize, iOffset); | 
| +    if( rc ) return rc; | 
| pLast = p; | 
| +    iOffset += szFrame; | 
| } | 
|  | 
| -  /* Sync the log file if the 'isSync' flag was specified. */ | 
| -  if( sync_flags ){ | 
| -    i64 iSegment = sqlite3OsSectorSize(pWal->pWalFd); | 
| -    i64 iOffset = walFrameOffset(iFrame+1, szPage); | 
| - | 
| -    assert( isCommit ); | 
| -    assert( iSegment>0 ); | 
| - | 
| -    iSegment = (((iOffset+iSegment-1)/iSegment) * iSegment); | 
| -    while( iOffset<iSegment ){ | 
| -      void *pData; | 
| -#if defined(SQLITE_HAS_CODEC) | 
| -      if( (pData = sqlite3PagerCodec(pLast))==0 ) return SQLITE_NOMEM; | 
| -#else | 
| -      pData = pLast->pData; | 
| -#endif | 
| -      walEncodeFrame(pWal, pLast->pgno, nTruncate, pData, aFrame); | 
| -      /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL */ | 
| -      rc = sqlite3OsWrite(pWal->pWalFd, aFrame, sizeof(aFrame), iOffset); | 
| -      if( rc!=SQLITE_OK ){ | 
| -        return rc; | 
| -      } | 
| -      iOffset += WAL_FRAME_HDRSIZE; | 
| -      rc = sqlite3OsWrite(pWal->pWalFd, pData, szPage, iOffset); | 
| -      if( rc!=SQLITE_OK ){ | 
| -        return rc; | 
| +  /* If this is the end of a transaction, then we might need to pad | 
| +  ** the transaction and/or sync the WAL file. | 
| +  ** | 
| +  ** Padding and syncing only occur if this set of frames complete a | 
| +  ** transaction and if PRAGMA synchronous=FULL.  If synchronous==NORMAL | 
| +  ** or synchronous==OFF, then no padding or syncing are needed. | 
| +  ** | 
| +  ** If SQLITE_IOCAP_POWERSAFE_OVERWRITE is defined, then padding is not | 
| +  ** needed and only the sync is done.  If padding is needed, then the | 
| +  ** final frame is repeated (with its commit mark) until the next sector | 
| +  ** boundary is crossed.  Only the part of the WAL prior to the last | 
| +  ** sector boundary is synced; the part of the last frame that extends | 
| +  ** past the sector boundary is written after the sync. | 
| +  */ | 
| +  if( isCommit && (sync_flags & WAL_SYNC_TRANSACTIONS)!=0 ){ | 
| +    if( pWal->padToSectorBoundary ){ | 
| +      int sectorSize = sqlite3SectorSize(pWal->pWalFd); | 
| +      w.iSyncPoint = ((iOffset+sectorSize-1)/sectorSize)*sectorSize; | 
| +      while( iOffset<w.iSyncPoint ){ | 
| +        rc = walWriteOneFrame(&w, pLast, nTruncate, iOffset); | 
| +        if( rc ) return rc; | 
| +        iOffset += szFrame; | 
| +        nExtra++; | 
| } | 
| -      nLast++; | 
| -      iOffset += szPage; | 
| +    }else{ | 
| +      rc = sqlite3OsSync(w.pFd, sync_flags & SQLITE_SYNC_MASK); | 
| } | 
| +  } | 
|  | 
| -    rc = sqlite3OsSync(pWal->pWalFd, sync_flags); | 
| +  /* If this frame set completes the first transaction in the WAL and | 
| +  ** if PRAGMA journal_size_limit is set, then truncate the WAL to the | 
| +  ** journal size limit, if possible. | 
| +  */ | 
| +  if( isCommit && pWal->truncateOnCommit && pWal->mxWalSize>=0 ){ | 
| +    i64 sz = pWal->mxWalSize; | 
| +    if( walFrameOffset(iFrame+nExtra+1, szPage)>pWal->mxWalSize ){ | 
| +      sz = walFrameOffset(iFrame+nExtra+1, szPage); | 
| +    } | 
| +    walLimitSize(pWal, sz); | 
| +    pWal->truncateOnCommit = 0; | 
| } | 
|  | 
| /* Append data to the wal-index. It is not necessary to lock the | 
| @@ -2692,9 +2868,9 @@ int sqlite3WalFrames( | 
| iFrame++; | 
| rc = walIndexAppend(pWal, iFrame, p->pgno); | 
| } | 
| -  while( nLast>0 && rc==SQLITE_OK ){ | 
| +  while( rc==SQLITE_OK && nExtra>0 ){ | 
| iFrame++; | 
| -    nLast--; | 
| +    nExtra--; | 
| rc = walIndexAppend(pWal, iFrame, pLast->pgno); | 
| } | 
|  | 
| @@ -2747,6 +2923,7 @@ int sqlite3WalCheckpoint( | 
| assert( pWal->ckptLock==0 ); | 
| assert( pWal->writeLock==0 ); | 
|  | 
| +  if( pWal->readOnly ) return SQLITE_READONLY; | 
| WALTRACE(("WAL%p: checkpoint begins\n", pWal)); | 
| rc = walLockExclusive(pWal, WAL_CKPT_LOCK, 1); | 
| if( rc ){ | 
| @@ -2779,6 +2956,9 @@ int sqlite3WalCheckpoint( | 
| /* Read the wal-index header. */ | 
| if( rc==SQLITE_OK ){ | 
| rc = walIndexReadHdr(pWal, &isChanged); | 
| +    if( isChanged && pWal->pDbFd->pMethods->iVersion>=3 ){ | 
| +      sqlite3OsUnfetch(pWal->pDbFd, 0, 0); | 
| +    } | 
| } | 
|  | 
| /* Copy data from the log to the database file. */ | 
| @@ -2898,4 +3078,16 @@ int sqlite3WalHeapMemory(Wal *pWal){ | 
| return (pWal && pWal->exclusiveMode==WAL_HEAPMEMORY_MODE ); | 
| } | 
|  | 
| +#ifdef SQLITE_ENABLE_ZIPVFS | 
| +/* | 
| +** If the argument is not NULL, it points to a Wal object that holds a | 
| +** read-lock. This function returns the database page-size if it is known, | 
| +** or zero if it is not (or if pWal is NULL). | 
| +*/ | 
| +int sqlite3WalFramesize(Wal *pWal){ | 
| +  assert( pWal==0 || pWal->readLock>=0 ); | 
| +  return (pWal ? pWal->szPage : 0); | 
| +} | 
| +#endif | 
| + | 
| #endif /* #ifndef SQLITE_OMIT_WAL */ | 
|  |