OLD | NEW |
1 /* | 1 /* |
2 ** 2010 February 1 | 2 ** 2010 February 1 |
3 ** | 3 ** |
4 ** The author disclaims copyright to this source code. In place of | 4 ** The author disclaims copyright to this source code. In place of |
5 ** a legal notice, here is a blessing: | 5 ** a legal notice, here is a blessing: |
6 ** | 6 ** |
7 ** May you do good and not evil. | 7 ** May you do good and not evil. |
8 ** May you find forgiveness for yourself and forgive others. | 8 ** May you find forgiveness for yourself and forgive others. |
9 ** May you share freely, never taking more than you give. | 9 ** May you share freely, never taking more than you give. |
10 ** | 10 ** |
(...skipping 427 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
438 u8 syncFlags; /* Flags to use to sync header writes */ | 438 u8 syncFlags; /* Flags to use to sync header writes */ |
439 u8 exclusiveMode; /* Non-zero if connection is in exclusive mode */ | 439 u8 exclusiveMode; /* Non-zero if connection is in exclusive mode */ |
440 u8 writeLock; /* True if in a write transaction */ | 440 u8 writeLock; /* True if in a write transaction */ |
441 u8 ckptLock; /* True if holding a checkpoint lock */ | 441 u8 ckptLock; /* True if holding a checkpoint lock */ |
442 u8 readOnly; /* WAL_RDWR, WAL_RDONLY, or WAL_SHM_RDONLY */ | 442 u8 readOnly; /* WAL_RDWR, WAL_RDONLY, or WAL_SHM_RDONLY */ |
443 u8 truncateOnCommit; /* True to truncate WAL file on commit */ | 443 u8 truncateOnCommit; /* True to truncate WAL file on commit */ |
444 u8 syncHeader; /* Fsync the WAL header if true */ | 444 u8 syncHeader; /* Fsync the WAL header if true */ |
445 u8 padToSectorBoundary; /* Pad transactions out to the next sector */ | 445 u8 padToSectorBoundary; /* Pad transactions out to the next sector */ |
446 WalIndexHdr hdr; /* Wal-index header for current transaction */ | 446 WalIndexHdr hdr; /* Wal-index header for current transaction */ |
447 u32 minFrame; /* Ignore wal frames before this one */ | 447 u32 minFrame; /* Ignore wal frames before this one */ |
| 448 u32 iReCksum; /* On commit, recalculate checksums from here */ |
448 const char *zWalName; /* Name of WAL file */ | 449 const char *zWalName; /* Name of WAL file */ |
449 u32 nCkpt; /* Checkpoint sequence counter in the wal-header */ | 450 u32 nCkpt; /* Checkpoint sequence counter in the wal-header */ |
450 #ifdef SQLITE_DEBUG | 451 #ifdef SQLITE_DEBUG |
451 u8 lockError; /* True if a locking error has occurred */ | 452 u8 lockError; /* True if a locking error has occurred */ |
452 #endif | 453 #endif |
453 #ifdef SQLITE_ENABLE_SNAPSHOT | 454 #ifdef SQLITE_ENABLE_SNAPSHOT |
454 WalIndexHdr *pSnapshot; /* Start transaction here if not NULL */ | 455 WalIndexHdr *pSnapshot; /* Start transaction here if not NULL */ |
455 #endif | 456 #endif |
456 }; | 457 }; |
457 | 458 |
(...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
538 static int walIndexPage(Wal *pWal, int iPage, volatile u32 **ppPage){ | 539 static int walIndexPage(Wal *pWal, int iPage, volatile u32 **ppPage){ |
539 int rc = SQLITE_OK; | 540 int rc = SQLITE_OK; |
540 | 541 |
541 /* Enlarge the pWal->apWiData[] array if required */ | 542 /* Enlarge the pWal->apWiData[] array if required */ |
542 if( pWal->nWiData<=iPage ){ | 543 if( pWal->nWiData<=iPage ){ |
543 int nByte = sizeof(u32*)*(iPage+1); | 544 int nByte = sizeof(u32*)*(iPage+1); |
544 volatile u32 **apNew; | 545 volatile u32 **apNew; |
545 apNew = (volatile u32 **)sqlite3_realloc64((void *)pWal->apWiData, nByte); | 546 apNew = (volatile u32 **)sqlite3_realloc64((void *)pWal->apWiData, nByte); |
546 if( !apNew ){ | 547 if( !apNew ){ |
547 *ppPage = 0; | 548 *ppPage = 0; |
548 return SQLITE_NOMEM; | 549 return SQLITE_NOMEM_BKPT; |
549 } | 550 } |
550 memset((void*)&apNew[pWal->nWiData], 0, | 551 memset((void*)&apNew[pWal->nWiData], 0, |
551 sizeof(u32*)*(iPage+1-pWal->nWiData)); | 552 sizeof(u32*)*(iPage+1-pWal->nWiData)); |
552 pWal->apWiData = apNew; | 553 pWal->apWiData = apNew; |
553 pWal->nWiData = iPage+1; | 554 pWal->nWiData = iPage+1; |
554 } | 555 } |
555 | 556 |
556 /* Request a pointer to the required page from the VFS */ | 557 /* Request a pointer to the required page from the VFS */ |
557 if( pWal->apWiData[iPage]==0 ){ | 558 if( pWal->apWiData[iPage]==0 ){ |
558 if( pWal->exclusiveMode==WAL_HEAPMEMORY_MODE ){ | 559 if( pWal->exclusiveMode==WAL_HEAPMEMORY_MODE ){ |
559 pWal->apWiData[iPage] = (u32 volatile *)sqlite3MallocZero(WALINDEX_PGSZ); | 560 pWal->apWiData[iPage] = (u32 volatile *)sqlite3MallocZero(WALINDEX_PGSZ); |
560 if( !pWal->apWiData[iPage] ) rc = SQLITE_NOMEM; | 561 if( !pWal->apWiData[iPage] ) rc = SQLITE_NOMEM_BKPT; |
561 }else{ | 562 }else{ |
562 rc = sqlite3OsShmMap(pWal->pDbFd, iPage, WALINDEX_PGSZ, | 563 rc = sqlite3OsShmMap(pWal->pDbFd, iPage, WALINDEX_PGSZ, |
563 pWal->writeLock, (void volatile **)&pWal->apWiData[iPage] | 564 pWal->writeLock, (void volatile **)&pWal->apWiData[iPage] |
564 ); | 565 ); |
565 if( rc==SQLITE_READONLY ){ | 566 if( rc==SQLITE_READONLY ){ |
566 pWal->readOnly |= WAL_SHM_RDONLY; | 567 pWal->readOnly |= WAL_SHM_RDONLY; |
567 rc = SQLITE_OK; | 568 rc = SQLITE_OK; |
568 } | 569 } |
569 } | 570 } |
570 } | 571 } |
(...skipping 120 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
691 u32 iPage, /* Database page number for frame */ | 692 u32 iPage, /* Database page number for frame */ |
692 u32 nTruncate, /* New db size (or 0 for non-commit frames) */ | 693 u32 nTruncate, /* New db size (or 0 for non-commit frames) */ |
693 u8 *aData, /* Pointer to page data */ | 694 u8 *aData, /* Pointer to page data */ |
694 u8 *aFrame /* OUT: Write encoded frame here */ | 695 u8 *aFrame /* OUT: Write encoded frame here */ |
695 ){ | 696 ){ |
696 int nativeCksum; /* True for native byte-order checksums */ | 697 int nativeCksum; /* True for native byte-order checksums */ |
697 u32 *aCksum = pWal->hdr.aFrameCksum; | 698 u32 *aCksum = pWal->hdr.aFrameCksum; |
698 assert( WAL_FRAME_HDRSIZE==24 ); | 699 assert( WAL_FRAME_HDRSIZE==24 ); |
699 sqlite3Put4byte(&aFrame[0], iPage); | 700 sqlite3Put4byte(&aFrame[0], iPage); |
700 sqlite3Put4byte(&aFrame[4], nTruncate); | 701 sqlite3Put4byte(&aFrame[4], nTruncate); |
701 memcpy(&aFrame[8], pWal->hdr.aSalt, 8); | 702 if( pWal->iReCksum==0 ){ |
| 703 memcpy(&aFrame[8], pWal->hdr.aSalt, 8); |
702 | 704 |
703 nativeCksum = (pWal->hdr.bigEndCksum==SQLITE_BIGENDIAN); | 705 nativeCksum = (pWal->hdr.bigEndCksum==SQLITE_BIGENDIAN); |
704 walChecksumBytes(nativeCksum, aFrame, 8, aCksum, aCksum); | 706 walChecksumBytes(nativeCksum, aFrame, 8, aCksum, aCksum); |
705 walChecksumBytes(nativeCksum, aData, pWal->szPage, aCksum, aCksum); | 707 walChecksumBytes(nativeCksum, aData, pWal->szPage, aCksum, aCksum); |
706 | 708 |
707 sqlite3Put4byte(&aFrame[16], aCksum[0]); | 709 sqlite3Put4byte(&aFrame[16], aCksum[0]); |
708 sqlite3Put4byte(&aFrame[20], aCksum[1]); | 710 sqlite3Put4byte(&aFrame[20], aCksum[1]); |
| 711 }else{ |
| 712 memset(&aFrame[8], 0, 16); |
| 713 } |
709 } | 714 } |
710 | 715 |
711 /* | 716 /* |
712 ** Check to see if the frame with header in aFrame[] and content | 717 ** Check to see if the frame with header in aFrame[] and content |
713 ** in aData[] is valid. If it is a valid frame, fill *piPage and | 718 ** in aData[] is valid. If it is a valid frame, fill *piPage and |
714 ** *pnTruncate and return true. Return false if the frame is not valid. | 719 ** *pnTruncate and return true. Return false if the frame is not valid. |
715 */ | 720 */ |
716 static int walDecodeFrame( | 721 static int walDecodeFrame( |
717 Wal *pWal, /* The write-ahead log */ | 722 Wal *pWal, /* The write-ahead log */ |
718 u32 *piPage, /* OUT: Database page number for frame */ | 723 u32 *piPage, /* OUT: Database page number for frame */ |
(...skipping 442 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1161 version = sqlite3Get4byte(&aBuf[4]); | 1166 version = sqlite3Get4byte(&aBuf[4]); |
1162 if( version!=WAL_MAX_VERSION ){ | 1167 if( version!=WAL_MAX_VERSION ){ |
1163 rc = SQLITE_CANTOPEN_BKPT; | 1168 rc = SQLITE_CANTOPEN_BKPT; |
1164 goto finished; | 1169 goto finished; |
1165 } | 1170 } |
1166 | 1171 |
1167 /* Malloc a buffer to read frames into. */ | 1172 /* Malloc a buffer to read frames into. */ |
1168 szFrame = szPage + WAL_FRAME_HDRSIZE; | 1173 szFrame = szPage + WAL_FRAME_HDRSIZE; |
1169 aFrame = (u8 *)sqlite3_malloc64(szFrame); | 1174 aFrame = (u8 *)sqlite3_malloc64(szFrame); |
1170 if( !aFrame ){ | 1175 if( !aFrame ){ |
1171 rc = SQLITE_NOMEM; | 1176 rc = SQLITE_NOMEM_BKPT; |
1172 goto recovery_error; | 1177 goto recovery_error; |
1173 } | 1178 } |
1174 aData = &aFrame[WAL_FRAME_HDRSIZE]; | 1179 aData = &aFrame[WAL_FRAME_HDRSIZE]; |
1175 | 1180 |
1176 /* Read all frames from the log file. */ | 1181 /* Read all frames from the log file. */ |
1177 iFrame = 0; | 1182 iFrame = 0; |
1178 for(iOffset=WAL_HDRSIZE; (iOffset+szFrame)<=nSize; iOffset+=szFrame){ | 1183 for(iOffset=WAL_HDRSIZE; (iOffset+szFrame)<=nSize; iOffset+=szFrame){ |
1179 u32 pgno; /* Database page number for frame */ | 1184 u32 pgno; /* Database page number for frame */ |
1180 u32 nTruncate; /* dbsize field from frame header */ | 1185 u32 nTruncate; /* dbsize field from frame header */ |
1181 | 1186 |
(...skipping 117 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1299 #endif | 1304 #endif |
1300 #ifdef UNIX_SHM_BASE | 1305 #ifdef UNIX_SHM_BASE |
1301 assert( UNIX_SHM_BASE==WALINDEX_LOCK_OFFSET ); | 1306 assert( UNIX_SHM_BASE==WALINDEX_LOCK_OFFSET ); |
1302 #endif | 1307 #endif |
1303 | 1308 |
1304 | 1309 |
1305 /* Allocate an instance of struct Wal to return. */ | 1310 /* Allocate an instance of struct Wal to return. */ |
1306 *ppWal = 0; | 1311 *ppWal = 0; |
1307 pRet = (Wal*)sqlite3MallocZero(sizeof(Wal) + pVfs->szOsFile); | 1312 pRet = (Wal*)sqlite3MallocZero(sizeof(Wal) + pVfs->szOsFile); |
1308 if( !pRet ){ | 1313 if( !pRet ){ |
1309 return SQLITE_NOMEM; | 1314 return SQLITE_NOMEM_BKPT; |
1310 } | 1315 } |
1311 | 1316 |
1312 pRet->pVfs = pVfs; | 1317 pRet->pVfs = pVfs; |
1313 pRet->pWalFd = (sqlite3_file *)&pRet[1]; | 1318 pRet->pWalFd = (sqlite3_file *)&pRet[1]; |
1314 pRet->pDbFd = pDbFd; | 1319 pRet->pDbFd = pDbFd; |
1315 pRet->readLock = -1; | 1320 pRet->readLock = -1; |
1316 pRet->mxWalSize = mxWalSize; | 1321 pRet->mxWalSize = mxWalSize; |
1317 pRet->zWalName = zWalName; | 1322 pRet->zWalName = zWalName; |
1318 pRet->syncHeader = 1; | 1323 pRet->syncHeader = 1; |
1319 pRet->padToSectorBoundary = 1; | 1324 pRet->padToSectorBoundary = 1; |
(...skipping 243 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1563 assert( pWal->ckptLock && pWal->hdr.mxFrame>0 ); | 1568 assert( pWal->ckptLock && pWal->hdr.mxFrame>0 ); |
1564 iLast = pWal->hdr.mxFrame; | 1569 iLast = pWal->hdr.mxFrame; |
1565 | 1570 |
1566 /* Allocate space for the WalIterator object. */ | 1571 /* Allocate space for the WalIterator object. */ |
1567 nSegment = walFramePage(iLast) + 1; | 1572 nSegment = walFramePage(iLast) + 1; |
1568 nByte = sizeof(WalIterator) | 1573 nByte = sizeof(WalIterator) |
1569 + (nSegment-1)*sizeof(struct WalSegment) | 1574 + (nSegment-1)*sizeof(struct WalSegment) |
1570 + iLast*sizeof(ht_slot); | 1575 + iLast*sizeof(ht_slot); |
1571 p = (WalIterator *)sqlite3_malloc64(nByte); | 1576 p = (WalIterator *)sqlite3_malloc64(nByte); |
1572 if( !p ){ | 1577 if( !p ){ |
1573 return SQLITE_NOMEM; | 1578 return SQLITE_NOMEM_BKPT; |
1574 } | 1579 } |
1575 memset(p, 0, nByte); | 1580 memset(p, 0, nByte); |
1576 p->nSegment = nSegment; | 1581 p->nSegment = nSegment; |
1577 | 1582 |
1578 /* Allocate temporary space used by the merge-sort routine. This block | 1583 /* Allocate temporary space used by the merge-sort routine. This block |
1579 ** of memory will be freed before this function returns. | 1584 ** of memory will be freed before this function returns. |
1580 */ | 1585 */ |
1581 aTmp = (ht_slot *)sqlite3_malloc64( | 1586 aTmp = (ht_slot *)sqlite3_malloc64( |
1582 sizeof(ht_slot) * (iLast>HASHTABLE_NPAGE?HASHTABLE_NPAGE:iLast) | 1587 sizeof(ht_slot) * (iLast>HASHTABLE_NPAGE?HASHTABLE_NPAGE:iLast) |
1583 ); | 1588 ); |
1584 if( !aTmp ){ | 1589 if( !aTmp ){ |
1585 rc = SQLITE_NOMEM; | 1590 rc = SQLITE_NOMEM_BKPT; |
1586 } | 1591 } |
1587 | 1592 |
1588 for(i=0; rc==SQLITE_OK && i<nSegment; i++){ | 1593 for(i=0; rc==SQLITE_OK && i<nSegment; i++){ |
1589 volatile ht_slot *aHash; | 1594 volatile ht_slot *aHash; |
1590 u32 iZero; | 1595 u32 iZero; |
1591 volatile u32 *aPgno; | 1596 volatile u32 *aPgno; |
1592 | 1597 |
1593 rc = walHashGet(pWal, i, &aHash, &aPgno, &iZero); | 1598 rc = walHashGet(pWal, i, &aHash, &aPgno, &iZero); |
1594 if( rc==SQLITE_OK ){ | 1599 if( rc==SQLITE_OK ){ |
1595 int j; /* Counter variable */ | 1600 int j; /* Counter variable */ |
(...skipping 115 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1711 ** This is the only routine that will increase the value of nBackfill. | 1716 ** This is the only routine that will increase the value of nBackfill. |
1712 ** (A WAL reset or recovery will revert nBackfill to zero, but not increase | 1717 ** (A WAL reset or recovery will revert nBackfill to zero, but not increase |
1713 ** its value.) | 1718 ** its value.) |
1714 ** | 1719 ** |
1715 ** The caller must be holding sufficient locks to ensure that no other | 1720 ** The caller must be holding sufficient locks to ensure that no other |
1716 ** checkpoint is running (in any other thread or process) at the same | 1721 ** checkpoint is running (in any other thread or process) at the same |
1717 ** time. | 1722 ** time. |
1718 */ | 1723 */ |
1719 static int walCheckpoint( | 1724 static int walCheckpoint( |
1720 Wal *pWal, /* Wal connection */ | 1725 Wal *pWal, /* Wal connection */ |
| 1726 sqlite3 *db, /* Check for interrupts on this handle */ |
1721 int eMode, /* One of PASSIVE, FULL or RESTART */ | 1727 int eMode, /* One of PASSIVE, FULL or RESTART */ |
1722 int (*xBusy)(void*), /* Function to call when busy */ | 1728 int (*xBusy)(void*), /* Function to call when busy */ |
1723 void *pBusyArg, /* Context argument for xBusyHandler */ | 1729 void *pBusyArg, /* Context argument for xBusyHandler */ |
1724 int sync_flags, /* Flags for OsSync() (or 0) */ | 1730 int sync_flags, /* Flags for OsSync() (or 0) */ |
1725 u8 *zBuf /* Temporary buffer to use */ | 1731 u8 *zBuf /* Temporary buffer to use */ |
1726 ){ | 1732 ){ |
1727 int rc = SQLITE_OK; /* Return code */ | 1733 int rc = SQLITE_OK; /* Return code */ |
1728 int szPage; /* Database page-size */ | 1734 int szPage; /* Database page-size */ |
1729 WalIterator *pIter = 0; /* Wal iterator context */ | 1735 WalIterator *pIter = 0; /* Wal iterator context */ |
1730 u32 iDbpage = 0; /* Next database page to write */ | 1736 u32 iDbpage = 0; /* Next database page to write */ |
(...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1805 if( rc==SQLITE_OK && nSize<nReq ){ | 1811 if( rc==SQLITE_OK && nSize<nReq ){ |
1806 sqlite3OsFileControlHint(pWal->pDbFd, SQLITE_FCNTL_SIZE_HINT, &nReq); | 1812 sqlite3OsFileControlHint(pWal->pDbFd, SQLITE_FCNTL_SIZE_HINT, &nReq); |
1807 } | 1813 } |
1808 } | 1814 } |
1809 | 1815 |
1810 | 1816 |
1811 /* Iterate through the contents of the WAL, copying data to the db file */ | 1817 /* Iterate through the contents of the WAL, copying data to the db file */ |
1812 while( rc==SQLITE_OK && 0==walIteratorNext(pIter, &iDbpage, &iFrame) ){ | 1818 while( rc==SQLITE_OK && 0==walIteratorNext(pIter, &iDbpage, &iFrame) ){ |
1813 i64 iOffset; | 1819 i64 iOffset; |
1814 assert( walFramePgno(pWal, iFrame)==iDbpage ); | 1820 assert( walFramePgno(pWal, iFrame)==iDbpage ); |
| 1821 if( db->u1.isInterrupted ){ |
| 1822 rc = db->mallocFailed ? SQLITE_NOMEM_BKPT : SQLITE_INTERRUPT; |
| 1823 break; |
| 1824 } |
1815 if( iFrame<=nBackfill || iFrame>mxSafeFrame || iDbpage>mxPage ){ | 1825 if( iFrame<=nBackfill || iFrame>mxSafeFrame || iDbpage>mxPage ){ |
1816 continue; | 1826 continue; |
1817 } | 1827 } |
1818 iOffset = walFrameOffset(iFrame, szPage) + WAL_FRAME_HDRSIZE; | 1828 iOffset = walFrameOffset(iFrame, szPage) + WAL_FRAME_HDRSIZE; |
1819 /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL file */ | 1829 /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL file */ |
1820 rc = sqlite3OsRead(pWal->pWalFd, zBuf, szPage, iOffset); | 1830 rc = sqlite3OsRead(pWal->pWalFd, zBuf, szPage, iOffset); |
1821 if( rc!=SQLITE_OK ) break; | 1831 if( rc!=SQLITE_OK ) break; |
1822 iOffset = (iDbpage-1)*(i64)szPage; | 1832 iOffset = (iDbpage-1)*(i64)szPage; |
1823 testcase( IS_BIG_INT(iOffset) ); | 1833 testcase( IS_BIG_INT(iOffset) ); |
1824 rc = sqlite3OsWrite(pWal->pDbFd, zBuf, szPage, iOffset); | 1834 rc = sqlite3OsWrite(pWal->pDbFd, zBuf, szPage, iOffset); |
(...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1909 if( rx ){ | 1919 if( rx ){ |
1910 sqlite3_log(rx, "cannot limit WAL size: %s", pWal->zWalName); | 1920 sqlite3_log(rx, "cannot limit WAL size: %s", pWal->zWalName); |
1911 } | 1921 } |
1912 } | 1922 } |
1913 | 1923 |
1914 /* | 1924 /* |
1915 ** Close a connection to a log file. | 1925 ** Close a connection to a log file. |
1916 */ | 1926 */ |
1917 int sqlite3WalClose( | 1927 int sqlite3WalClose( |
1918 Wal *pWal, /* Wal to close */ | 1928 Wal *pWal, /* Wal to close */ |
| 1929 sqlite3 *db, /* For interrupt flag */ |
1919 int sync_flags, /* Flags to pass to OsSync() (or 0) */ | 1930 int sync_flags, /* Flags to pass to OsSync() (or 0) */ |
1920 int nBuf, | 1931 int nBuf, |
1921 u8 *zBuf /* Buffer of at least nBuf bytes */ | 1932 u8 *zBuf /* Buffer of at least nBuf bytes */ |
1922 ){ | 1933 ){ |
1923 int rc = SQLITE_OK; | 1934 int rc = SQLITE_OK; |
1924 if( pWal ){ | 1935 if( pWal ){ |
1925 int isDelete = 0; /* True to unlink wal and wal-index files */ | 1936 int isDelete = 0; /* True to unlink wal and wal-index files */ |
1926 | 1937 |
1927 /* If an EXCLUSIVE lock can be obtained on the database file (using the | 1938 /* If an EXCLUSIVE lock can be obtained on the database file (using the |
1928 ** ordinary, rollback-mode locking methods), this guarantees that the | 1939 ** ordinary, rollback-mode locking methods), this guarantees that the |
1929 ** connection associated with this log file is the only connection to | 1940 ** connection associated with this log file is the only connection to |
1930 ** the database. In this case checkpoint the database and unlink both | 1941 ** the database. In this case checkpoint the database and unlink both |
1931 ** the wal and wal-index files. | 1942 ** the wal and wal-index files. |
1932 ** | 1943 ** |
1933 ** The EXCLUSIVE lock is not released before returning. | 1944 ** The EXCLUSIVE lock is not released before returning. |
1934 */ | 1945 */ |
1935 rc = sqlite3OsLock(pWal->pDbFd, SQLITE_LOCK_EXCLUSIVE); | 1946 if( zBuf!=0 |
1936 if( rc==SQLITE_OK ){ | 1947 && SQLITE_OK==(rc = sqlite3OsLock(pWal->pDbFd, SQLITE_LOCK_EXCLUSIVE)) |
| 1948 ){ |
1937 if( pWal->exclusiveMode==WAL_NORMAL_MODE ){ | 1949 if( pWal->exclusiveMode==WAL_NORMAL_MODE ){ |
1938 pWal->exclusiveMode = WAL_EXCLUSIVE_MODE; | 1950 pWal->exclusiveMode = WAL_EXCLUSIVE_MODE; |
1939 } | 1951 } |
1940 rc = sqlite3WalCheckpoint( | 1952 rc = sqlite3WalCheckpoint(pWal, db, |
1941 pWal, SQLITE_CHECKPOINT_PASSIVE, 0, 0, sync_flags, nBuf, zBuf, 0, 0 | 1953 SQLITE_CHECKPOINT_PASSIVE, 0, 0, sync_flags, nBuf, zBuf, 0, 0 |
1942 ); | 1954 ); |
1943 if( rc==SQLITE_OK ){ | 1955 if( rc==SQLITE_OK ){ |
1944 int bPersist = -1; | 1956 int bPersist = -1; |
1945 sqlite3OsFileControlHint( | 1957 sqlite3OsFileControlHint( |
1946 pWal->pDbFd, SQLITE_FCNTL_PERSIST_WAL, &bPersist | 1958 pWal->pDbFd, SQLITE_FCNTL_PERSIST_WAL, &bPersist |
1947 ); | 1959 ); |
1948 if( bPersist!=1 ){ | 1960 if( bPersist!=1 ){ |
1949 /* Try to delete the WAL file if the checkpoint completed and | 1961 /* Try to delete the WAL file if the checkpoint completed and |
1950 ** fsynced (rc==SQLITE_OK) and if we are not in persistent-wal | 1962 ** fsynced (rc==SQLITE_OK) and if we are not in persistent-wal |
1951 ** mode (!bPersist) */ | 1963 ** mode (!bPersist) */ |
(...skipping 408 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2360 ){ | 2372 ){ |
2361 walUnlockShared(pWal, WAL_READ_LOCK(mxI)); | 2373 walUnlockShared(pWal, WAL_READ_LOCK(mxI)); |
2362 return WAL_RETRY; | 2374 return WAL_RETRY; |
2363 }else{ | 2375 }else{ |
2364 assert( mxReadMark<=pWal->hdr.mxFrame ); | 2376 assert( mxReadMark<=pWal->hdr.mxFrame ); |
2365 pWal->readLock = (i16)mxI; | 2377 pWal->readLock = (i16)mxI; |
2366 } | 2378 } |
2367 return rc; | 2379 return rc; |
2368 } | 2380 } |
2369 | 2381 |
| 2382 #ifdef SQLITE_ENABLE_SNAPSHOT |
| 2383 /* |
| 2384 ** Attempt to reduce the value of the WalCkptInfo.nBackfillAttempted |
| 2385 ** variable so that older snapshots can be accessed. To do this, loop |
| 2386 ** through all wal frames from nBackfillAttempted to (nBackfill+1), |
| 2387 ** comparing their content to the corresponding page in the database |
| 2388 ** file, if any. Set nBackfillAttempted to the frame number of the |
| 2389 ** first frame for which the wal file content matches the db file. |
| 2390 ** |
| 2391 ** This is only really safe if the file-system is such that any page |
| 2392 ** writes made by earlier checkpointers were atomic operations, which |
| 2393 ** is not always true. It is also possible that nBackfillAttempted |
| 2394 ** may be left set to a value larger than expected, if a wal frame |
| 2395 ** contains content that is a duplicate of an earlier version of the same |
| 2396 ** page. |
| 2397 ** |
| 2398 ** SQLITE_OK is returned if successful, or an SQLite error code if an |
| 2399 ** error occurs. It is not an error if nBackfillAttempted cannot be |
| 2400 ** decreased at all. |
| 2401 */ |
| 2402 int sqlite3WalSnapshotRecover(Wal *pWal){ |
| 2403 int rc; |
| 2404 |
| 2405 assert( pWal->readLock>=0 ); |
| 2406 rc = walLockExclusive(pWal, WAL_CKPT_LOCK, 1); |
| 2407 if( rc==SQLITE_OK ){ |
| 2408 volatile WalCkptInfo *pInfo = walCkptInfo(pWal); |
| 2409 int szPage = (int)pWal->szPage; |
| 2410 i64 szDb; /* Size of db file in bytes */ |
| 2411 |
| 2412 rc = sqlite3OsFileSize(pWal->pDbFd, &szDb); |
| 2413 if( rc==SQLITE_OK ){ |
| 2414 void *pBuf1 = sqlite3_malloc(szPage); |
| 2415 void *pBuf2 = sqlite3_malloc(szPage); |
| 2416 if( pBuf1==0 || pBuf2==0 ){ |
| 2417 rc = SQLITE_NOMEM; |
| 2418 }else{ |
| 2419 u32 i = pInfo->nBackfillAttempted; |
| 2420 for(i=pInfo->nBackfillAttempted; i>pInfo->nBackfill; i--){ |
| 2421 volatile ht_slot *dummy; |
| 2422 volatile u32 *aPgno; /* Array of page numbers */ |
| 2423 u32 iZero; /* Frame corresponding to aPgno[0] */ |
| 2424 u32 pgno; /* Page number in db file */ |
| 2425 i64 iDbOff; /* Offset of db file entry */ |
| 2426 i64 iWalOff; /* Offset of wal file entry */ |
| 2427 |
| 2428 rc = walHashGet(pWal, walFramePage(i), &dummy, &aPgno, &iZero); |
| 2429 if( rc!=SQLITE_OK ) break; |
| 2430 pgno = aPgno[i-iZero]; |
| 2431 iDbOff = (i64)(pgno-1) * szPage; |
| 2432 |
| 2433 if( iDbOff+szPage<=szDb ){ |
| 2434 iWalOff = walFrameOffset(i, szPage) + WAL_FRAME_HDRSIZE; |
| 2435 rc = sqlite3OsRead(pWal->pWalFd, pBuf1, szPage, iWalOff); |
| 2436 |
| 2437 if( rc==SQLITE_OK ){ |
| 2438 rc = sqlite3OsRead(pWal->pDbFd, pBuf2, szPage, iDbOff); |
| 2439 } |
| 2440 |
| 2441 if( rc!=SQLITE_OK || 0==memcmp(pBuf1, pBuf2, szPage) ){ |
| 2442 break; |
| 2443 } |
| 2444 } |
| 2445 |
| 2446 pInfo->nBackfillAttempted = i-1; |
| 2447 } |
| 2448 } |
| 2449 |
| 2450 sqlite3_free(pBuf1); |
| 2451 sqlite3_free(pBuf2); |
| 2452 } |
| 2453 walUnlockExclusive(pWal, WAL_CKPT_LOCK, 1); |
| 2454 } |
| 2455 |
| 2456 return rc; |
| 2457 } |
| 2458 #endif /* SQLITE_ENABLE_SNAPSHOT */ |
| 2459 |
2370 /* | 2460 /* |
2371 ** Begin a read transaction on the database. | 2461 ** Begin a read transaction on the database. |
2372 ** | 2462 ** |
2373 ** This routine used to be called sqlite3OpenSnapshot() and with good reason: | 2463 ** This routine used to be called sqlite3OpenSnapshot() and with good reason: |
2374 ** it takes a snapshot of the state of the WAL and wal-index for the current | 2464 ** it takes a snapshot of the state of the WAL and wal-index for the current |
2375 ** instant in time. The current thread will continue to use this snapshot. | 2465 ** instant in time. The current thread will continue to use this snapshot. |
2376 ** Other threads might append new content to the WAL and wal-index but | 2466 ** Other threads might append new content to the WAL and wal-index but |
2377 ** that extra content is ignored by the current thread. | 2467 ** that extra content is ignored by the current thread. |
2378 ** | 2468 ** |
2379 ** If the database contents have changed since the previous read | 2469 ** If the database contents have changed since the previous read |
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2422 assert( pWal->readLock>0 || pWal->hdr.mxFrame==0 ); | 2512 assert( pWal->readLock>0 || pWal->hdr.mxFrame==0 ); |
2423 assert( pInfo->aReadMark[pWal->readLock]<=pSnapshot->mxFrame ); | 2513 assert( pInfo->aReadMark[pWal->readLock]<=pSnapshot->mxFrame ); |
2424 | 2514 |
2425 /* It is possible that there is a checkpointer thread running | 2515 /* It is possible that there is a checkpointer thread running |
2426 ** concurrent with this code. If this is the case, it may be that the | 2516 ** concurrent with this code. If this is the case, it may be that the |
2427 ** checkpointer has already determined that it will checkpoint | 2517 ** checkpointer has already determined that it will checkpoint |
2428 ** snapshot X, where X is later in the wal file than pSnapshot, but | 2518 ** snapshot X, where X is later in the wal file than pSnapshot, but |
2429 ** has not yet set the pInfo->nBackfillAttempted variable to indicate | 2519 ** has not yet set the pInfo->nBackfillAttempted variable to indicate |
2430 ** its intent. To avoid the race condition this leads to, ensure that | 2520 ** its intent. To avoid the race condition this leads to, ensure that |
2431 ** there is no checkpointer process by taking a shared CKPT lock | 2521 ** there is no checkpointer process by taking a shared CKPT lock |
2432 ** before checking pInfo->nBackfillAttempted. */ | 2522 ** before checking pInfo->nBackfillAttempted. |
| 2523 ** |
| 2524 ** TODO: Does the aReadMark[] lock prevent a checkpointer from doing |
| 2525 ** this already? |
| 2526 */ |
2433 rc = walLockShared(pWal, WAL_CKPT_LOCK); | 2527 rc = walLockShared(pWal, WAL_CKPT_LOCK); |
2434 | 2528 |
2435 if( rc==SQLITE_OK ){ | 2529 if( rc==SQLITE_OK ){ |
2436 /* Check that the wal file has not been wrapped. Assuming that it has | 2530 /* Check that the wal file has not been wrapped. Assuming that it has |
2437 ** not, also check that no checkpointer has attempted to checkpoint any | 2531 ** not, also check that no checkpointer has attempted to checkpoint any |
2438 ** frames beyond pSnapshot->mxFrame. If either of these conditions are | 2532 ** frames beyond pSnapshot->mxFrame. If either of these conditions are |
2439 ** true, return SQLITE_BUSY_SNAPSHOT. Otherwise, overwrite pWal->hdr | 2533 ** true, return SQLITE_BUSY_SNAPSHOT. Otherwise, overwrite pWal->hdr |
2440 ** with *pSnapshot and set *pChanged as appropriate for opening the | 2534 ** with *pSnapshot and set *pChanged as appropriate for opening the |
2441 ** snapshot. */ | 2535 ** snapshot. */ |
2442 if( !memcmp(pSnapshot->aSalt, pWal->hdr.aSalt, sizeof(pWal->hdr.aSalt)) | 2536 if( !memcmp(pSnapshot->aSalt, pWal->hdr.aSalt, sizeof(pWal->hdr.aSalt)) |
(...skipping 182 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2625 ** returns SQLITE_BUSY in that case and no write transaction is started. | 2719 ** returns SQLITE_BUSY in that case and no write transaction is started. |
2626 ** | 2720 ** |
2627 ** There can only be a single writer active at a time. | 2721 ** There can only be a single writer active at a time. |
2628 */ | 2722 */ |
2629 int sqlite3WalBeginWriteTransaction(Wal *pWal){ | 2723 int sqlite3WalBeginWriteTransaction(Wal *pWal){ |
2630 int rc; | 2724 int rc; |
2631 | 2725 |
2632 /* Cannot start a write transaction without first holding a read | 2726 /* Cannot start a write transaction without first holding a read |
2633 ** transaction. */ | 2727 ** transaction. */ |
2634 assert( pWal->readLock>=0 ); | 2728 assert( pWal->readLock>=0 ); |
| 2729 assert( pWal->writeLock==0 && pWal->iReCksum==0 ); |
2635 | 2730 |
2636 if( pWal->readOnly ){ | 2731 if( pWal->readOnly ){ |
2637 return SQLITE_READONLY; | 2732 return SQLITE_READONLY; |
2638 } | 2733 } |
2639 | 2734 |
2640 /* Only one writer allowed at a time. Get the write lock. Return | 2735 /* Only one writer allowed at a time. Get the write lock. Return |
2641 ** SQLITE_BUSY if unable. | 2736 ** SQLITE_BUSY if unable. |
2642 */ | 2737 */ |
2643 rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1); | 2738 rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1); |
2644 if( rc ){ | 2739 if( rc ){ |
(...skipping 15 matching lines...) Expand all Loading... |
2660 } | 2755 } |
2661 | 2756 |
2662 /* | 2757 /* |
2663 ** End a write transaction. The commit has already been done. This | 2758 ** End a write transaction. The commit has already been done. This |
2664 ** routine merely releases the lock. | 2759 ** routine merely releases the lock. |
2665 */ | 2760 */ |
2666 int sqlite3WalEndWriteTransaction(Wal *pWal){ | 2761 int sqlite3WalEndWriteTransaction(Wal *pWal){ |
2667 if( pWal->writeLock ){ | 2762 if( pWal->writeLock ){ |
2668 walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1); | 2763 walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1); |
2669 pWal->writeLock = 0; | 2764 pWal->writeLock = 0; |
| 2765 pWal->iReCksum = 0; |
2670 pWal->truncateOnCommit = 0; | 2766 pWal->truncateOnCommit = 0; |
2671 } | 2767 } |
2672 return SQLITE_OK; | 2768 return SQLITE_OK; |
2673 } | 2769 } |
2674 | 2770 |
2675 /* | 2771 /* |
2676 ** If any data has been written (but not committed) to the log file, this | 2772 ** If any data has been written (but not committed) to the log file, this |
2677 ** function moves the write-pointer back to the start of the transaction. | 2773 ** function moves the write-pointer back to the start of the transaction. |
2678 ** | 2774 ** |
2679 ** Additionally, the callback function is invoked for each frame written | 2775 ** Additionally, the callback function is invoked for each frame written |
(...skipping 186 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2866 static int walWriteOneFrame( | 2962 static int walWriteOneFrame( |
2867 WalWriter *p, /* Where to write the frame */ | 2963 WalWriter *p, /* Where to write the frame */ |
2868 PgHdr *pPage, /* The page of the frame to be written */ | 2964 PgHdr *pPage, /* The page of the frame to be written */ |
2869 int nTruncate, /* The commit flag. Usually 0. >0 for commit */ | 2965 int nTruncate, /* The commit flag. Usually 0. >0 for commit */ |
2870 sqlite3_int64 iOffset /* Byte offset at which to write */ | 2966 sqlite3_int64 iOffset /* Byte offset at which to write */ |
2871 ){ | 2967 ){ |
2872 int rc; /* Result code from subfunctions */ | 2968 int rc; /* Result code from subfunctions */ |
2873 void *pData; /* Data actually written */ | 2969 void *pData; /* Data actually written */ |
2874 u8 aFrame[WAL_FRAME_HDRSIZE]; /* Buffer to assemble frame-header in */ | 2970 u8 aFrame[WAL_FRAME_HDRSIZE]; /* Buffer to assemble frame-header in */ |
2875 #if defined(SQLITE_HAS_CODEC) | 2971 #if defined(SQLITE_HAS_CODEC) |
2876 if( (pData = sqlite3PagerCodec(pPage))==0 ) return SQLITE_NOMEM; | 2972 if( (pData = sqlite3PagerCodec(pPage))==0 ) return SQLITE_NOMEM_BKPT; |
2877 #else | 2973 #else |
2878 pData = pPage->pData; | 2974 pData = pPage->pData; |
2879 #endif | 2975 #endif |
2880 walEncodeFrame(p->pWal, pPage->pgno, nTruncate, pData, aFrame); | 2976 walEncodeFrame(p->pWal, pPage->pgno, nTruncate, pData, aFrame); |
2881 rc = walWriteToLog(p, aFrame, sizeof(aFrame), iOffset); | 2977 rc = walWriteToLog(p, aFrame, sizeof(aFrame), iOffset); |
2882 if( rc ) return rc; | 2978 if( rc ) return rc; |
2883 /* Write the page data */ | 2979 /* Write the page data */ |
2884 rc = walWriteToLog(p, pData, p->szPage, iOffset+sizeof(aFrame)); | 2980 rc = walWriteToLog(p, pData, p->szPage, iOffset+sizeof(aFrame)); |
2885 return rc; | 2981 return rc; |
2886 } | 2982 } |
2887 | 2983 |
| 2984 /* |
| 2985 ** This function is called as part of committing a transaction within which |
| 2986 ** one or more frames have been overwritten. It updates the checksums for |
| 2987 ** all frames written to the wal file by the current transaction starting |
| 2988 ** with the earliest to have been overwritten. |
| 2989 ** |
| 2990 ** SQLITE_OK is returned if successful, or an SQLite error code otherwise. |
| 2991 */ |
| 2992 static int walRewriteChecksums(Wal *pWal, u32 iLast){ |
| 2993 const int szPage = pWal->szPage;/* Database page size */ |
| 2994 int rc = SQLITE_OK; /* Return code */ |
| 2995 u8 *aBuf; /* Buffer to load data from wal file into */ |
| 2996 u8 aFrame[WAL_FRAME_HDRSIZE]; /* Buffer to assemble frame-headers in */ |
| 2997 u32 iRead; /* Next frame to read from wal file */ |
| 2998 i64 iCksumOff; |
| 2999 |
| 3000 aBuf = sqlite3_malloc(szPage + WAL_FRAME_HDRSIZE); |
| 3001 if( aBuf==0 ) return SQLITE_NOMEM_BKPT; |
| 3002 |
| 3003 /* Find the checksum values to use as input for recalculating the |
| 3004 ** first checksum. If the first frame is frame 1 (implying that the current |
| 3005 ** transaction restarted the wal file), these values must be read from the |
| 3006 ** wal-file header. Otherwise, read them from the frame header of the |
| 3007 ** previous frame. */ |
| 3008 assert( pWal->iReCksum>0 ); |
| 3009 if( pWal->iReCksum==1 ){ |
| 3010 iCksumOff = 24; |
| 3011 }else{ |
| 3012 iCksumOff = walFrameOffset(pWal->iReCksum-1, szPage) + 16; |
| 3013 } |
| 3014 rc = sqlite3OsRead(pWal->pWalFd, aBuf, sizeof(u32)*2, iCksumOff); |
| 3015 pWal->hdr.aFrameCksum[0] = sqlite3Get4byte(aBuf); |
| 3016 pWal->hdr.aFrameCksum[1] = sqlite3Get4byte(&aBuf[sizeof(u32)]); |
| 3017 |
| 3018 iRead = pWal->iReCksum; |
| 3019 pWal->iReCksum = 0; |
| 3020 for(; rc==SQLITE_OK && iRead<=iLast; iRead++){ |
| 3021 i64 iOff = walFrameOffset(iRead, szPage); |
| 3022 rc = sqlite3OsRead(pWal->pWalFd, aBuf, szPage+WAL_FRAME_HDRSIZE, iOff); |
| 3023 if( rc==SQLITE_OK ){ |
| 3024 u32 iPgno, nDbSize; |
| 3025 iPgno = sqlite3Get4byte(aBuf); |
| 3026 nDbSize = sqlite3Get4byte(&aBuf[4]); |
| 3027 |
| 3028 walEncodeFrame(pWal, iPgno, nDbSize, &aBuf[WAL_FRAME_HDRSIZE], aFrame); |
| 3029 rc = sqlite3OsWrite(pWal->pWalFd, aFrame, sizeof(aFrame), iOff); |
| 3030 } |
| 3031 } |
| 3032 |
| 3033 sqlite3_free(aBuf); |
| 3034 return rc; |
| 3035 } |
| 3036 |
2888 /* | 3037 /* |
2889 ** Write a set of frames to the log. The caller must hold the write-lock | 3038 ** Write a set of frames to the log. The caller must hold the write-lock |
2890 ** on the log file (obtained using sqlite3WalBeginWriteTransaction()). | 3039 ** on the log file (obtained using sqlite3WalBeginWriteTransaction()). |
2891 */ | 3040 */ |
2892 int sqlite3WalFrames( | 3041 int sqlite3WalFrames( |
2893 Wal *pWal, /* Wal handle to write to */ | 3042 Wal *pWal, /* Wal handle to write to */ |
2894 int szPage, /* Database page-size in bytes */ | 3043 int szPage, /* Database page-size in bytes */ |
2895 PgHdr *pList, /* List of dirty pages to write */ | 3044 PgHdr *pList, /* List of dirty pages to write */ |
2896 Pgno nTruncate, /* Database size after this commit */ | 3045 Pgno nTruncate, /* Database size after this commit */ |
2897 int isCommit, /* True if this is a commit */ | 3046 int isCommit, /* True if this is a commit */ |
2898 int sync_flags /* Flags to pass to OsSync() (or 0) */ | 3047 int sync_flags /* Flags to pass to OsSync() (or 0) */ |
2899 ){ | 3048 ){ |
2900 int rc; /* Used to catch return codes */ | 3049 int rc; /* Used to catch return codes */ |
2901 u32 iFrame; /* Next frame address */ | 3050 u32 iFrame; /* Next frame address */ |
2902 PgHdr *p; /* Iterator to run through pList with. */ | 3051 PgHdr *p; /* Iterator to run through pList with. */ |
2903 PgHdr *pLast = 0; /* Last frame in list */ | 3052 PgHdr *pLast = 0; /* Last frame in list */ |
2904 int nExtra = 0; /* Number of extra copies of last page */ | 3053 int nExtra = 0; /* Number of extra copies of last page */ |
2905 int szFrame; /* The size of a single frame */ | 3054 int szFrame; /* The size of a single frame */ |
2906 i64 iOffset; /* Next byte to write in WAL file */ | 3055 i64 iOffset; /* Next byte to write in WAL file */ |
2907 WalWriter w; /* The writer */ | 3056 WalWriter w; /* The writer */ |
| 3057 u32 iFirst = 0; /* First frame that may be overwritten */ |
| 3058 WalIndexHdr *pLive; /* Pointer to shared header */ |
2908 | 3059 |
2909 assert( pList ); | 3060 assert( pList ); |
2910 assert( pWal->writeLock ); | 3061 assert( pWal->writeLock ); |
2911 | 3062 |
2912 /* If this frame set completes a transaction, then nTruncate>0. If | 3063 /* If this frame set completes a transaction, then nTruncate>0. If |
2913 ** nTruncate==0 then this frame set does not complete the transaction. */ | 3064 ** nTruncate==0 then this frame set does not complete the transaction. */ |
2914 assert( (isCommit!=0)==(nTruncate!=0) ); | 3065 assert( (isCommit!=0)==(nTruncate!=0) ); |
2915 | 3066 |
2916 #if defined(SQLITE_TEST) && defined(SQLITE_DEBUG) | 3067 #if defined(SQLITE_TEST) && defined(SQLITE_DEBUG) |
2917 { int cnt; for(cnt=0, p=pList; p; p=p->pDirty, cnt++){} | 3068 { int cnt; for(cnt=0, p=pList; p; p=p->pDirty, cnt++){} |
2918 WALTRACE(("WAL%p: frame write begin. %d frames. mxFrame=%d. %s\n", | 3069 WALTRACE(("WAL%p: frame write begin. %d frames. mxFrame=%d. %s\n", |
2919 pWal, cnt, pWal->hdr.mxFrame, isCommit ? "Commit" : "Spill")); | 3070 pWal, cnt, pWal->hdr.mxFrame, isCommit ? "Commit" : "Spill")); |
2920 } | 3071 } |
2921 #endif | 3072 #endif |
2922 | 3073 |
| 3074 pLive = (WalIndexHdr*)walIndexHdr(pWal); |
| 3075 if( memcmp(&pWal->hdr, (void *)pLive, sizeof(WalIndexHdr))!=0 ){ |
| 3076 iFirst = pLive->mxFrame+1; |
| 3077 } |
| 3078 |
2923 /* See if it is possible to write these frames into the start of the | 3079 /* See if it is possible to write these frames into the start of the |
2924 ** log file, instead of appending to it at pWal->hdr.mxFrame. | 3080 ** log file, instead of appending to it at pWal->hdr.mxFrame. |
2925 */ | 3081 */ |
2926 if( SQLITE_OK!=(rc = walRestartLog(pWal)) ){ | 3082 if( SQLITE_OK!=(rc = walRestartLog(pWal)) ){ |
2927 return rc; | 3083 return rc; |
2928 } | 3084 } |
2929 | 3085 |
2930 /* If this is the first frame written into the log, write the WAL | 3086 /* If this is the first frame written into the log, write the WAL |
2931 ** header to the start of the WAL file. See comments at the top of | 3087 ** header to the start of the WAL file. See comments at the top of |
2932 ** this source file for a description of the WAL header format. | 3088 ** this source file for a description of the WAL header format. |
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2977 w.pFd = pWal->pWalFd; | 3133 w.pFd = pWal->pWalFd; |
2978 w.iSyncPoint = 0; | 3134 w.iSyncPoint = 0; |
2979 w.syncFlags = sync_flags; | 3135 w.syncFlags = sync_flags; |
2980 w.szPage = szPage; | 3136 w.szPage = szPage; |
2981 iOffset = walFrameOffset(iFrame+1, szPage); | 3137 iOffset = walFrameOffset(iFrame+1, szPage); |
2982 szFrame = szPage + WAL_FRAME_HDRSIZE; | 3138 szFrame = szPage + WAL_FRAME_HDRSIZE; |
2983 | 3139 |
2984 /* Write all frames into the log file exactly once */ | 3140 /* Write all frames into the log file exactly once */ |
2985 for(p=pList; p; p=p->pDirty){ | 3141 for(p=pList; p; p=p->pDirty){ |
2986 int nDbSize; /* 0 normally. Positive == commit flag */ | 3142 int nDbSize; /* 0 normally. Positive == commit flag */ |
| 3143 |
| 3144 /* Check if this page has already been written into the wal file by |
| 3145 ** the current transaction. If so, overwrite the existing frame and |
| 3146 ** update Wal.iReCksum to the lowest overwritten frame - indicating that |
| 3147 ** checksums must be recomputed when the transaction is committed. */ |
| 3148 if( iFirst && (p->pDirty || isCommit==0) ){ |
| 3149 u32 iWrite = 0; |
| 3150 VVA_ONLY(rc =) sqlite3WalFindFrame(pWal, p->pgno, &iWrite); |
| 3151 assert( rc==SQLITE_OK || iWrite==0 ); |
| 3152 if( iWrite>=iFirst ){ |
| 3153 i64 iOff = walFrameOffset(iWrite, szPage) + WAL_FRAME_HDRSIZE; |
| 3154 void *pData; |
| 3155 if( pWal->iReCksum==0 || iWrite<pWal->iReCksum ){ |
| 3156 pWal->iReCksum = iWrite; |
| 3157 } |
| 3158 #if defined(SQLITE_HAS_CODEC) |
| 3159 if( (pData = sqlite3PagerCodec(p))==0 ) return SQLITE_NOMEM; |
| 3160 #else |
| 3161 pData = p->pData; |
| 3162 #endif |
| 3163 rc = sqlite3OsWrite(pWal->pWalFd, pData, szPage, iOff); |
| 3164 if( rc ) return rc; |
| 3165 p->flags &= ~PGHDR_WAL_APPEND; |
| 3166 continue; |
| 3167 } |
| 3168 } |
| 3169 |
2987 iFrame++; | 3170 iFrame++; |
2988 assert( iOffset==walFrameOffset(iFrame, szPage) ); | 3171 assert( iOffset==walFrameOffset(iFrame, szPage) ); |
2989 nDbSize = (isCommit && p->pDirty==0) ? nTruncate : 0; | 3172 nDbSize = (isCommit && p->pDirty==0) ? nTruncate : 0; |
2990 rc = walWriteOneFrame(&w, p, nDbSize, iOffset); | 3173 rc = walWriteOneFrame(&w, p, nDbSize, iOffset); |
2991 if( rc ) return rc; | 3174 if( rc ) return rc; |
2992 pLast = p; | 3175 pLast = p; |
2993 iOffset += szFrame; | 3176 iOffset += szFrame; |
| 3177 p->flags |= PGHDR_WAL_APPEND; |
| 3178 } |
| 3179 |
| 3180 /* Recalculate checksums within the wal file if required. */ |
| 3181 if( isCommit && pWal->iReCksum ){ |
| 3182 rc = walRewriteChecksums(pWal, iFrame); |
| 3183 if( rc ) return rc; |
2994 } | 3184 } |
2995 | 3185 |
2996 /* If this is the end of a transaction, then we might need to pad | 3186 /* If this is the end of a transaction, then we might need to pad |
2997 ** the transaction and/or sync the WAL file. | 3187 ** the transaction and/or sync the WAL file. |
2998 ** | 3188 ** |
2999 ** Padding and syncing only occur if this set of frames complete a | 3189 ** Padding and syncing only occur if this set of frames complete a |
3000 ** transaction and if PRAGMA synchronous=FULL. If synchronous==NORMAL | 3190 ** transaction and if PRAGMA synchronous=FULL. If synchronous==NORMAL |
3001 ** or synchronous==OFF, then no padding or syncing are needed. | 3191 ** or synchronous==OFF, then no padding or syncing are needed. |
3002 ** | 3192 ** |
3003 ** If SQLITE_IOCAP_POWERSAFE_OVERWRITE is defined, then padding is not | 3193 ** If SQLITE_IOCAP_POWERSAFE_OVERWRITE is defined, then padding is not |
3004 ** needed and only the sync is done. If padding is needed, then the | 3194 ** needed and only the sync is done. If padding is needed, then the |
3005 ** final frame is repeated (with its commit mark) until the next sector | 3195 ** final frame is repeated (with its commit mark) until the next sector |
3006 ** boundary is crossed. Only the part of the WAL prior to the last | 3196 ** boundary is crossed. Only the part of the WAL prior to the last |
3007 ** sector boundary is synced; the part of the last frame that extends | 3197 ** sector boundary is synced; the part of the last frame that extends |
3008 ** past the sector boundary is written after the sync. | 3198 ** past the sector boundary is written after the sync. |
3009 */ | 3199 */ |
3010 if( isCommit && (sync_flags & WAL_SYNC_TRANSACTIONS)!=0 ){ | 3200 if( isCommit && (sync_flags & WAL_SYNC_TRANSACTIONS)!=0 ){ |
| 3201 int bSync = 1; |
3011 if( pWal->padToSectorBoundary ){ | 3202 if( pWal->padToSectorBoundary ){ |
3012 int sectorSize = sqlite3SectorSize(pWal->pWalFd); | 3203 int sectorSize = sqlite3SectorSize(pWal->pWalFd); |
3013 w.iSyncPoint = ((iOffset+sectorSize-1)/sectorSize)*sectorSize; | 3204 w.iSyncPoint = ((iOffset+sectorSize-1)/sectorSize)*sectorSize; |
| 3205 bSync = (w.iSyncPoint==iOffset); |
| 3206 testcase( bSync ); |
3014 while( iOffset<w.iSyncPoint ){ | 3207 while( iOffset<w.iSyncPoint ){ |
3015 rc = walWriteOneFrame(&w, pLast, nTruncate, iOffset); | 3208 rc = walWriteOneFrame(&w, pLast, nTruncate, iOffset); |
3016 if( rc ) return rc; | 3209 if( rc ) return rc; |
3017 iOffset += szFrame; | 3210 iOffset += szFrame; |
3018 nExtra++; | 3211 nExtra++; |
3019 } | 3212 } |
3020 }else{ | 3213 } |
| 3214 if( bSync ){ |
| 3215 assert( rc==SQLITE_OK ); |
3021 rc = sqlite3OsSync(w.pFd, sync_flags & SQLITE_SYNC_MASK); | 3216 rc = sqlite3OsSync(w.pFd, sync_flags & SQLITE_SYNC_MASK); |
3022 } | 3217 } |
3023 } | 3218 } |
3024 | 3219 |
3025 /* If this frame set completes the first transaction in the WAL and | 3220 /* If this frame set completes the first transaction in the WAL and |
3026 ** if PRAGMA journal_size_limit is set, then truncate the WAL to the | 3221 ** if PRAGMA journal_size_limit is set, then truncate the WAL to the |
3027 ** journal size limit, if possible. | 3222 ** journal size limit, if possible. |
3028 */ | 3223 */ |
3029 if( isCommit && pWal->truncateOnCommit && pWal->mxWalSize>=0 ){ | 3224 if( isCommit && pWal->truncateOnCommit && pWal->mxWalSize>=0 ){ |
3030 i64 sz = pWal->mxWalSize; | 3225 i64 sz = pWal->mxWalSize; |
3031 if( walFrameOffset(iFrame+nExtra+1, szPage)>pWal->mxWalSize ){ | 3226 if( walFrameOffset(iFrame+nExtra+1, szPage)>pWal->mxWalSize ){ |
3032 sz = walFrameOffset(iFrame+nExtra+1, szPage); | 3227 sz = walFrameOffset(iFrame+nExtra+1, szPage); |
3033 } | 3228 } |
3034 walLimitSize(pWal, sz); | 3229 walLimitSize(pWal, sz); |
3035 pWal->truncateOnCommit = 0; | 3230 pWal->truncateOnCommit = 0; |
3036 } | 3231 } |
3037 | 3232 |
3038 /* Append data to the wal-index. It is not necessary to lock the | 3233 /* Append data to the wal-index. It is not necessary to lock the |
3039 ** wal-index to do this as the SQLITE_SHM_WRITE lock held on the wal-index | 3234 ** wal-index to do this as the SQLITE_SHM_WRITE lock held on the wal-index |
3040 ** guarantees that there are no other writers, and no data that may | 3235 ** guarantees that there are no other writers, and no data that may |
3041 ** be in use by existing readers is being overwritten. | 3236 ** be in use by existing readers is being overwritten. |
3042 */ | 3237 */ |
3043 iFrame = pWal->hdr.mxFrame; | 3238 iFrame = pWal->hdr.mxFrame; |
3044 for(p=pList; p && rc==SQLITE_OK; p=p->pDirty){ | 3239 for(p=pList; p && rc==SQLITE_OK; p=p->pDirty){ |
| 3240 if( (p->flags & PGHDR_WAL_APPEND)==0 ) continue; |
3045 iFrame++; | 3241 iFrame++; |
3046 rc = walIndexAppend(pWal, iFrame, p->pgno); | 3242 rc = walIndexAppend(pWal, iFrame, p->pgno); |
3047 } | 3243 } |
3048 while( rc==SQLITE_OK && nExtra>0 ){ | 3244 while( rc==SQLITE_OK && nExtra>0 ){ |
3049 iFrame++; | 3245 iFrame++; |
3050 nExtra--; | 3246 nExtra--; |
3051 rc = walIndexAppend(pWal, iFrame, pLast->pgno); | 3247 rc = walIndexAppend(pWal, iFrame, pLast->pgno); |
3052 } | 3248 } |
3053 | 3249 |
3054 if( rc==SQLITE_OK ){ | 3250 if( rc==SQLITE_OK ){ |
(...skipping 22 matching lines...) Expand all Loading... |
3077 ** related interfaces. | 3273 ** related interfaces. |
3078 ** | 3274 ** |
3079 ** Obtain a CHECKPOINT lock and then backfill as much information as | 3275 ** Obtain a CHECKPOINT lock and then backfill as much information as |
3080 ** we can from WAL into the database. | 3276 ** we can from WAL into the database. |
3081 ** | 3277 ** |
3082 ** If parameter xBusy is not NULL, it is a pointer to a busy-handler | 3278 ** If parameter xBusy is not NULL, it is a pointer to a busy-handler |
3083 ** callback. In this case this function runs a blocking checkpoint. | 3279 ** callback. In this case this function runs a blocking checkpoint. |
3084 */ | 3280 */ |
3085 int sqlite3WalCheckpoint( | 3281 int sqlite3WalCheckpoint( |
3086 Wal *pWal, /* Wal connection */ | 3282 Wal *pWal, /* Wal connection */ |
| 3283 sqlite3 *db, /* Check this handle's interrupt flag */ |
3087 int eMode, /* PASSIVE, FULL, RESTART, or TRUNCATE */ | 3284 int eMode, /* PASSIVE, FULL, RESTART, or TRUNCATE */ |
3088 int (*xBusy)(void*), /* Function to call when busy */ | 3285 int (*xBusy)(void*), /* Function to call when busy */ |
3089 void *pBusyArg, /* Context argument for xBusyHandler */ | 3286 void *pBusyArg, /* Context argument for xBusyHandler */ |
3090 int sync_flags, /* Flags to sync db file with (or 0) */ | 3287 int sync_flags, /* Flags to sync db file with (or 0) */ |
3091 int nBuf, /* Size of temporary buffer */ | 3288 int nBuf, /* Size of temporary buffer */ |
3092 u8 *zBuf, /* Temporary buffer to use */ | 3289 u8 *zBuf, /* Temporary buffer to use */ |
3093 int *pnLog, /* OUT: Number of frames in WAL */ | 3290 int *pnLog, /* OUT: Number of frames in WAL */ |
3094 int *pnCkpt /* OUT: Number of backfilled frames in WAL */ | 3291 int *pnCkpt /* OUT: Number of backfilled frames in WAL */ |
3095 ){ | 3292 ){ |
3096 int rc; /* Return code */ | 3293 int rc; /* Return code */ |
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3147 /* Read the wal-index header. */ | 3344 /* Read the wal-index header. */ |
3148 if( rc==SQLITE_OK ){ | 3345 if( rc==SQLITE_OK ){ |
3149 rc = walIndexReadHdr(pWal, &isChanged); | 3346 rc = walIndexReadHdr(pWal, &isChanged); |
3150 if( isChanged && pWal->pDbFd->pMethods->iVersion>=3 ){ | 3347 if( isChanged && pWal->pDbFd->pMethods->iVersion>=3 ){ |
3151 sqlite3OsUnfetch(pWal->pDbFd, 0, 0); | 3348 sqlite3OsUnfetch(pWal->pDbFd, 0, 0); |
3152 } | 3349 } |
3153 } | 3350 } |
3154 | 3351 |
3155 /* Copy data from the log to the database file. */ | 3352 /* Copy data from the log to the database file. */ |
3156 if( rc==SQLITE_OK ){ | 3353 if( rc==SQLITE_OK ){ |
| 3354 |
3157 if( pWal->hdr.mxFrame && walPagesize(pWal)!=nBuf ){ | 3355 if( pWal->hdr.mxFrame && walPagesize(pWal)!=nBuf ){ |
3158 rc = SQLITE_CORRUPT_BKPT; | 3356 rc = SQLITE_CORRUPT_BKPT; |
3159 }else{ | 3357 }else{ |
3160 rc = walCheckpoint(pWal, eMode2, xBusy2, pBusyArg, sync_flags, zBuf); | 3358 rc = walCheckpoint(pWal, db, eMode2, xBusy2, pBusyArg, sync_flags, zBuf); |
3161 } | 3359 } |
3162 | 3360 |
3163 /* If no error occurred, set the output variables. */ | 3361 /* If no error occurred, set the output variables. */ |
3164 if( rc==SQLITE_OK || rc==SQLITE_BUSY ){ | 3362 if( rc==SQLITE_OK || rc==SQLITE_BUSY ){ |
3165 if( pnLog ) *pnLog = (int)pWal->hdr.mxFrame; | 3363 if( pnLog ) *pnLog = (int)pWal->hdr.mxFrame; |
3166 if( pnCkpt ) *pnCkpt = (int)(walCkptInfo(pWal)->nBackfill); | 3364 if( pnCkpt ) *pnCkpt = (int)(walCkptInfo(pWal)->nBackfill); |
3167 } | 3365 } |
3168 } | 3366 } |
3169 | 3367 |
3170 if( isChanged ){ | 3368 if( isChanged ){ |
(...skipping 99 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3270 } | 3468 } |
3271 | 3469 |
3272 #ifdef SQLITE_ENABLE_SNAPSHOT | 3470 #ifdef SQLITE_ENABLE_SNAPSHOT |
3273 /* Create a snapshot object. The content of a snapshot is opaque to | 3471 /* Create a snapshot object. The content of a snapshot is opaque to |
3274 ** every other subsystem, so the WAL module can put whatever it needs | 3472 ** every other subsystem, so the WAL module can put whatever it needs |
3275 ** in the object. | 3473 ** in the object. |
3276 */ | 3474 */ |
3277 int sqlite3WalSnapshotGet(Wal *pWal, sqlite3_snapshot **ppSnapshot){ | 3475 int sqlite3WalSnapshotGet(Wal *pWal, sqlite3_snapshot **ppSnapshot){ |
3278 int rc = SQLITE_OK; | 3476 int rc = SQLITE_OK; |
3279 WalIndexHdr *pRet; | 3477 WalIndexHdr *pRet; |
| 3478 static const u32 aZero[4] = { 0, 0, 0, 0 }; |
3280 | 3479 |
3281 assert( pWal->readLock>=0 && pWal->writeLock==0 ); | 3480 assert( pWal->readLock>=0 && pWal->writeLock==0 ); |
3282 | 3481 |
| 3482 if( memcmp(&pWal->hdr.aFrameCksum[0],aZero,16)==0 ){ |
| 3483 *ppSnapshot = 0; |
| 3484 return SQLITE_ERROR; |
| 3485 } |
3283 pRet = (WalIndexHdr*)sqlite3_malloc(sizeof(WalIndexHdr)); | 3486 pRet = (WalIndexHdr*)sqlite3_malloc(sizeof(WalIndexHdr)); |
3284 if( pRet==0 ){ | 3487 if( pRet==0 ){ |
3285 rc = SQLITE_NOMEM; | 3488 rc = SQLITE_NOMEM_BKPT; |
3286 }else{ | 3489 }else{ |
3287 memcpy(pRet, &pWal->hdr, sizeof(WalIndexHdr)); | 3490 memcpy(pRet, &pWal->hdr, sizeof(WalIndexHdr)); |
3288 *ppSnapshot = (sqlite3_snapshot*)pRet; | 3491 *ppSnapshot = (sqlite3_snapshot*)pRet; |
3289 } | 3492 } |
3290 | 3493 |
3291 return rc; | 3494 return rc; |
3292 } | 3495 } |
3293 | 3496 |
3294 /* Try to open on pSnapshot when the next read-transaction starts | 3497 /* Try to open on pSnapshot when the next read-transaction starts |
3295 */ | 3498 */ |
3296 void sqlite3WalSnapshotOpen(Wal *pWal, sqlite3_snapshot *pSnapshot){ | 3499 void sqlite3WalSnapshotOpen(Wal *pWal, sqlite3_snapshot *pSnapshot){ |
3297 pWal->pSnapshot = (WalIndexHdr*)pSnapshot; | 3500 pWal->pSnapshot = (WalIndexHdr*)pSnapshot; |
3298 } | 3501 } |
| 3502 |
| 3503 /* |
| 3504 ** Return a +ve value if snapshot p1 is newer than p2. A -ve value if |
| 3505 ** p1 is older than p2 and zero if p1 and p2 are the same snapshot. |
| 3506 */ |
| 3507 int sqlite3_snapshot_cmp(sqlite3_snapshot *p1, sqlite3_snapshot *p2){ |
| 3508 WalIndexHdr *pHdr1 = (WalIndexHdr*)p1; |
| 3509 WalIndexHdr *pHdr2 = (WalIndexHdr*)p2; |
| 3510 |
| 3511 /* aSalt[0] is a copy of the value stored in the wal file header. It |
| 3512 ** is incremented each time the wal file is restarted. */ |
| 3513 if( pHdr1->aSalt[0]<pHdr2->aSalt[0] ) return -1; |
| 3514 if( pHdr1->aSalt[0]>pHdr2->aSalt[0] ) return +1; |
| 3515 if( pHdr1->mxFrame<pHdr2->mxFrame ) return -1; |
| 3516 if( pHdr1->mxFrame>pHdr2->mxFrame ) return +1; |
| 3517 return 0; |
| 3518 } |
3299 #endif /* SQLITE_ENABLE_SNAPSHOT */ | 3519 #endif /* SQLITE_ENABLE_SNAPSHOT */ |
3300 | 3520 |
3301 #ifdef SQLITE_ENABLE_ZIPVFS | 3521 #ifdef SQLITE_ENABLE_ZIPVFS |
3302 /* | 3522 /* |
3303 ** If the argument is not NULL, it points to a Wal object that holds a | 3523 ** If the argument is not NULL, it points to a Wal object that holds a |
3304 ** read-lock. This function returns the database page-size if it is known, | 3524 ** read-lock. This function returns the database page-size if it is known, |
3305 ** or zero if it is not (or if pWal is NULL). | 3525 ** or zero if it is not (or if pWal is NULL). |
3306 */ | 3526 */ |
3307 int sqlite3WalFramesize(Wal *pWal){ | 3527 int sqlite3WalFramesize(Wal *pWal){ |
3308 assert( pWal==0 || pWal->readLock>=0 ); | 3528 assert( pWal==0 || pWal->readLock>=0 ); |
3309 return (pWal ? pWal->szPage : 0); | 3529 return (pWal ? pWal->szPage : 0); |
3310 } | 3530 } |
3311 #endif | 3531 #endif |
3312 | 3532 |
3313 /* Return the sqlite3_file object for the WAL file | 3533 /* Return the sqlite3_file object for the WAL file |
3314 */ | 3534 */ |
3315 sqlite3_file *sqlite3WalFile(Wal *pWal){ | 3535 sqlite3_file *sqlite3WalFile(Wal *pWal){ |
3316 return pWal->pWalFd; | 3536 return pWal->pWalFd; |
3317 } | 3537 } |
3318 | 3538 |
3319 #endif /* #ifndef SQLITE_OMIT_WAL */ | 3539 #endif /* #ifndef SQLITE_OMIT_WAL */ |
OLD | NEW |