OLD | NEW |
1 /* | 1 /* |
2 ** 2010 February 1 | 2 ** 2010 February 1 |
3 ** | 3 ** |
4 ** The author disclaims copyright to this source code. In place of | 4 ** The author disclaims copyright to this source code. In place of |
5 ** a legal notice, here is a blessing: | 5 ** a legal notice, here is a blessing: |
6 ** | 6 ** |
7 ** May you do good and not evil. | 7 ** May you do good and not evil. |
8 ** May you find forgiveness for yourself and forgive others. | 8 ** May you find forgiveness for yourself and forgive others. |
9 ** May you share freely, never taking more than you give. | 9 ** May you share freely, never taking more than you give. |
10 ** | 10 ** |
(...skipping 124 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
135 ** The wal-index is transient. After a crash, the wal-index can (and should | 135 ** The wal-index is transient. After a crash, the wal-index can (and should |
136 ** be) reconstructed from the original WAL file. In fact, the VFS is required | 136 ** be) reconstructed from the original WAL file. In fact, the VFS is required |
137 ** to either truncate or zero the header of the wal-index when the last | 137 ** to either truncate or zero the header of the wal-index when the last |
138 ** connection to it closes. Because the wal-index is transient, it can | 138 ** connection to it closes. Because the wal-index is transient, it can |
139 ** use an architecture-specific format; it does not have to be cross-platform. | 139 ** use an architecture-specific format; it does not have to be cross-platform. |
140 ** Hence, unlike the database and WAL file formats which store all values | 140 ** Hence, unlike the database and WAL file formats which store all values |
141 ** as big endian, the wal-index can store multi-byte values in the native | 141 ** as big endian, the wal-index can store multi-byte values in the native |
142 ** byte order of the host computer. | 142 ** byte order of the host computer. |
143 ** | 143 ** |
144 ** The purpose of the wal-index is to answer this question quickly: Given | 144 ** The purpose of the wal-index is to answer this question quickly: Given |
145 ** a page number P, return the index of the last frame for page P in the WAL, | 145 ** a page number P and a maximum frame index M, return the index of the |
146 ** or return NULL if there are no frames for page P in the WAL. | 146 ** last frame for page P in the WAL before frame M, or return |
| 147 ** NULL if there are no frames for page P in the WAL prior to M. |
147 ** | 148 ** |
148 ** The wal-index consists of a header region, followed by one or | 149 ** The wal-index consists of a header region, followed by one or |
149 ** more index blocks. | 150 ** more index blocks. |
150 ** | 151 ** |
151 ** The wal-index header contains the total number of frames within the WAL | 152 ** The wal-index header contains the total number of frames within the WAL |
152 ** in the the mxFrame field. | 153 ** in the mxFrame field. |
153 ** | 154 ** |
154 ** Each index block except for the first contains information on | 155 ** Each index block except for the first contains information on |
155 ** HASHTABLE_NPAGE frames. The first index block contains information on | 156 ** HASHTABLE_NPAGE frames. The first index block contains information on |
156 ** HASHTABLE_NPAGE_ONE frames. The values of HASHTABLE_NPAGE_ONE and | 157 ** HASHTABLE_NPAGE_ONE frames. The values of HASHTABLE_NPAGE_ONE and |
157 ** HASHTABLE_NPAGE are selected so that together the wal-index header and | 158 ** HASHTABLE_NPAGE are selected so that together the wal-index header and |
158 ** first index block are the same size as all other index blocks in the | 159 ** first index block are the same size as all other index blocks in the |
159 ** wal-index. | 160 ** wal-index. |
160 ** | 161 ** |
161 ** Each index block contains two sections, a page-mapping that contains the | 162 ** Each index block contains two sections, a page-mapping that contains the |
162 ** database page number associated with each wal frame, and a hash-table | 163 ** database page number associated with each wal frame, and a hash-table |
(...skipping 242 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
405 | 406 |
406 /* | 407 /* |
407 ** An open write-ahead log file is represented by an instance of the | 408 ** An open write-ahead log file is represented by an instance of the |
408 ** following object. | 409 ** following object. |
409 */ | 410 */ |
410 struct Wal { | 411 struct Wal { |
411 sqlite3_vfs *pVfs; /* The VFS used to create pDbFd */ | 412 sqlite3_vfs *pVfs; /* The VFS used to create pDbFd */ |
412 sqlite3_file *pDbFd; /* File handle for the database file */ | 413 sqlite3_file *pDbFd; /* File handle for the database file */ |
413 sqlite3_file *pWalFd; /* File handle for WAL file */ | 414 sqlite3_file *pWalFd; /* File handle for WAL file */ |
414 u32 iCallback; /* Value to pass to log callback (or 0) */ | 415 u32 iCallback; /* Value to pass to log callback (or 0) */ |
| 416 i64 mxWalSize; /* Truncate WAL to this size upon reset */ |
415 int nWiData; /* Size of array apWiData */ | 417 int nWiData; /* Size of array apWiData */ |
| 418 int szFirstBlock; /* Size of first block written to WAL file */ |
416 volatile u32 **apWiData; /* Pointer to wal-index content in memory */ | 419 volatile u32 **apWiData; /* Pointer to wal-index content in memory */ |
417 u32 szPage; /* Database page size */ | 420 u32 szPage; /* Database page size */ |
418 i16 readLock; /* Which read lock is being held. -1 for none */ | 421 i16 readLock; /* Which read lock is being held. -1 for none */ |
| 422 u8 syncFlags; /* Flags to use to sync header writes */ |
419 u8 exclusiveMode; /* Non-zero if connection is in exclusive mode */ | 423 u8 exclusiveMode; /* Non-zero if connection is in exclusive mode */ |
420 u8 writeLock; /* True if in a write transaction */ | 424 u8 writeLock; /* True if in a write transaction */ |
421 u8 ckptLock; /* True if holding a checkpoint lock */ | 425 u8 ckptLock; /* True if holding a checkpoint lock */ |
422 u8 readOnly; /* True if the WAL file is open read-only */ | 426 u8 readOnly; /* WAL_RDWR, WAL_RDONLY, or WAL_SHM_RDONLY */ |
| 427 u8 truncateOnCommit; /* True to truncate WAL file on commit */ |
| 428 u8 syncHeader; /* Fsync the WAL header if true */ |
| 429 u8 padToSectorBoundary; /* Pad transactions out to the next sector */ |
423 WalIndexHdr hdr; /* Wal-index header for current transaction */ | 430 WalIndexHdr hdr; /* Wal-index header for current transaction */ |
424 const char *zWalName; /* Name of WAL file */ | 431 const char *zWalName; /* Name of WAL file */ |
425 u32 nCkpt; /* Checkpoint sequence counter in the wal-header */ | 432 u32 nCkpt; /* Checkpoint sequence counter in the wal-header */ |
426 #ifdef SQLITE_DEBUG | 433 #ifdef SQLITE_DEBUG |
427 u8 lockError; /* True if a locking error has occurred */ | 434 u8 lockError; /* True if a locking error has occurred */ |
428 #endif | 435 #endif |
429 }; | 436 }; |
430 | 437 |
431 /* | 438 /* |
432 ** Candidate values for Wal.exclusiveMode. | 439 ** Candidate values for Wal.exclusiveMode. |
433 */ | 440 */ |
434 #define WAL_NORMAL_MODE 0 | 441 #define WAL_NORMAL_MODE 0 |
435 #define WAL_EXCLUSIVE_MODE 1 | 442 #define WAL_EXCLUSIVE_MODE 1 |
436 #define WAL_HEAPMEMORY_MODE 2 | 443 #define WAL_HEAPMEMORY_MODE 2 |
437 | 444 |
438 /* | 445 /* |
| 446 ** Possible values for Wal.readOnly |
| 447 */ |
| 448 #define WAL_RDWR 0 /* Normal read/write connection */ |
| 449 #define WAL_RDONLY 1 /* The WAL file is readonly */ |
| 450 #define WAL_SHM_RDONLY 2 /* The SHM file is readonly */ |
| 451 |
| 452 /* |
439 ** Each page of the wal-index mapping contains a hash-table made up of | 453 ** Each page of the wal-index mapping contains a hash-table made up of |
440 ** an array of HASHTABLE_NSLOT elements of the following type. | 454 ** an array of HASHTABLE_NSLOT elements of the following type. |
441 */ | 455 */ |
442 typedef u16 ht_slot; | 456 typedef u16 ht_slot; |
443 | 457 |
444 /* | 458 /* |
445 ** This structure is used to implement an iterator that loops through | 459 ** This structure is used to implement an iterator that loops through |
446 ** all frames in the WAL in database page order. Where two or more frames | 460 ** all frames in the WAL in database page order. Where two or more frames |
447 ** correspond to the same database page, the iterator visits only the | 461 ** correspond to the same database page, the iterator visits only the |
448 ** frame most recently written to the WAL (in other words, the frame with | 462 ** frame most recently written to the WAL (in other words, the frame with |
(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
521 | 535 |
522 /* Request a pointer to the required page from the VFS */ | 536 /* Request a pointer to the required page from the VFS */ |
523 if( pWal->apWiData[iPage]==0 ){ | 537 if( pWal->apWiData[iPage]==0 ){ |
524 if( pWal->exclusiveMode==WAL_HEAPMEMORY_MODE ){ | 538 if( pWal->exclusiveMode==WAL_HEAPMEMORY_MODE ){ |
525 pWal->apWiData[iPage] = (u32 volatile *)sqlite3MallocZero(WALINDEX_PGSZ); | 539 pWal->apWiData[iPage] = (u32 volatile *)sqlite3MallocZero(WALINDEX_PGSZ); |
526 if( !pWal->apWiData[iPage] ) rc = SQLITE_NOMEM; | 540 if( !pWal->apWiData[iPage] ) rc = SQLITE_NOMEM; |
527 }else{ | 541 }else{ |
528 rc = sqlite3OsShmMap(pWal->pDbFd, iPage, WALINDEX_PGSZ, | 542 rc = sqlite3OsShmMap(pWal->pDbFd, iPage, WALINDEX_PGSZ, |
529 pWal->writeLock, (void volatile **)&pWal->apWiData[iPage] | 543 pWal->writeLock, (void volatile **)&pWal->apWiData[iPage] |
530 ); | 544 ); |
| 545 if( rc==SQLITE_READONLY ){ |
| 546 pWal->readOnly |= WAL_SHM_RDONLY; |
| 547 rc = SQLITE_OK; |
| 548 } |
531 } | 549 } |
532 } | 550 } |
533 | 551 |
534 *ppPage = pWal->apWiData[iPage]; | 552 *ppPage = pWal->apWiData[iPage]; |
535 assert( iPage==0 || *ppPage || rc!=SQLITE_OK ); | 553 assert( iPage==0 || *ppPage || rc!=SQLITE_OK ); |
536 return rc; | 554 return rc; |
537 } | 555 } |
538 | 556 |
539 /* | 557 /* |
540 ** Return a pointer to the WalCkptInfo structure in the wal-index. | 558 ** Return a pointer to the WalCkptInfo structure in the wal-index. |
541 */ | 559 */ |
542 static volatile WalCkptInfo *walCkptInfo(Wal *pWal){ | 560 static volatile WalCkptInfo *walCkptInfo(Wal *pWal){ |
543 assert( pWal->nWiData>0 && pWal->apWiData[0] ); | 561 assert( pWal->nWiData>0 && pWal->apWiData[0] ); |
544 return (volatile WalCkptInfo*)&(pWal->apWiData[0][sizeof(WalIndexHdr)/2]); | 562 return (volatile WalCkptInfo*)&(pWal->apWiData[0][sizeof(WalIndexHdr)/2]); |
545 } | 563 } |
546 | 564 |
547 /* | 565 /* |
548 ** Return a pointer to the WalIndexHdr structure in the wal-index. | 566 ** Return a pointer to the WalIndexHdr structure in the wal-index. |
549 */ | 567 */ |
550 static volatile WalIndexHdr *walIndexHdr(Wal *pWal){ | 568 static volatile WalIndexHdr *walIndexHdr(Wal *pWal){ |
551 assert( pWal->nWiData>0 && pWal->apWiData[0] ); | 569 assert( pWal->nWiData>0 && pWal->apWiData[0] ); |
552 return (volatile WalIndexHdr*)pWal->apWiData[0]; | 570 return (volatile WalIndexHdr*)pWal->apWiData[0]; |
553 } | 571 } |
554 | 572 |
555 /* | 573 /* |
556 ** The argument to this macro must be of type u32. On a little-endian | 574 ** The argument to this macro must be of type u32. On a little-endian |
557 ** architecture, it returns the u32 value that results from interpreting | 575 ** architecture, it returns the u32 value that results from interpreting |
558 ** the 4 bytes as a big-endian value. On a big-endian architecture, it | 576 ** the 4 bytes as a big-endian value. On a big-endian architecture, it |
559 ** returns the value that would be produced by intepreting the 4 bytes | 577 ** returns the value that would be produced by interpreting the 4 bytes |
560 ** of the input value as a little-endian integer. | 578 ** of the input value as a little-endian integer. |
561 */ | 579 */ |
562 #define BYTESWAP32(x) ( \ | 580 #define BYTESWAP32(x) ( \ |
563 (((x)&0x000000FF)<<24) + (((x)&0x0000FF00)<<8) \ | 581 (((x)&0x000000FF)<<24) + (((x)&0x0000FF00)<<8) \ |
564 + (((x)&0x00FF0000)>>8) + (((x)&0xFF000000)>>24) \ | 582 + (((x)&0x00FF0000)>>8) + (((x)&0xFF000000)>>24) \ |
565 ) | 583 ) |
566 | 584 |
567 /* | 585 /* |
568 ** Generate or extend an 8 byte checksum based on the data in | 586 ** Generate or extend an 8 byte checksum based on the data in |
569 ** array aByte[] and the initial values of aIn[0] and aIn[1] (or | 587 ** array aByte[] and the initial values of aIn[0] and aIn[1] (or |
(...skipping 393 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
963 */ | 981 */ |
964 if( rc==SQLITE_OK ){ | 982 if( rc==SQLITE_OK ){ |
965 int iKey; /* Hash table key */ | 983 int iKey; /* Hash table key */ |
966 int idx; /* Value to write to hash-table slot */ | 984 int idx; /* Value to write to hash-table slot */ |
967 int nCollide; /* Number of hash collisions */ | 985 int nCollide; /* Number of hash collisions */ |
968 | 986 |
969 idx = iFrame - iZero; | 987 idx = iFrame - iZero; |
970 assert( idx <= HASHTABLE_NSLOT/2 + 1 ); | 988 assert( idx <= HASHTABLE_NSLOT/2 + 1 ); |
971 | 989 |
972 /* If this is the first entry to be added to this hash-table, zero the | 990 /* If this is the first entry to be added to this hash-table, zero the |
973 ** entire hash table and aPgno[] array before proceding. | 991 ** entire hash table and aPgno[] array before proceeding. |
974 */ | 992 */ |
975 if( idx==1 ){ | 993 if( idx==1 ){ |
976 int nByte = (int)((u8 *)&aHash[HASHTABLE_NSLOT] - (u8 *)&aPgno[1]); | 994 int nByte = (int)((u8 *)&aHash[HASHTABLE_NSLOT] - (u8 *)&aPgno[1]); |
977 memset((void*)&aPgno[1], 0, nByte); | 995 memset((void*)&aPgno[1], 0, nByte); |
978 } | 996 } |
979 | 997 |
980 /* If the entry in aPgno[] is already set, then the previous writer | 998 /* If the entry in aPgno[] is already set, then the previous writer |
981 ** must have exited unexpectedly in the middle of a transaction (after | 999 ** must have exited unexpectedly in the middle of a transaction (after |
982 ** writing one or more dirty pages to the WAL to free up memory). | 1000 ** writing one or more dirty pages to the WAL to free up memory). |
983 ** Remove the remnants of that writer's uncommitted transaction from | 1001 ** Remove the remnants of that writer's uncommitted transaction from |
(...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1074 if( nSize>WAL_HDRSIZE ){ | 1092 if( nSize>WAL_HDRSIZE ){ |
1075 u8 aBuf[WAL_HDRSIZE]; /* Buffer to load WAL header into */ | 1093 u8 aBuf[WAL_HDRSIZE]; /* Buffer to load WAL header into */ |
1076 u8 *aFrame = 0; /* Malloc'd buffer to load entire frame */ | 1094 u8 *aFrame = 0; /* Malloc'd buffer to load entire frame */ |
1077 int szFrame; /* Number of bytes in buffer aFrame[] */ | 1095 int szFrame; /* Number of bytes in buffer aFrame[] */ |
1078 u8 *aData; /* Pointer to data part of aFrame buffer */ | 1096 u8 *aData; /* Pointer to data part of aFrame buffer */ |
1079 int iFrame; /* Index of last frame read */ | 1097 int iFrame; /* Index of last frame read */ |
1080 i64 iOffset; /* Next offset to read from log file */ | 1098 i64 iOffset; /* Next offset to read from log file */ |
1081 int szPage; /* Page size according to the log */ | 1099 int szPage; /* Page size according to the log */ |
1082 u32 magic; /* Magic value read from WAL header */ | 1100 u32 magic; /* Magic value read from WAL header */ |
1083 u32 version; /* Version number read from WAL header */ | 1101 u32 version; /* Version number read from WAL header */ |
| 1102 int isValid; /* True if this frame is valid */ |
1084 | 1103 |
1085 /* Read in the WAL header. */ | 1104 /* Read in the WAL header. */ |
1086 rc = sqlite3OsRead(pWal->pWalFd, aBuf, WAL_HDRSIZE, 0); | 1105 rc = sqlite3OsRead(pWal->pWalFd, aBuf, WAL_HDRSIZE, 0); |
1087 if( rc!=SQLITE_OK ){ | 1106 if( rc!=SQLITE_OK ){ |
1088 goto recovery_error; | 1107 goto recovery_error; |
1089 } | 1108 } |
1090 | 1109 |
1091 /* If the database page size is not a power of two, or is greater than | 1110 /* If the database page size is not a power of two, or is greater than |
1092 ** SQLITE_MAX_PAGE_SIZE, conclude that the WAL file contains no valid | 1111 ** SQLITE_MAX_PAGE_SIZE, conclude that the WAL file contains no valid |
1093 ** data. Similarly, if the 'magic' value is invalid, ignore the whole | 1112 ** data. Similarly, if the 'magic' value is invalid, ignore the whole |
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1132 rc = SQLITE_NOMEM; | 1151 rc = SQLITE_NOMEM; |
1133 goto recovery_error; | 1152 goto recovery_error; |
1134 } | 1153 } |
1135 aData = &aFrame[WAL_FRAME_HDRSIZE]; | 1154 aData = &aFrame[WAL_FRAME_HDRSIZE]; |
1136 | 1155 |
1137 /* Read all frames from the log file. */ | 1156 /* Read all frames from the log file. */ |
1138 iFrame = 0; | 1157 iFrame = 0; |
1139 for(iOffset=WAL_HDRSIZE; (iOffset+szFrame)<=nSize; iOffset+=szFrame){ | 1158 for(iOffset=WAL_HDRSIZE; (iOffset+szFrame)<=nSize; iOffset+=szFrame){ |
1140 u32 pgno; /* Database page number for frame */ | 1159 u32 pgno; /* Database page number for frame */ |
1141 u32 nTruncate; /* dbsize field from frame header */ | 1160 u32 nTruncate; /* dbsize field from frame header */ |
1142 int isValid; /* True if this frame is valid */ | |
1143 | 1161 |
1144 /* Read and decode the next log frame. */ | 1162 /* Read and decode the next log frame. */ |
| 1163 iFrame++; |
1145 rc = sqlite3OsRead(pWal->pWalFd, aFrame, szFrame, iOffset); | 1164 rc = sqlite3OsRead(pWal->pWalFd, aFrame, szFrame, iOffset); |
1146 if( rc!=SQLITE_OK ) break; | 1165 if( rc!=SQLITE_OK ) break; |
1147 isValid = walDecodeFrame(pWal, &pgno, &nTruncate, aData, aFrame); | 1166 isValid = walDecodeFrame(pWal, &pgno, &nTruncate, aData, aFrame); |
1148 if( !isValid ) break; | 1167 if( !isValid ) break; |
1149 rc = walIndexAppend(pWal, ++iFrame, pgno); | 1168 rc = walIndexAppend(pWal, iFrame, pgno); |
1150 if( rc!=SQLITE_OK ) break; | 1169 if( rc!=SQLITE_OK ) break; |
1151 | 1170 |
1152 /* If nTruncate is non-zero, this is a commit record. */ | 1171 /* If nTruncate is non-zero, this is a commit record. */ |
1153 if( nTruncate ){ | 1172 if( nTruncate ){ |
1154 pWal->hdr.mxFrame = iFrame; | 1173 pWal->hdr.mxFrame = iFrame; |
1155 pWal->hdr.nPage = nTruncate; | 1174 pWal->hdr.nPage = nTruncate; |
1156 pWal->hdr.szPage = (u16)((szPage&0xff00) | (szPage>>16)); | 1175 pWal->hdr.szPage = (u16)((szPage&0xff00) | (szPage>>16)); |
1157 testcase( szPage<=32768 ); | 1176 testcase( szPage<=32768 ); |
1158 testcase( szPage>=65536 ); | 1177 testcase( szPage>=65536 ); |
1159 aFrameCksum[0] = pWal->hdr.aFrameCksum[0]; | 1178 aFrameCksum[0] = pWal->hdr.aFrameCksum[0]; |
(...skipping 13 matching lines...) Expand all Loading... |
1173 walIndexWriteHdr(pWal); | 1192 walIndexWriteHdr(pWal); |
1174 | 1193 |
1175 /* Reset the checkpoint-header. This is safe because this thread is | 1194 /* Reset the checkpoint-header. This is safe because this thread is |
1176 ** currently holding locks that exclude all other readers, writers and | 1195 ** currently holding locks that exclude all other readers, writers and |
1177 ** checkpointers. | 1196 ** checkpointers. |
1178 */ | 1197 */ |
1179 pInfo = walCkptInfo(pWal); | 1198 pInfo = walCkptInfo(pWal); |
1180 pInfo->nBackfill = 0; | 1199 pInfo->nBackfill = 0; |
1181 pInfo->aReadMark[0] = 0; | 1200 pInfo->aReadMark[0] = 0; |
1182 for(i=1; i<WAL_NREADER; i++) pInfo->aReadMark[i] = READMARK_NOT_USED; | 1201 for(i=1; i<WAL_NREADER; i++) pInfo->aReadMark[i] = READMARK_NOT_USED; |
| 1202 if( pWal->hdr.mxFrame ) pInfo->aReadMark[1] = pWal->hdr.mxFrame; |
1183 | 1203 |
1184 /* If one or more frames were recovered from the log file, report an | 1204 /* If one or more frames were recovered from the log file, report an |
1185 ** event via sqlite3_log(). This is to help with identifying performance | 1205 ** event via sqlite3_log(). This is to help with identifying performance |
1186 ** problems caused by applications routinely shutting down without | 1206 ** problems caused by applications routinely shutting down without |
1187 ** checkpointing the log file. | 1207 ** checkpointing the log file. |
1188 */ | 1208 */ |
1189 if( pWal->hdr.nPage ){ | 1209 if( pWal->hdr.nPage ){ |
1190 sqlite3_log(SQLITE_OK, "Recovered %d frames from WAL file %s", | 1210 sqlite3_log(SQLITE_NOTICE_RECOVER_WAL, |
1191 pWal->hdr.nPage, pWal->zWalName | 1211 "recovered %d frames from WAL file %s", |
| 1212 pWal->hdr.mxFrame, pWal->zWalName |
1192 ); | 1213 ); |
1193 } | 1214 } |
1194 } | 1215 } |
1195 | 1216 |
1196 recovery_error: | 1217 recovery_error: |
1197 WALTRACE(("WAL%p: recovery %s\n", pWal, rc ? "failed" : "ok")); | 1218 WALTRACE(("WAL%p: recovery %s\n", pWal, rc ? "failed" : "ok")); |
1198 walUnlockExclusive(pWal, iLock, nLock); | 1219 walUnlockExclusive(pWal, iLock, nLock); |
1199 return rc; | 1220 return rc; |
1200 } | 1221 } |
1201 | 1222 |
(...skipping 25 matching lines...) Expand all Loading... |
1227 ** | 1248 ** |
1228 ** If the log file is successfully opened, SQLITE_OK is returned and | 1249 ** If the log file is successfully opened, SQLITE_OK is returned and |
1229 ** *ppWal is set to point to a new WAL handle. If an error occurs, | 1250 ** *ppWal is set to point to a new WAL handle. If an error occurs, |
1230 ** an SQLite error code is returned and *ppWal is left unmodified. | 1251 ** an SQLite error code is returned and *ppWal is left unmodified. |
1231 */ | 1252 */ |
1232 int sqlite3WalOpen( | 1253 int sqlite3WalOpen( |
1233 sqlite3_vfs *pVfs, /* vfs module to open wal and wal-index */ | 1254 sqlite3_vfs *pVfs, /* vfs module to open wal and wal-index */ |
1234 sqlite3_file *pDbFd, /* The open database file */ | 1255 sqlite3_file *pDbFd, /* The open database file */ |
1235 const char *zWalName, /* Name of the WAL file */ | 1256 const char *zWalName, /* Name of the WAL file */ |
1236 int bNoShm, /* True to run in heap-memory mode */ | 1257 int bNoShm, /* True to run in heap-memory mode */ |
| 1258 i64 mxWalSize, /* Truncate WAL to this size on reset */ |
1237 Wal **ppWal /* OUT: Allocated Wal handle */ | 1259 Wal **ppWal /* OUT: Allocated Wal handle */ |
1238 ){ | 1260 ){ |
1239 int rc; /* Return Code */ | 1261 int rc; /* Return Code */ |
1240 Wal *pRet; /* Object to allocate and return */ | 1262 Wal *pRet; /* Object to allocate and return */ |
1241 int flags; /* Flags passed to OsOpen() */ | 1263 int flags; /* Flags passed to OsOpen() */ |
1242 | 1264 |
1243 assert( zWalName && zWalName[0] ); | 1265 assert( zWalName && zWalName[0] ); |
1244 assert( pDbFd ); | 1266 assert( pDbFd ); |
1245 | 1267 |
1246 /* In the amalgamation, the os_unix.c and os_win.c source files come before | 1268 /* In the amalgamation, the os_unix.c and os_win.c source files come before |
(...skipping 12 matching lines...) Expand all Loading... |
1259 *ppWal = 0; | 1281 *ppWal = 0; |
1260 pRet = (Wal*)sqlite3MallocZero(sizeof(Wal) + pVfs->szOsFile); | 1282 pRet = (Wal*)sqlite3MallocZero(sizeof(Wal) + pVfs->szOsFile); |
1261 if( !pRet ){ | 1283 if( !pRet ){ |
1262 return SQLITE_NOMEM; | 1284 return SQLITE_NOMEM; |
1263 } | 1285 } |
1264 | 1286 |
1265 pRet->pVfs = pVfs; | 1287 pRet->pVfs = pVfs; |
1266 pRet->pWalFd = (sqlite3_file *)&pRet[1]; | 1288 pRet->pWalFd = (sqlite3_file *)&pRet[1]; |
1267 pRet->pDbFd = pDbFd; | 1289 pRet->pDbFd = pDbFd; |
1268 pRet->readLock = -1; | 1290 pRet->readLock = -1; |
| 1291 pRet->mxWalSize = mxWalSize; |
1269 pRet->zWalName = zWalName; | 1292 pRet->zWalName = zWalName; |
| 1293 pRet->syncHeader = 1; |
| 1294 pRet->padToSectorBoundary = 1; |
1270 pRet->exclusiveMode = (bNoShm ? WAL_HEAPMEMORY_MODE: WAL_NORMAL_MODE); | 1295 pRet->exclusiveMode = (bNoShm ? WAL_HEAPMEMORY_MODE: WAL_NORMAL_MODE); |
1271 | 1296 |
1272 /* Open file handle on the write-ahead log file. */ | 1297 /* Open file handle on the write-ahead log file. */ |
1273 flags = (SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE|SQLITE_OPEN_WAL); | 1298 flags = (SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE|SQLITE_OPEN_WAL); |
1274 rc = sqlite3OsOpen(pVfs, zWalName, pRet->pWalFd, flags, &flags); | 1299 rc = sqlite3OsOpen(pVfs, zWalName, pRet->pWalFd, flags, &flags); |
1275 if( rc==SQLITE_OK && flags&SQLITE_OPEN_READONLY ){ | 1300 if( rc==SQLITE_OK && flags&SQLITE_OPEN_READONLY ){ |
1276 pRet->readOnly = 1; | 1301 pRet->readOnly = WAL_RDONLY; |
1277 } | 1302 } |
1278 | 1303 |
1279 if( rc!=SQLITE_OK ){ | 1304 if( rc!=SQLITE_OK ){ |
1280 walIndexClose(pRet, 0); | 1305 walIndexClose(pRet, 0); |
1281 sqlite3OsClose(pRet->pWalFd); | 1306 sqlite3OsClose(pRet->pWalFd); |
1282 sqlite3_free(pRet); | 1307 sqlite3_free(pRet); |
1283 }else{ | 1308 }else{ |
| 1309 int iDC = sqlite3OsDeviceCharacteristics(pDbFd); |
| 1310 if( iDC & SQLITE_IOCAP_SEQUENTIAL ){ pRet->syncHeader = 0; } |
| 1311 if( iDC & SQLITE_IOCAP_POWERSAFE_OVERWRITE ){ |
| 1312 pRet->padToSectorBoundary = 0; |
| 1313 } |
1284 *ppWal = pRet; | 1314 *ppWal = pRet; |
1285 WALTRACE(("WAL%d: opened\n", pRet)); | 1315 WALTRACE(("WAL%d: opened\n", pRet)); |
1286 } | 1316 } |
1287 return rc; | 1317 return rc; |
1288 } | 1318 } |
1289 | 1319 |
1290 /* | 1320 /* |
| 1321 ** Change the size to which the WAL file is truncated on each reset. |
| 1322 */ |
| 1323 void sqlite3WalLimit(Wal *pWal, i64 iLimit){ |
| 1324 if( pWal ) pWal->mxWalSize = iLimit; |
| 1325 } |
| 1326 |
| 1327 /* |
1291 ** Find the smallest page number out of all pages held in the WAL that | 1328 ** Find the smallest page number out of all pages held in the WAL that |
1292 ** has not been returned by any prior invocation of this method on the | 1329 ** has not been returned by any prior invocation of this method on the |
1293 ** same WalIterator object. Write into *piFrame the frame index where | 1330 ** same WalIterator object. Write into *piFrame the frame index where |
1294 ** that page was last written into the WAL. Write into *piPage the page | 1331 ** that page was last written into the WAL. Write into *piPage the page |
1295 ** number. | 1332 ** number. |
1296 ** | 1333 ** |
1297 ** Return 0 on success. If there are no pages in the WAL with a page | 1334 ** Return 0 on success. If there are no pages in the WAL with a page |
1298 ** number larger than *piPage, then return 1. | 1335 ** number larger than *piPage, then return 1. |
1299 */ | 1336 */ |
1300 static int walIteratorNext( | 1337 static int walIteratorNext( |
(...skipping 301 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1602 ** Fsync is called on the WAL before writing content out of the WAL and | 1639 ** Fsync is called on the WAL before writing content out of the WAL and |
1603 ** into the database. This ensures that the new content is persistent | 1640 ** into the database. This ensures that the new content is persistent |
1604 ** in the WAL and can be recovered following a power-loss or hard reset. | 1641 ** in the WAL and can be recovered following a power-loss or hard reset. |
1605 ** | 1642 ** |
1606 ** Fsync is also called on the database file if (and only if) the entire | 1643 ** Fsync is also called on the database file if (and only if) the entire |
1607 ** WAL content is copied into the database file. This second fsync makes | 1644 ** WAL content is copied into the database file. This second fsync makes |
1608 ** it safe to delete the WAL since the new content will persist in the | 1645 ** it safe to delete the WAL since the new content will persist in the |
1609 ** database file. | 1646 ** database file. |
1610 ** | 1647 ** |
1611 ** This routine uses and updates the nBackfill field of the wal-index header. | 1648 ** This routine uses and updates the nBackfill field of the wal-index header. |
1612 ** This is the only routine tha will increase the value of nBackfill. | 1649 ** This is the only routine that will increase the value of nBackfill. |
1613 ** (A WAL reset or recovery will revert nBackfill to zero, but not increase | 1650 ** (A WAL reset or recovery will revert nBackfill to zero, but not increase |
1614 ** its value.) | 1651 ** its value.) |
1615 ** | 1652 ** |
1616 ** The caller must be holding sufficient locks to ensure that no other | 1653 ** The caller must be holding sufficient locks to ensure that no other |
1617 ** checkpoint is running (in any other thread or process) at the same | 1654 ** checkpoint is running (in any other thread or process) at the same |
1618 ** time. | 1655 ** time. |
1619 */ | 1656 */ |
1620 static int walCheckpoint( | 1657 static int walCheckpoint( |
1621 Wal *pWal, /* Wal connection */ | 1658 Wal *pWal, /* Wal connection */ |
1622 int eMode, /* One of PASSIVE, FULL or RESTART */ | 1659 int eMode, /* One of PASSIVE, FULL or RESTART */ |
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1657 ** cannot be backfilled from the WAL. | 1694 ** cannot be backfilled from the WAL. |
1658 */ | 1695 */ |
1659 mxSafeFrame = pWal->hdr.mxFrame; | 1696 mxSafeFrame = pWal->hdr.mxFrame; |
1660 mxPage = pWal->hdr.nPage; | 1697 mxPage = pWal->hdr.nPage; |
1661 for(i=1; i<WAL_NREADER; i++){ | 1698 for(i=1; i<WAL_NREADER; i++){ |
1662 u32 y = pInfo->aReadMark[i]; | 1699 u32 y = pInfo->aReadMark[i]; |
1663 if( mxSafeFrame>y ){ | 1700 if( mxSafeFrame>y ){ |
1664 assert( y<=pWal->hdr.mxFrame ); | 1701 assert( y<=pWal->hdr.mxFrame ); |
1665 rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(i), 1); | 1702 rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(i), 1); |
1666 if( rc==SQLITE_OK ){ | 1703 if( rc==SQLITE_OK ){ |
1667 pInfo->aReadMark[i] = READMARK_NOT_USED; | 1704 pInfo->aReadMark[i] = (i==1 ? mxSafeFrame : READMARK_NOT_USED); |
1668 walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1); | 1705 walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1); |
1669 }else if( rc==SQLITE_BUSY ){ | 1706 }else if( rc==SQLITE_BUSY ){ |
1670 mxSafeFrame = y; | 1707 mxSafeFrame = y; |
1671 xBusy = 0; | 1708 xBusy = 0; |
1672 }else{ | 1709 }else{ |
1673 goto walcheckpoint_out; | 1710 goto walcheckpoint_out; |
1674 } | 1711 } |
1675 } | 1712 } |
1676 } | 1713 } |
1677 | 1714 |
1678 if( pInfo->nBackfill<mxSafeFrame | 1715 if( pInfo->nBackfill<mxSafeFrame |
1679 && (rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(0), 1))==SQLITE_OK | 1716 && (rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(0), 1))==SQLITE_OK |
1680 ){ | 1717 ){ |
1681 i64 nSize; /* Current size of database file */ | 1718 i64 nSize; /* Current size of database file */ |
1682 u32 nBackfill = pInfo->nBackfill; | 1719 u32 nBackfill = pInfo->nBackfill; |
1683 | 1720 |
1684 /* Sync the WAL to disk */ | 1721 /* Sync the WAL to disk */ |
1685 if( sync_flags ){ | 1722 if( sync_flags ){ |
1686 rc = sqlite3OsSync(pWal->pWalFd, sync_flags); | 1723 rc = sqlite3OsSync(pWal->pWalFd, sync_flags); |
1687 } | 1724 } |
1688 | 1725 |
1689 /* If the database file may grow as a result of this checkpoint, hint | 1726 /* If the database may grow as a result of this checkpoint, hint |
1690 ** about the eventual size of the db file to the VFS layer. | 1727 ** about the eventual size of the db file to the VFS layer. |
1691 */ | 1728 */ |
1692 if( rc==SQLITE_OK ){ | 1729 if( rc==SQLITE_OK ){ |
1693 i64 nReq = ((i64)mxPage * szPage); | 1730 i64 nReq = ((i64)mxPage * szPage); |
1694 rc = sqlite3OsFileSize(pWal->pDbFd, &nSize); | 1731 rc = sqlite3OsFileSize(pWal->pDbFd, &nSize); |
1695 if( rc==SQLITE_OK && nSize<nReq ){ | 1732 if( rc==SQLITE_OK && nSize<nReq ){ |
1696 sqlite3OsFileControl(pWal->pDbFd, SQLITE_FCNTL_SIZE_HINT, &nReq); | 1733 sqlite3OsFileControlHint(pWal->pDbFd, SQLITE_FCNTL_SIZE_HINT, &nReq); |
1697 } | 1734 } |
1698 } | 1735 } |
1699 | 1736 |
| 1737 |
1700 /* Iterate through the contents of the WAL, copying data to the db file. */ | 1738 /* Iterate through the contents of the WAL, copying data to the db file. */ |
1701 while( rc==SQLITE_OK && 0==walIteratorNext(pIter, &iDbpage, &iFrame) ){ | 1739 while( rc==SQLITE_OK && 0==walIteratorNext(pIter, &iDbpage, &iFrame) ){ |
1702 i64 iOffset; | 1740 i64 iOffset; |
1703 assert( walFramePgno(pWal, iFrame)==iDbpage ); | 1741 assert( walFramePgno(pWal, iFrame)==iDbpage ); |
1704 if( iFrame<=nBackfill || iFrame>mxSafeFrame || iDbpage>mxPage ) continue; | 1742 if( iFrame<=nBackfill || iFrame>mxSafeFrame || iDbpage>mxPage ) continue; |
1705 iOffset = walFrameOffset(iFrame, szPage) + WAL_FRAME_HDRSIZE; | 1743 iOffset = walFrameOffset(iFrame, szPage) + WAL_FRAME_HDRSIZE; |
1706 /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL file */ | 1744 /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL file */ |
1707 rc = sqlite3OsRead(pWal->pWalFd, zBuf, szPage, iOffset); | 1745 rc = sqlite3OsRead(pWal->pWalFd, zBuf, szPage, iOffset); |
1708 if( rc!=SQLITE_OK ) break; | 1746 if( rc!=SQLITE_OK ) break; |
1709 iOffset = (iDbpage-1)*(i64)szPage; | 1747 iOffset = (iDbpage-1)*(i64)szPage; |
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1754 } | 1792 } |
1755 } | 1793 } |
1756 } | 1794 } |
1757 | 1795 |
1758 walcheckpoint_out: | 1796 walcheckpoint_out: |
1759 walIteratorFree(pIter); | 1797 walIteratorFree(pIter); |
1760 return rc; | 1798 return rc; |
1761 } | 1799 } |
1762 | 1800 |
1763 /* | 1801 /* |
| 1802 ** If the WAL file is currently larger than nMax bytes in size, truncate |
| 1803 ** it to exactly nMax bytes. If an error occurs while doing so, ignore it. |
| 1804 */ |
| 1805 static void walLimitSize(Wal *pWal, i64 nMax){ |
| 1806 i64 sz; |
| 1807 int rx; |
| 1808 sqlite3BeginBenignMalloc(); |
| 1809 rx = sqlite3OsFileSize(pWal->pWalFd, &sz); |
| 1810 if( rx==SQLITE_OK && (sz > nMax ) ){ |
| 1811 rx = sqlite3OsTruncate(pWal->pWalFd, nMax); |
| 1812 } |
| 1813 sqlite3EndBenignMalloc(); |
| 1814 if( rx ){ |
| 1815 sqlite3_log(rx, "cannot limit WAL size: %s", pWal->zWalName); |
| 1816 } |
| 1817 } |
| 1818 |
| 1819 /* |
1764 ** Close a connection to a log file. | 1820 ** Close a connection to a log file. |
1765 */ | 1821 */ |
1766 int sqlite3WalClose( | 1822 int sqlite3WalClose( |
1767 Wal *pWal, /* Wal to close */ | 1823 Wal *pWal, /* Wal to close */ |
1768 int sync_flags, /* Flags to pass to OsSync() (or 0) */ | 1824 int sync_flags, /* Flags to pass to OsSync() (or 0) */ |
1769 int nBuf, | 1825 int nBuf, |
1770 u8 *zBuf /* Buffer of at least nBuf bytes */ | 1826 u8 *zBuf /* Buffer of at least nBuf bytes */ |
1771 ){ | 1827 ){ |
1772 int rc = SQLITE_OK; | 1828 int rc = SQLITE_OK; |
1773 if( pWal ){ | 1829 if( pWal ){ |
1774 int isDelete = 0; /* True to unlink wal and wal-index files */ | 1830 int isDelete = 0; /* True to unlink wal and wal-index files */ |
1775 | 1831 |
1776 /* If an EXCLUSIVE lock can be obtained on the database file (using the | 1832 /* If an EXCLUSIVE lock can be obtained on the database file (using the |
1777 ** ordinary, rollback-mode locking methods), this guarantees that the | 1833 ** ordinary, rollback-mode locking methods), this guarantees that the |
1778 ** connection associated with this log file is the only connection to | 1834 ** connection associated with this log file is the only connection to |
1779 ** the database. In this case checkpoint the database and unlink both | 1835 ** the database. In this case checkpoint the database and unlink both |
1780 ** the wal and wal-index files. | 1836 ** the wal and wal-index files. |
1781 ** | 1837 ** |
1782 ** The EXCLUSIVE lock is not released before returning. | 1838 ** The EXCLUSIVE lock is not released before returning. |
1783 */ | 1839 */ |
1784 rc = sqlite3OsLock(pWal->pDbFd, SQLITE_LOCK_EXCLUSIVE); | 1840 rc = sqlite3OsLock(pWal->pDbFd, SQLITE_LOCK_EXCLUSIVE); |
1785 if( rc==SQLITE_OK ){ | 1841 if( rc==SQLITE_OK ){ |
1786 if( pWal->exclusiveMode==WAL_NORMAL_MODE ){ | 1842 if( pWal->exclusiveMode==WAL_NORMAL_MODE ){ |
1787 pWal->exclusiveMode = WAL_EXCLUSIVE_MODE; | 1843 pWal->exclusiveMode = WAL_EXCLUSIVE_MODE; |
1788 } | 1844 } |
1789 rc = sqlite3WalCheckpoint( | 1845 rc = sqlite3WalCheckpoint( |
1790 pWal, SQLITE_CHECKPOINT_PASSIVE, 0, 0, sync_flags, nBuf, zBuf, 0, 0 | 1846 pWal, SQLITE_CHECKPOINT_PASSIVE, 0, 0, sync_flags, nBuf, zBuf, 0, 0 |
1791 ); | 1847 ); |
1792 if( rc==SQLITE_OK ){ | 1848 if( rc==SQLITE_OK ){ |
1793 isDelete = 1; | 1849 int bPersist = -1; |
| 1850 sqlite3OsFileControlHint( |
| 1851 pWal->pDbFd, SQLITE_FCNTL_PERSIST_WAL, &bPersist |
| 1852 ); |
| 1853 if( bPersist!=1 ){ |
| 1854 /* Try to delete the WAL file if the checkpoint completed and |
| 1855 ** fsynced (rc==SQLITE_OK) and if we are not in persistent-wal |
| 1856 ** mode (bPersist!=1) */ |
| 1857 isDelete = 1; |
| 1858 }else if( pWal->mxWalSize>=0 ){ |
| 1859 /* Try to truncate the WAL file to zero bytes if the checkpoint |
| 1860 ** completed and fsynced (rc==SQLITE_OK) and we are in persistent |
| 1861 ** WAL mode (bPersist) and if the PRAGMA journal_size_limit is a |
| 1862 ** non-negative value (pWal->mxWalSize>=0). Note that we truncate |
| 1863 ** to zero bytes as truncating to the journal_size_limit might |
| 1864 ** leave a corrupt WAL file on disk. */ |
| 1865 walLimitSize(pWal, 0); |
| 1866 } |
1794 } | 1867 } |
1795 } | 1868 } |
1796 | 1869 |
1797 walIndexClose(pWal, isDelete); | 1870 walIndexClose(pWal, isDelete); |
1798 sqlite3OsClose(pWal->pWalFd); | 1871 sqlite3OsClose(pWal->pWalFd); |
1799 if( isDelete ){ | 1872 if( isDelete ){ |
| 1873 sqlite3BeginBenignMalloc(); |
1800 sqlite3OsDelete(pWal->pVfs, pWal->zWalName, 0); | 1874 sqlite3OsDelete(pWal->pVfs, pWal->zWalName, 0); |
| 1875 sqlite3EndBenignMalloc(); |
1801 } | 1876 } |
1802 WALTRACE(("WAL%p: closed\n", pWal)); | 1877 WALTRACE(("WAL%p: closed\n", pWal)); |
1803 sqlite3_free((void *)pWal->apWiData); | 1878 sqlite3_free((void *)pWal->apWiData); |
1804 sqlite3_free(pWal); | 1879 sqlite3_free(pWal); |
1805 } | 1880 } |
1806 return rc; | 1881 return rc; |
1807 } | 1882 } |
1808 | 1883 |
1809 /* | 1884 /* |
1810 ** Try to read the wal-index header. Return 0 on success and 1 if | 1885 ** Try to read the wal-index header. Return 0 on success and 1 if |
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1868 /* The header was successfully read. Return zero. */ | 1943 /* The header was successfully read. Return zero. */ |
1869 return 0; | 1944 return 0; |
1870 } | 1945 } |
1871 | 1946 |
1872 /* | 1947 /* |
1873 ** Read the wal-index header from the wal-index and into pWal->hdr. | 1948 ** Read the wal-index header from the wal-index and into pWal->hdr. |
1874 ** If the wal-header appears to be corrupt, try to reconstruct the | 1949 ** If the wal-header appears to be corrupt, try to reconstruct the |
1875 ** wal-index from the WAL before returning. | 1950 ** wal-index from the WAL before returning. |
1876 ** | 1951 ** |
1877 ** Set *pChanged to 1 if the wal-index header value in pWal->hdr is | 1952 ** Set *pChanged to 1 if the wal-index header value in pWal->hdr is |
1878 ** changed by this opertion. If pWal->hdr is unchanged, set *pChanged | 1953 ** changed by this operation. If pWal->hdr is unchanged, set *pChanged |
1879 ** to 0. | 1954 ** to 0. |
1880 ** | 1955 ** |
1881 ** If the wal-index header is successfully read, return SQLITE_OK. | 1956 ** If the wal-index header is successfully read, return SQLITE_OK. |
1882 ** Otherwise an SQLite error code. | 1957 ** Otherwise an SQLite error code. |
1883 */ | 1958 */ |
1884 static int walIndexReadHdr(Wal *pWal, int *pChanged){ | 1959 static int walIndexReadHdr(Wal *pWal, int *pChanged){ |
1885 int rc; /* Return code */ | 1960 int rc; /* Return code */ |
1886 int badHdr; /* True if a header read failed */ | 1961 int badHdr; /* True if a header read failed */ |
1887 volatile u32 *page0; /* Chunk of wal-index containing header */ | 1962 volatile u32 *page0; /* Chunk of wal-index containing header */ |
1888 | 1963 |
(...skipping 11 matching lines...) Expand all Loading... |
1900 ** wal-index header immediately, without holding any lock. This usually | 1975 ** wal-index header immediately, without holding any lock. This usually |
1901 ** works, but may fail if the wal-index header is corrupt or currently | 1976 ** works, but may fail if the wal-index header is corrupt or currently |
1902 ** being modified by another thread or process. | 1977 ** being modified by another thread or process. |
1903 */ | 1978 */ |
1904 badHdr = (page0 ? walIndexTryHdr(pWal, pChanged) : 1); | 1979 badHdr = (page0 ? walIndexTryHdr(pWal, pChanged) : 1); |
1905 | 1980 |
1906 /* If the first attempt failed, it might have been due to a race | 1981 /* If the first attempt failed, it might have been due to a race |
1907 ** with a writer. So get a WRITE lock and try again. | 1982 ** with a writer. So get a WRITE lock and try again. |
1908 */ | 1983 */ |
1909 assert( badHdr==0 || pWal->writeLock==0 ); | 1984 assert( badHdr==0 || pWal->writeLock==0 ); |
1910 if( badHdr && SQLITE_OK==(rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1)) ){ | 1985 if( badHdr ){ |
1911 pWal->writeLock = 1; | 1986 if( pWal->readOnly & WAL_SHM_RDONLY ){ |
1912 if( SQLITE_OK==(rc = walIndexPage(pWal, 0, &page0)) ){ | 1987 if( SQLITE_OK==(rc = walLockShared(pWal, WAL_WRITE_LOCK)) ){ |
1913 badHdr = walIndexTryHdr(pWal, pChanged); | 1988 walUnlockShared(pWal, WAL_WRITE_LOCK); |
1914 if( badHdr ){ | 1989 rc = SQLITE_READONLY_RECOVERY; |
1915 /* If the wal-index header is still malformed even while holding | |
1916 ** a WRITE lock, it can only mean that the header is corrupted and | |
1917 ** needs to be reconstructed. So run recovery to do exactly that. | |
1918 */ | |
1919 rc = walIndexRecover(pWal); | |
1920 *pChanged = 1; | |
1921 } | 1990 } |
| 1991 }else if( SQLITE_OK==(rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1)) ){ |
| 1992 pWal->writeLock = 1; |
| 1993 if( SQLITE_OK==(rc = walIndexPage(pWal, 0, &page0)) ){ |
| 1994 badHdr = walIndexTryHdr(pWal, pChanged); |
| 1995 if( badHdr ){ |
| 1996 /* If the wal-index header is still malformed even while holding |
| 1997 ** a WRITE lock, it can only mean that the header is corrupted and |
| 1998 ** needs to be reconstructed. So run recovery to do exactly that. |
| 1999 */ |
| 2000 rc = walIndexRecover(pWal); |
| 2001 *pChanged = 1; |
| 2002 } |
| 2003 } |
| 2004 pWal->writeLock = 0; |
| 2005 walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1); |
1922 } | 2006 } |
1923 pWal->writeLock = 0; | |
1924 walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1); | |
1925 } | 2007 } |
1926 | 2008 |
1927 /* If the header is read successfully, check the version number to make | 2009 /* If the header is read successfully, check the version number to make |
1928 ** sure the wal-index was not constructed with some future format that | 2010 ** sure the wal-index was not constructed with some future format that |
1929 ** this version of SQLite cannot understand. | 2011 ** this version of SQLite cannot understand. |
1930 */ | 2012 */ |
1931 if( badHdr==0 && pWal->hdr.iVersion!=WALINDEX_MAX_VERSION ){ | 2013 if( badHdr==0 && pWal->hdr.iVersion!=WALINDEX_MAX_VERSION ){ |
1932 rc = SQLITE_CANTOPEN_BKPT; | 2014 rc = SQLITE_CANTOPEN_BKPT; |
1933 } | 2015 } |
1934 | 2016 |
(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2007 ** locks are held, so the locks should not be held for very long. But | 2089 ** locks are held, so the locks should not be held for very long. But |
2008 ** if we are unlucky, another process that is holding a lock might get | 2090 ** if we are unlucky, another process that is holding a lock might get |
2009 ** paged out or take a page-fault that is time-consuming to resolve, | 2091 ** paged out or take a page-fault that is time-consuming to resolve, |
2010 ** during the few nanoseconds that it is holding the lock. In that case, | 2092 ** during the few nanoseconds that it is holding the lock. In that case, |
2011 ** it might take longer than normal for the lock to free. | 2093 ** it might take longer than normal for the lock to free. |
2012 ** | 2094 ** |
2013 ** After 5 RETRYs, we begin calling sqlite3OsSleep(). The first few | 2095 ** After 5 RETRYs, we begin calling sqlite3OsSleep(). The first few |
2014 ** calls to sqlite3OsSleep() have a delay of 1 microsecond. Really this | 2096 ** calls to sqlite3OsSleep() have a delay of 1 microsecond. Really this |
2015 ** is more of a scheduler yield than an actual delay. But on the 10th | 2097 ** is more of a scheduler yield than an actual delay. But on the 10th |
2016 ** and subsequent retries, the delays start becoming longer and longer, | 2098 ** and subsequent retries, the delays start becoming longer and longer, |
2017 ** so that on the 100th (and last) RETRY we delay for 21 milliseconds. | 2099 ** so that on the 100th (and last) RETRY we delay for 323 milliseconds. |
2018 ** The total delay time before giving up is less than 1 second. | 2100 ** The total delay time before giving up is less than 10 seconds. |
2019 */ | 2101 */ |
2020 if( cnt>5 ){ | 2102 if( cnt>5 ){ |
2021 int nDelay = 1; /* Pause time in microseconds */ | 2103 int nDelay = 1; /* Pause time in microseconds */ |
2022 if( cnt>100 ){ | 2104 if( cnt>100 ){ |
2023 VVA_ONLY( pWal->lockError = 1; ) | 2105 VVA_ONLY( pWal->lockError = 1; ) |
2024 return SQLITE_PROTOCOL; | 2106 return SQLITE_PROTOCOL; |
2025 } | 2107 } |
2026 if( cnt>=10 ) nDelay = (cnt-9)*238; /* Max delay 21ms. Total delay 996ms */ | 2108 if( cnt>=10 ) nDelay = (cnt-9)*(cnt-9)*39; |
2027 sqlite3OsSleep(pWal->pVfs, nDelay); | 2109 sqlite3OsSleep(pWal->pVfs, nDelay); |
2028 } | 2110 } |
2029 | 2111 |
2030 if( !useWal ){ | 2112 if( !useWal ){ |
2031 rc = walIndexReadHdr(pWal, pChanged); | 2113 rc = walIndexReadHdr(pWal, pChanged); |
2032 if( rc==SQLITE_BUSY ){ | 2114 if( rc==SQLITE_BUSY ){ |
2033 /* If there is not a recovery running in another thread or process | 2115 /* If there is not a recovery running in another thread or process |
2034 ** then convert BUSY errors to WAL_RETRY. If recovery is known to | 2116 ** then convert BUSY errors to WAL_RETRY. If recovery is known to |
2035 ** be running, convert BUSY to BUSY_RECOVERY. There is a race here | 2117 ** be running, convert BUSY to BUSY_RECOVERY. There is a race here |
2036 ** which might cause WAL_RETRY to be returned even if BUSY_RECOVERY | 2118 ** which might cause WAL_RETRY to be returned even if BUSY_RECOVERY |
(...skipping 28 matching lines...) Expand all Loading... |
2065 ** and can be safely ignored. | 2147 ** and can be safely ignored. |
2066 */ | 2148 */ |
2067 rc = walLockShared(pWal, WAL_READ_LOCK(0)); | 2149 rc = walLockShared(pWal, WAL_READ_LOCK(0)); |
2068 walShmBarrier(pWal); | 2150 walShmBarrier(pWal); |
2069 if( rc==SQLITE_OK ){ | 2151 if( rc==SQLITE_OK ){ |
2070 if( memcmp((void *)walIndexHdr(pWal), &pWal->hdr, sizeof(WalIndexHdr)) ){ | 2152 if( memcmp((void *)walIndexHdr(pWal), &pWal->hdr, sizeof(WalIndexHdr)) ){ |
2071 /* It is not safe to allow the reader to continue here if frames | 2153 /* It is not safe to allow the reader to continue here if frames |
2072 ** may have been appended to the log before READ_LOCK(0) was obtained. | 2154 ** may have been appended to the log before READ_LOCK(0) was obtained. |
2073 ** When holding READ_LOCK(0), the reader ignores the entire log file, | 2155 ** When holding READ_LOCK(0), the reader ignores the entire log file, |
2074 ** which implies that the database file contains a trustworthy | 2156 ** which implies that the database file contains a trustworthy |
2075 ** snapshoT. Since holding READ_LOCK(0) prevents a checkpoint from | 2157 ** snapshot. Since holding READ_LOCK(0) prevents a checkpoint from |
2076 ** happening, this is usually correct. | 2158 ** happening, this is usually correct. |
2077 ** | 2159 ** |
2078 ** However, if frames have been appended to the log (or if the log | 2160 ** However, if frames have been appended to the log (or if the log |
2079 ** is wrapped and written for that matter) before the READ_LOCK(0) | 2161 ** is wrapped and written for that matter) before the READ_LOCK(0) |
2080 ** is obtained, that is not necessarily true. A checkpointer may | 2162 ** is obtained, that is not necessarily true. A checkpointer may |
2081 ** have started to backfill the appended frames but crashed before | 2163 ** have started to backfill the appended frames but crashed before |
2082 ** it finished, leaving a corrupt image in the database file. | 2164 ** it finished, leaving a corrupt image in the database file. |
2083 */ | 2165 */ |
2084 walUnlockShared(pWal, WAL_READ_LOCK(0)); | 2166 walUnlockShared(pWal, WAL_READ_LOCK(0)); |
2085 return WAL_RETRY; | 2167 return WAL_RETRY; |
(...skipping 15 matching lines...) Expand all Loading... |
2101 for(i=1; i<WAL_NREADER; i++){ | 2183 for(i=1; i<WAL_NREADER; i++){ |
2102 u32 thisMark = pInfo->aReadMark[i]; | 2184 u32 thisMark = pInfo->aReadMark[i]; |
2103 if( mxReadMark<=thisMark && thisMark<=pWal->hdr.mxFrame ){ | 2185 if( mxReadMark<=thisMark && thisMark<=pWal->hdr.mxFrame ){ |
2104 assert( thisMark!=READMARK_NOT_USED ); | 2186 assert( thisMark!=READMARK_NOT_USED ); |
2105 mxReadMark = thisMark; | 2187 mxReadMark = thisMark; |
2106 mxI = i; | 2188 mxI = i; |
2107 } | 2189 } |
2108 } | 2190 } |
2109 /* There was once an "if" here. The extra "{" is to preserve indentation. */ | 2191 /* There was once an "if" here. The extra "{" is to preserve indentation. */ |
2110 { | 2192 { |
2111 if( mxReadMark < pWal->hdr.mxFrame || mxI==0 ){ | 2193 if( (pWal->readOnly & WAL_SHM_RDONLY)==0 |
| 2194 && (mxReadMark<pWal->hdr.mxFrame || mxI==0) |
| 2195 ){ |
2112 for(i=1; i<WAL_NREADER; i++){ | 2196 for(i=1; i<WAL_NREADER; i++){ |
2113 rc = walLockExclusive(pWal, WAL_READ_LOCK(i), 1); | 2197 rc = walLockExclusive(pWal, WAL_READ_LOCK(i), 1); |
2114 if( rc==SQLITE_OK ){ | 2198 if( rc==SQLITE_OK ){ |
2115 mxReadMark = pInfo->aReadMark[i] = pWal->hdr.mxFrame; | 2199 mxReadMark = pInfo->aReadMark[i] = pWal->hdr.mxFrame; |
2116 mxI = i; | 2200 mxI = i; |
2117 walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1); | 2201 walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1); |
2118 break; | 2202 break; |
2119 }else if( rc!=SQLITE_BUSY ){ | 2203 }else if( rc!=SQLITE_BUSY ){ |
2120 return rc; | 2204 return rc; |
2121 } | 2205 } |
2122 } | 2206 } |
2123 } | 2207 } |
2124 if( mxI==0 ){ | 2208 if( mxI==0 ){ |
2125 assert( rc==SQLITE_BUSY ); | 2209 assert( rc==SQLITE_BUSY || (pWal->readOnly & WAL_SHM_RDONLY)!=0 ); |
2126 return WAL_RETRY; | 2210 return rc==SQLITE_BUSY ? WAL_RETRY : SQLITE_READONLY_CANTLOCK; |
2127 } | 2211 } |
2128 | 2212 |
2129 rc = walLockShared(pWal, WAL_READ_LOCK(mxI)); | 2213 rc = walLockShared(pWal, WAL_READ_LOCK(mxI)); |
2130 if( rc ){ | 2214 if( rc ){ |
2131 return rc==SQLITE_BUSY ? WAL_RETRY : rc; | 2215 return rc==SQLITE_BUSY ? WAL_RETRY : rc; |
2132 } | 2216 } |
2133 /* Now that the read-lock has been obtained, check that neither the | 2217 /* Now that the read-lock has been obtained, check that neither the |
2134 ** value in the aReadMark[] array nor the contents of the wal-index | 2218 ** value in the aReadMark[] array nor the contents of the wal-index |
2135 ** header have changed. | 2219 ** header have changed. |
2136 ** | 2220 ** |
(...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2198 */ | 2282 */ |
2199 void sqlite3WalEndReadTransaction(Wal *pWal){ | 2283 void sqlite3WalEndReadTransaction(Wal *pWal){ |
2200 sqlite3WalEndWriteTransaction(pWal); | 2284 sqlite3WalEndWriteTransaction(pWal); |
2201 if( pWal->readLock>=0 ){ | 2285 if( pWal->readLock>=0 ){ |
2202 walUnlockShared(pWal, WAL_READ_LOCK(pWal->readLock)); | 2286 walUnlockShared(pWal, WAL_READ_LOCK(pWal->readLock)); |
2203 pWal->readLock = -1; | 2287 pWal->readLock = -1; |
2204 } | 2288 } |
2205 } | 2289 } |
2206 | 2290 |
2207 /* | 2291 /* |
2208 ** Read a page from the WAL, if it is present in the WAL and if the | 2292 ** Search the wal file for page pgno. If found, set *piRead to the frame that |
2209 ** current read transaction is configured to use the WAL. | 2293 ** contains the page. Otherwise, if pgno is not in the wal file, set *piRead |
| 2294 ** to zero. |
2210 ** | 2295 ** |
2211 ** The *pInWal is set to 1 if the requested page is in the WAL and | 2296 ** Return SQLITE_OK if successful, or an error code if an error occurs. If an |
2212 ** has been loaded. Or *pInWal is set to 0 if the page was not in | 2297 ** error does occur, the final value of *piRead is undefined. |
2213 ** the WAL and needs to be read out of the database. | |
2214 */ | 2298 */ |
2215 int sqlite3WalRead( | 2299 int sqlite3WalFindFrame( |
2216 Wal *pWal, /* WAL handle */ | 2300 Wal *pWal, /* WAL handle */ |
2217 Pgno pgno, /* Database page number to read data for */ | 2301 Pgno pgno, /* Database page number to read data for */ |
2218 int *pInWal, /* OUT: True if data is read from WAL */ | 2302 u32 *piRead /* OUT: Frame number (or zero) */ |
2219 int nOut, /* Size of buffer pOut in bytes */ | |
2220 u8 *pOut /* Buffer to write page data to */ | |
2221 ){ | 2303 ){ |
2222 u32 iRead = 0; /* If !=0, WAL frame to return data from */ | 2304 u32 iRead = 0; /* If !=0, WAL frame to return data from */ |
2223 u32 iLast = pWal->hdr.mxFrame; /* Last page in WAL for this reader */ | 2305 u32 iLast = pWal->hdr.mxFrame; /* Last page in WAL for this reader */ |
2224 int iHash; /* Used to loop through N hash tables */ | 2306 int iHash; /* Used to loop through N hash tables */ |
2225 | 2307 |
2226 /* This routine is only called from within a read transaction. */ | 2308 /* This routine is only called from within a read transaction. */ |
2227 assert( pWal->readLock>=0 || pWal->lockError ); | 2309 assert( pWal->readLock>=0 || pWal->lockError ); |
2228 | 2310 |
2229 /* If the "last page" field of the wal-index header snapshot is 0, then | 2311 /* If the "last page" field of the wal-index header snapshot is 0, then |
2230 ** no data will be read from the wal under any circumstances. Return early | 2312 ** no data will be read from the wal under any circumstances. Return early |
2231 ** in this case as an optimization. Likewise, if pWal->readLock==0, | 2313 ** in this case as an optimization. Likewise, if pWal->readLock==0, |
2232 ** then the WAL is ignored by the reader so return early, as if the | 2314 ** then the WAL is ignored by the reader so return early, as if the |
2233 ** WAL were empty. | 2315 ** WAL were empty. |
2234 */ | 2316 */ |
2235 if( iLast==0 || pWal->readLock==0 ){ | 2317 if( iLast==0 || pWal->readLock==0 ){ |
2236 *pInWal = 0; | 2318 *piRead = 0; |
2237 return SQLITE_OK; | 2319 return SQLITE_OK; |
2238 } | 2320 } |
2239 | 2321 |
2240 /* Search the hash table or tables for an entry matching page number | 2322 /* Search the hash table or tables for an entry matching page number |
2241 ** pgno. Each iteration of the following for() loop searches one | 2323 ** pgno. Each iteration of the following for() loop searches one |
2242 ** hash table (each hash table indexes up to HASHTABLE_NPAGE frames). | 2324 ** hash table (each hash table indexes up to HASHTABLE_NPAGE frames). |
2243 ** | 2325 ** |
2244 ** This code might run concurrently to the code in walIndexAppend() | 2326 ** This code might run concurrently to the code in walIndexAppend() |
2245 ** that adds entries to the wal-index (and possibly to this hash | 2327 ** that adds entries to the wal-index (and possibly to this hash |
2246 ** table). This means the value just read from the hash | 2328 ** table). This means the value just read from the hash |
(...skipping 24 matching lines...) Expand all Loading... |
2271 int rc; /* Error code */ | 2353 int rc; /* Error code */ |
2272 | 2354 |
2273 rc = walHashGet(pWal, iHash, &aHash, &aPgno, &iZero); | 2355 rc = walHashGet(pWal, iHash, &aHash, &aPgno, &iZero); |
2274 if( rc!=SQLITE_OK ){ | 2356 if( rc!=SQLITE_OK ){ |
2275 return rc; | 2357 return rc; |
2276 } | 2358 } |
2277 nCollide = HASHTABLE_NSLOT; | 2359 nCollide = HASHTABLE_NSLOT; |
2278 for(iKey=walHash(pgno); aHash[iKey]; iKey=walNextHash(iKey)){ | 2360 for(iKey=walHash(pgno); aHash[iKey]; iKey=walNextHash(iKey)){ |
2279 u32 iFrame = aHash[iKey] + iZero; | 2361 u32 iFrame = aHash[iKey] + iZero; |
2280 if( iFrame<=iLast && aPgno[aHash[iKey]]==pgno ){ | 2362 if( iFrame<=iLast && aPgno[aHash[iKey]]==pgno ){ |
2281 assert( iFrame>iRead ); | 2363 /* assert( iFrame>iRead ); -- not true if there is corruption */ |
2282 iRead = iFrame; | 2364 iRead = iFrame; |
2283 } | 2365 } |
2284 if( (nCollide--)==0 ){ | 2366 if( (nCollide--)==0 ){ |
2285 return SQLITE_CORRUPT_BKPT; | 2367 return SQLITE_CORRUPT_BKPT; |
2286 } | 2368 } |
2287 } | 2369 } |
2288 } | 2370 } |
2289 | 2371 |
2290 #ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT | 2372 #ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT |
2291 /* If expensive assert() statements are available, do a linear search | 2373 /* If expensive assert() statements are available, do a linear search |
2292 ** of the wal-index file content. Make sure the results agree with the | 2374 ** of the wal-index file content. Make sure the results agree with the |
2293 ** result obtained using the hash indexes above. */ | 2375 ** result obtained using the hash indexes above. */ |
2294 { | 2376 { |
2295 u32 iRead2 = 0; | 2377 u32 iRead2 = 0; |
2296 u32 iTest; | 2378 u32 iTest; |
2297 for(iTest=iLast; iTest>0; iTest--){ | 2379 for(iTest=iLast; iTest>0; iTest--){ |
2298 if( walFramePgno(pWal, iTest)==pgno ){ | 2380 if( walFramePgno(pWal, iTest)==pgno ){ |
2299 iRead2 = iTest; | 2381 iRead2 = iTest; |
2300 break; | 2382 break; |
2301 } | 2383 } |
2302 } | 2384 } |
2303 assert( iRead==iRead2 ); | 2385 assert( iRead==iRead2 ); |
2304 } | 2386 } |
2305 #endif | 2387 #endif |
2306 | 2388 |
2307 /* If iRead is non-zero, then it is the log frame number that contains the | 2389 *piRead = iRead; |
2308 ** required page. Read and return data from the log file. | |
2309 */ | |
2310 if( iRead ){ | |
2311 int sz; | |
2312 i64 iOffset; | |
2313 sz = pWal->hdr.szPage; | |
2314 sz = (pWal->hdr.szPage&0xfe00) + ((pWal->hdr.szPage&0x0001)<<16); | |
2315 testcase( sz<=32768 ); | |
2316 testcase( sz>=65536 ); | |
2317 iOffset = walFrameOffset(iRead, sz) + WAL_FRAME_HDRSIZE; | |
2318 *pInWal = 1; | |
2319 /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL */ | |
2320 return sqlite3OsRead(pWal->pWalFd, pOut, nOut, iOffset); | |
2321 } | |
2322 | |
2323 *pInWal = 0; | |
2324 return SQLITE_OK; | 2390 return SQLITE_OK; |
2325 } | 2391 } |
2326 | 2392 |
| 2393 /* |
| 2394 ** Read the contents of frame iRead from the wal file into buffer pOut |
| 2395 ** (which is nOut bytes in size). Return SQLITE_OK if successful, or an |
| 2396 ** error code otherwise. |
| 2397 */ |
| 2398 int sqlite3WalReadFrame( |
| 2399 Wal *pWal, /* WAL handle */ |
| 2400 u32 iRead, /* Frame to read */ |
| 2401 int nOut, /* Size of buffer pOut in bytes */ |
| 2402 u8 *pOut /* Buffer to write page data to */ |
| 2403 ){ |
| 2404 int sz; |
| 2405 i64 iOffset; |
| 2406 sz = pWal->hdr.szPage; |
| 2407 sz = (sz&0xfe00) + ((sz&0x0001)<<16); |
| 2408 testcase( sz<=32768 ); |
| 2409 testcase( sz>=65536 ); |
| 2410 iOffset = walFrameOffset(iRead, sz) + WAL_FRAME_HDRSIZE; |
| 2411 /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL */ |
| 2412 return sqlite3OsRead(pWal->pWalFd, pOut, (nOut>sz ? sz : nOut), iOffset); |
| 2413 } |
2327 | 2414 |
2328 /* | 2415 /* |
2329 ** Return the size of the database in pages (or zero, if unknown). | 2416 ** Return the size of the database in pages (or zero, if unknown). |
2330 */ | 2417 */ |
2331 Pgno sqlite3WalDbsize(Wal *pWal){ | 2418 Pgno sqlite3WalDbsize(Wal *pWal){ |
2332 if( pWal && ALWAYS(pWal->readLock>=0) ){ | 2419 if( pWal && ALWAYS(pWal->readLock>=0) ){ |
2333 return pWal->hdr.nPage; | 2420 return pWal->hdr.nPage; |
2334 } | 2421 } |
2335 return 0; | 2422 return 0; |
2336 } | 2423 } |
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2369 } | 2456 } |
2370 pWal->writeLock = 1; | 2457 pWal->writeLock = 1; |
2371 | 2458 |
2372 /* If another connection has written to the database file since the | 2459 /* If another connection has written to the database file since the |
2373 ** time the read transaction on this connection was started, then | 2460 ** time the read transaction on this connection was started, then |
2374 ** the write is disallowed. | 2461 ** the write is disallowed. |
2375 */ | 2462 */ |
2376 if( memcmp(&pWal->hdr, (void *)walIndexHdr(pWal), sizeof(WalIndexHdr))!=0 ){ | 2463 if( memcmp(&pWal->hdr, (void *)walIndexHdr(pWal), sizeof(WalIndexHdr))!=0 ){ |
2377 walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1); | 2464 walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1); |
2378 pWal->writeLock = 0; | 2465 pWal->writeLock = 0; |
2379 rc = SQLITE_BUSY; | 2466 rc = SQLITE_BUSY_SNAPSHOT; |
2380 } | 2467 } |
2381 | 2468 |
2382 return rc; | 2469 return rc; |
2383 } | 2470 } |
2384 | 2471 |
2385 /* | 2472 /* |
2386 ** End a write transaction. The commit has already been done. This | 2473 ** End a write transaction. The commit has already been done. This |
2387 ** routine merely releases the lock. | 2474 ** routine merely releases the lock. |
2388 */ | 2475 */ |
2389 int sqlite3WalEndWriteTransaction(Wal *pWal){ | 2476 int sqlite3WalEndWriteTransaction(Wal *pWal){ |
2390 if( pWal->writeLock ){ | 2477 if( pWal->writeLock ){ |
2391 walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1); | 2478 walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1); |
2392 pWal->writeLock = 0; | 2479 pWal->writeLock = 0; |
| 2480 pWal->truncateOnCommit = 0; |
2393 } | 2481 } |
2394 return SQLITE_OK; | 2482 return SQLITE_OK; |
2395 } | 2483 } |
2396 | 2484 |
2397 /* | 2485 /* |
2398 ** If any data has been written (but not committed) to the log file, this | 2486 ** If any data has been written (but not committed) to the log file, this |
2399 ** function moves the write-pointer back to the start of the transaction. | 2487 ** function moves the write-pointer back to the start of the transaction. |
2400 ** | 2488 ** |
2401 ** Additionally, the callback function is invoked for each frame written | 2489 ** Additionally, the callback function is invoked for each frame written |
2402 ** to the WAL since the start of the transaction. If the callback returns | 2490 ** to the WAL since the start of the transaction. If the callback returns |
(...skipping 25 matching lines...) Expand all Loading... |
2428 ** is false). | 2516 ** is false). |
2429 ** | 2517 ** |
2430 ** If the upper layer is doing a rollback, it is guaranteed that there | 2518 ** If the upper layer is doing a rollback, it is guaranteed that there |
2431 ** are no outstanding references to any page other than page 1. And | 2519 ** are no outstanding references to any page other than page 1. And |
2432 ** page 1 is never written to the log until the transaction is | 2520 ** page 1 is never written to the log until the transaction is |
2433 ** committed. As a result, the call to xUndo may not fail. | 2521 ** committed. As a result, the call to xUndo may not fail. |
2434 */ | 2522 */ |
2435 assert( walFramePgno(pWal, iFrame)!=1 ); | 2523 assert( walFramePgno(pWal, iFrame)!=1 ); |
2436 rc = xUndo(pUndoCtx, walFramePgno(pWal, iFrame)); | 2524 rc = xUndo(pUndoCtx, walFramePgno(pWal, iFrame)); |
2437 } | 2525 } |
2438 walCleanupHash(pWal); | 2526 if( iMax!=pWal->hdr.mxFrame ) walCleanupHash(pWal); |
2439 } | 2527 } |
2440 assert( rc==SQLITE_OK ); | |
2441 return rc; | 2528 return rc; |
2442 } | 2529 } |
2443 | 2530 |
2444 /* | 2531 /* |
2445 ** Argument aWalData must point to an array of WAL_SAVEPOINT_NDATA u32 | 2532 ** Argument aWalData must point to an array of WAL_SAVEPOINT_NDATA u32 |
2446 ** values. This function populates the array with values required to | 2533 ** values. This function populates the array with values required to |
2447 ** "rollback" the write position of the WAL handle back to the current | 2534 ** "rollback" the write position of the WAL handle back to the current |
2448 ** point in the event of a savepoint rollback (via WalSavepointUndo()). | 2535 ** point in the event of a savepoint rollback (via WalSavepointUndo()). |
2449 */ | 2536 */ |
2450 void sqlite3WalSavepoint(Wal *pWal, u32 *aWalData){ | 2537 void sqlite3WalSavepoint(Wal *pWal, u32 *aWalData){ |
(...skipping 28 matching lines...) Expand all Loading... |
2479 if( aWalData[0]<pWal->hdr.mxFrame ){ | 2566 if( aWalData[0]<pWal->hdr.mxFrame ){ |
2480 pWal->hdr.mxFrame = aWalData[0]; | 2567 pWal->hdr.mxFrame = aWalData[0]; |
2481 pWal->hdr.aFrameCksum[0] = aWalData[1]; | 2568 pWal->hdr.aFrameCksum[0] = aWalData[1]; |
2482 pWal->hdr.aFrameCksum[1] = aWalData[2]; | 2569 pWal->hdr.aFrameCksum[1] = aWalData[2]; |
2483 walCleanupHash(pWal); | 2570 walCleanupHash(pWal); |
2484 } | 2571 } |
2485 | 2572 |
2486 return rc; | 2573 return rc; |
2487 } | 2574 } |
2488 | 2575 |
| 2576 |
2489 /* | 2577 /* |
2490 ** This function is called just before writing a set of frames to the log | 2578 ** This function is called just before writing a set of frames to the log |
2491 ** file (see sqlite3WalFrames()). It checks to see if, instead of appending | 2579 ** file (see sqlite3WalFrames()). It checks to see if, instead of appending |
2492 ** to the current log file, it is possible to overwrite the start of the | 2580 ** to the current log file, it is possible to overwrite the start of the |
2493 ** existing log file with the new frames (i.e. "reset" the log). If so, | 2581 ** existing log file with the new frames (i.e. "reset" the log). If so, |
2494 ** it sets pWal->hdr.mxFrame to 0. Otherwise, pWal->hdr.mxFrame is left | 2582 ** it sets pWal->hdr.mxFrame to 0. Otherwise, pWal->hdr.mxFrame is left |
2495 ** unchanged. | 2583 ** unchanged. |
2496 ** | 2584 ** |
2497 ** SQLITE_OK is returned if no error is encountered (regardless of whether | 2585 ** SQLITE_OK is returned if no error is encountered (regardless of whether |
2498 ** or not pWal->hdr.mxFrame is modified). An SQLite error code is returned | 2586 ** or not pWal->hdr.mxFrame is modified). An SQLite error code is returned |
(...skipping 16 matching lines...) Expand all Loading... |
2515 ** frames will overwrite the start of the existing log. Update the | 2603 ** frames will overwrite the start of the existing log. Update the |
2516 ** wal-index header to reflect this. | 2604 ** wal-index header to reflect this. |
2517 ** | 2605 ** |
2518 ** In theory it would be Ok to update the cache of the header only | 2606 ** In theory it would be Ok to update the cache of the header only |
2519 ** at this point. But updating the actual wal-index header is also | 2607 ** at this point. But updating the actual wal-index header is also |
2520 ** safe and means there is no special case for sqlite3WalUndo() | 2608 ** safe and means there is no special case for sqlite3WalUndo() |
2521 ** to handle if this transaction is rolled back. | 2609 ** to handle if this transaction is rolled back. |
2522 */ | 2610 */ |
2523 int i; /* Loop counter */ | 2611 int i; /* Loop counter */ |
2524 u32 *aSalt = pWal->hdr.aSalt; /* Big-endian salt values */ | 2612 u32 *aSalt = pWal->hdr.aSalt; /* Big-endian salt values */ |
| 2613 |
2525 pWal->nCkpt++; | 2614 pWal->nCkpt++; |
2526 pWal->hdr.mxFrame = 0; | 2615 pWal->hdr.mxFrame = 0; |
2527 sqlite3Put4byte((u8*)&aSalt[0], 1 + sqlite3Get4byte((u8*)&aSalt[0])); | 2616 sqlite3Put4byte((u8*)&aSalt[0], 1 + sqlite3Get4byte((u8*)&aSalt[0])); |
2528 aSalt[1] = salt1; | 2617 aSalt[1] = salt1; |
2529 walIndexWriteHdr(pWal); | 2618 walIndexWriteHdr(pWal); |
2530 pInfo->nBackfill = 0; | 2619 pInfo->nBackfill = 0; |
2531 for(i=1; i<WAL_NREADER; i++) pInfo->aReadMark[i] = READMARK_NOT_USED; | 2620 pInfo->aReadMark[1] = 0; |
| 2621 for(i=2; i<WAL_NREADER; i++) pInfo->aReadMark[i] = READMARK_NOT_USED; |
2532 assert( pInfo->aReadMark[0]==0 ); | 2622 assert( pInfo->aReadMark[0]==0 ); |
2533 walUnlockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1); | 2623 walUnlockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1); |
2534 }else if( rc!=SQLITE_BUSY ){ | 2624 }else if( rc!=SQLITE_BUSY ){ |
2535 return rc; | 2625 return rc; |
2536 } | 2626 } |
2537 } | 2627 } |
2538 walUnlockShared(pWal, WAL_READ_LOCK(0)); | 2628 walUnlockShared(pWal, WAL_READ_LOCK(0)); |
2539 pWal->readLock = -1; | 2629 pWal->readLock = -1; |
2540 cnt = 0; | 2630 cnt = 0; |
2541 do{ | 2631 do{ |
2542 int notUsed; | 2632 int notUsed; |
2543 rc = walTryBeginRead(pWal, &notUsed, 1, ++cnt); | 2633 rc = walTryBeginRead(pWal, &notUsed, 1, ++cnt); |
2544 }while( rc==WAL_RETRY ); | 2634 }while( rc==WAL_RETRY ); |
2545 assert( (rc&0xff)!=SQLITE_BUSY ); /* BUSY not possible when useWal==1 */ | 2635 assert( (rc&0xff)!=SQLITE_BUSY ); /* BUSY not possible when useWal==1 */ |
2546 testcase( (rc&0xff)==SQLITE_IOERR ); | 2636 testcase( (rc&0xff)==SQLITE_IOERR ); |
2547 testcase( rc==SQLITE_PROTOCOL ); | 2637 testcase( rc==SQLITE_PROTOCOL ); |
2548 testcase( rc==SQLITE_OK ); | 2638 testcase( rc==SQLITE_OK ); |
2549 } | 2639 } |
2550 return rc; | 2640 return rc; |
2551 } | 2641 } |
2552 | 2642 |
| 2643 /* |
| 2644 ** Information about the current state of the WAL file and where |
| 2645 ** the next fsync should occur - passed from sqlite3WalFrames() into |
| 2646 ** walWriteToLog(). |
| 2647 */ |
| 2648 typedef struct WalWriter { |
| 2649 Wal *pWal; /* The complete WAL information */ |
| 2650 sqlite3_file *pFd; /* The WAL file to which we write */ |
| 2651 sqlite3_int64 iSyncPoint; /* Fsync at this offset */ |
| 2652 int syncFlags; /* Flags for the fsync */ |
| 2653 int szPage; /* Size of one page */ |
| 2654 } WalWriter; |
| 2655 |
| 2656 /* |
| 2657 ** Write iAmt bytes of content into the WAL file beginning at iOffset. |
| 2658 ** Do a sync when crossing the p->iSyncPoint boundary. |
| 2659 ** |
| 2660 ** In other words, if iSyncPoint is in between iOffset and iOffset+iAmt, |
| 2661 ** first write the part before iSyncPoint, then sync, then write the |
| 2662 ** rest. |
| 2663 */ |
| 2664 static int walWriteToLog( |
| 2665 WalWriter *p, /* WAL to write to */ |
| 2666 void *pContent, /* Content to be written */ |
| 2667 int iAmt, /* Number of bytes to write */ |
| 2668 sqlite3_int64 iOffset /* Start writing at this offset */ |
| 2669 ){ |
| 2670 int rc; |
| 2671 if( iOffset<p->iSyncPoint && iOffset+iAmt>=p->iSyncPoint ){ |
| 2672 int iFirstAmt = (int)(p->iSyncPoint - iOffset); |
| 2673 rc = sqlite3OsWrite(p->pFd, pContent, iFirstAmt, iOffset); |
| 2674 if( rc ) return rc; |
| 2675 iOffset += iFirstAmt; |
| 2676 iAmt -= iFirstAmt; |
| 2677 pContent = (void*)(iFirstAmt + (char*)pContent); |
| 2678 assert( p->syncFlags & (SQLITE_SYNC_NORMAL|SQLITE_SYNC_FULL) ); |
| 2679 rc = sqlite3OsSync(p->pFd, p->syncFlags & SQLITE_SYNC_MASK); |
| 2680 if( iAmt==0 || rc ) return rc; |
| 2681 } |
| 2682 rc = sqlite3OsWrite(p->pFd, pContent, iAmt, iOffset); |
| 2683 return rc; |
| 2684 } |
| 2685 |
| 2686 /* |
| 2687 ** Write out a single frame of the WAL |
| 2688 */ |
| 2689 static int walWriteOneFrame( |
| 2690 WalWriter *p, /* Where to write the frame */ |
| 2691 PgHdr *pPage, /* The page of the frame to be written */ |
| 2692 int nTruncate, /* The commit flag. Usually 0. >0 for commit */ |
| 2693 sqlite3_int64 iOffset /* Byte offset at which to write */ |
| 2694 ){ |
| 2695 int rc; /* Result code from subfunctions */ |
| 2696 void *pData; /* Data actually written */ |
| 2697 u8 aFrame[WAL_FRAME_HDRSIZE]; /* Buffer to assemble frame-header in */ |
| 2698 #if defined(SQLITE_HAS_CODEC) |
| 2699 if( (pData = sqlite3PagerCodec(pPage))==0 ) return SQLITE_NOMEM; |
| 2700 #else |
| 2701 pData = pPage->pData; |
| 2702 #endif |
| 2703 walEncodeFrame(p->pWal, pPage->pgno, nTruncate, pData, aFrame); |
| 2704 rc = walWriteToLog(p, aFrame, sizeof(aFrame), iOffset); |
| 2705 if( rc ) return rc; |
| 2706 /* Write the page data */ |
| 2707 rc = walWriteToLog(p, pData, p->szPage, iOffset+sizeof(aFrame)); |
| 2708 return rc; |
| 2709 } |
| 2710 |
2553 /* | 2711 /* |
2554 ** Write a set of frames to the log. The caller must hold the write-lock | 2712 ** Write a set of frames to the log. The caller must hold the write-lock |
2555 ** on the log file (obtained using sqlite3WalBeginWriteTransaction()). | 2713 ** on the log file (obtained using sqlite3WalBeginWriteTransaction()). |
2556 */ | 2714 */ |
2557 int sqlite3WalFrames( | 2715 int sqlite3WalFrames( |
2558 Wal *pWal, /* Wal handle to write to */ | 2716 Wal *pWal, /* Wal handle to write to */ |
2559 int szPage, /* Database page-size in bytes */ | 2717 int szPage, /* Database page-size in bytes */ |
2560 PgHdr *pList, /* List of dirty pages to write */ | 2718 PgHdr *pList, /* List of dirty pages to write */ |
2561 Pgno nTruncate, /* Database size after this commit */ | 2719 Pgno nTruncate, /* Database size after this commit */ |
2562 int isCommit, /* True if this is a commit */ | 2720 int isCommit, /* True if this is a commit */ |
2563 int sync_flags /* Flags to pass to OsSync() (or 0) */ | 2721 int sync_flags /* Flags to pass to OsSync() (or 0) */ |
2564 ){ | 2722 ){ |
2565 int rc; /* Used to catch return codes */ | 2723 int rc; /* Used to catch return codes */ |
2566 u32 iFrame; /* Next frame address */ | 2724 u32 iFrame; /* Next frame address */ |
2567 u8 aFrame[WAL_FRAME_HDRSIZE]; /* Buffer to assemble frame-header in */ | |
2568 PgHdr *p; /* Iterator to run through pList with. */ | 2725 PgHdr *p; /* Iterator to run through pList with. */ |
2569 PgHdr *pLast = 0; /* Last frame in list */ | 2726 PgHdr *pLast = 0; /* Last frame in list */ |
2570 int nLast = 0; /* Number of extra copies of last page */ | 2727 int nExtra = 0; /* Number of extra copies of last page */ |
| 2728 int szFrame; /* The size of a single frame */ |
| 2729 i64 iOffset; /* Next byte to write in WAL file */ |
| 2730 WalWriter w; /* The writer */ |
2571 | 2731 |
2572 assert( pList ); | 2732 assert( pList ); |
2573 assert( pWal->writeLock ); | 2733 assert( pWal->writeLock ); |
2574 | 2734 |
| 2735 /* If this frame set completes a transaction, then nTruncate>0. If |
| 2736 ** nTruncate==0 then this frame set does not complete the transaction. */ |
| 2737 assert( (isCommit!=0)==(nTruncate!=0) ); |
| 2738 |
2575 #if defined(SQLITE_TEST) && defined(SQLITE_DEBUG) | 2739 #if defined(SQLITE_TEST) && defined(SQLITE_DEBUG) |
2576 { int cnt; for(cnt=0, p=pList; p; p=p->pDirty, cnt++){} | 2740 { int cnt; for(cnt=0, p=pList; p; p=p->pDirty, cnt++){} |
2577 WALTRACE(("WAL%p: frame write begin. %d frames. mxFrame=%d. %s\n", | 2741 WALTRACE(("WAL%p: frame write begin. %d frames. mxFrame=%d. %s\n", |
2578 pWal, cnt, pWal->hdr.mxFrame, isCommit ? "Commit" : "Spill")); | 2742 pWal, cnt, pWal->hdr.mxFrame, isCommit ? "Commit" : "Spill")); |
2579 } | 2743 } |
2580 #endif | 2744 #endif |
2581 | 2745 |
2582 /* See if it is possible to write these frames into the start of the | 2746 /* See if it is possible to write these frames into the start of the |
2583 ** log file, instead of appending to it at pWal->hdr.mxFrame. | 2747 ** log file, instead of appending to it at pWal->hdr.mxFrame. |
2584 */ | 2748 */ |
2585 if( SQLITE_OK!=(rc = walRestartLog(pWal)) ){ | 2749 if( SQLITE_OK!=(rc = walRestartLog(pWal)) ){ |
2586 return rc; | 2750 return rc; |
2587 } | 2751 } |
2588 | 2752 |
2589 /* If this is the first frame written into the log, write the WAL | 2753 /* If this is the first frame written into the log, write the WAL |
2590 ** header to the start of the WAL file. See comments at the top of | 2754 ** header to the start of the WAL file. See comments at the top of |
2591 ** this source file for a description of the WAL header format. | 2755 ** this source file for a description of the WAL header format. |
2592 */ | 2756 */ |
2593 iFrame = pWal->hdr.mxFrame; | 2757 iFrame = pWal->hdr.mxFrame; |
2594 if( iFrame==0 ){ | 2758 if( iFrame==0 ){ |
2595 u8 aWalHdr[WAL_HDRSIZE]; /* Buffer to assemble wal-header in */ | 2759 u8 aWalHdr[WAL_HDRSIZE]; /* Buffer to assemble wal-header in */ |
2596 u32 aCksum[2]; /* Checksum for wal-header */ | 2760 u32 aCksum[2]; /* Checksum for wal-header */ |
2597 | 2761 |
2598 sqlite3Put4byte(&aWalHdr[0], (WAL_MAGIC | SQLITE_BIGENDIAN)); | 2762 sqlite3Put4byte(&aWalHdr[0], (WAL_MAGIC | SQLITE_BIGENDIAN)); |
2599 sqlite3Put4byte(&aWalHdr[4], WAL_MAX_VERSION); | 2763 sqlite3Put4byte(&aWalHdr[4], WAL_MAX_VERSION); |
2600 sqlite3Put4byte(&aWalHdr[8], szPage); | 2764 sqlite3Put4byte(&aWalHdr[8], szPage); |
2601 sqlite3Put4byte(&aWalHdr[12], pWal->nCkpt); | 2765 sqlite3Put4byte(&aWalHdr[12], pWal->nCkpt); |
2602 sqlite3_randomness(8, pWal->hdr.aSalt); | 2766 if( pWal->nCkpt==0 ) sqlite3_randomness(8, pWal->hdr.aSalt); |
2603 memcpy(&aWalHdr[16], pWal->hdr.aSalt, 8); | 2767 memcpy(&aWalHdr[16], pWal->hdr.aSalt, 8); |
2604 walChecksumBytes(1, aWalHdr, WAL_HDRSIZE-2*4, 0, aCksum); | 2768 walChecksumBytes(1, aWalHdr, WAL_HDRSIZE-2*4, 0, aCksum); |
2605 sqlite3Put4byte(&aWalHdr[24], aCksum[0]); | 2769 sqlite3Put4byte(&aWalHdr[24], aCksum[0]); |
2606 sqlite3Put4byte(&aWalHdr[28], aCksum[1]); | 2770 sqlite3Put4byte(&aWalHdr[28], aCksum[1]); |
2607 | 2771 |
2608 pWal->szPage = szPage; | 2772 pWal->szPage = szPage; |
2609 pWal->hdr.bigEndCksum = SQLITE_BIGENDIAN; | 2773 pWal->hdr.bigEndCksum = SQLITE_BIGENDIAN; |
2610 pWal->hdr.aFrameCksum[0] = aCksum[0]; | 2774 pWal->hdr.aFrameCksum[0] = aCksum[0]; |
2611 pWal->hdr.aFrameCksum[1] = aCksum[1]; | 2775 pWal->hdr.aFrameCksum[1] = aCksum[1]; |
| 2776 pWal->truncateOnCommit = 1; |
2612 | 2777 |
2613 rc = sqlite3OsWrite(pWal->pWalFd, aWalHdr, sizeof(aWalHdr), 0); | 2778 rc = sqlite3OsWrite(pWal->pWalFd, aWalHdr, sizeof(aWalHdr), 0); |
2614 WALTRACE(("WAL%p: wal-header write %s\n", pWal, rc ? "failed" : "ok")); | 2779 WALTRACE(("WAL%p: wal-header write %s\n", pWal, rc ? "failed" : "ok")); |
2615 if( rc!=SQLITE_OK ){ | 2780 if( rc!=SQLITE_OK ){ |
2616 return rc; | 2781 return rc; |
2617 } | 2782 } |
| 2783 |
| 2784 /* Sync the header (unless SQLITE_IOCAP_SEQUENTIAL is true or unless |
| 2785 ** all syncing is turned off by PRAGMA synchronous=OFF). Otherwise |
| 2786 ** an out-of-order write following a WAL restart could result in |
| 2787 ** database corruption. See the ticket: |
| 2788 ** |
| 2789 ** https://www.sqlite.org/src/info/ff5be73dee |
| 2790 */ |
| 2791 if( pWal->syncHeader && sync_flags ){ |
| 2792 rc = sqlite3OsSync(pWal->pWalFd, sync_flags & SQLITE_SYNC_MASK); |
| 2793 if( rc ) return rc; |
| 2794 } |
2618 } | 2795 } |
2619 assert( (int)pWal->szPage==szPage ); | 2796 assert( (int)pWal->szPage==szPage ); |
2620 | 2797 |
2621 /* Write the log file. */ | 2798 /* Setup information needed to write frames into the WAL */ |
| 2799 w.pWal = pWal; |
| 2800 w.pFd = pWal->pWalFd; |
| 2801 w.iSyncPoint = 0; |
| 2802 w.syncFlags = sync_flags; |
| 2803 w.szPage = szPage; |
| 2804 iOffset = walFrameOffset(iFrame+1, szPage); |
| 2805 szFrame = szPage + WAL_FRAME_HDRSIZE; |
| 2806 |
| 2807 /* Write all frames into the log file exactly once */ |
2622 for(p=pList; p; p=p->pDirty){ | 2808 for(p=pList; p; p=p->pDirty){ |
2623 u32 nDbsize; /* Db-size field for frame header */ | 2809 int nDbSize; /* 0 normally. Positive == commit flag */ |
2624 i64 iOffset; /* Write offset in log file */ | 2810 iFrame++; |
2625 void *pData; | 2811 assert( iOffset==walFrameOffset(iFrame, szPage) ); |
2626 | 2812 nDbSize = (isCommit && p->pDirty==0) ? nTruncate : 0; |
2627 iOffset = walFrameOffset(++iFrame, szPage); | 2813 rc = walWriteOneFrame(&w, p, nDbSize, iOffset); |
2628 /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL */ | 2814 if( rc ) return rc; |
2629 | |
2630 /* Populate and write the frame header */ | |
2631 nDbsize = (isCommit && p->pDirty==0) ? nTruncate : 0; | |
2632 #if defined(SQLITE_HAS_CODEC) | |
2633 if( (pData = sqlite3PagerCodec(p))==0 ) return SQLITE_NOMEM; | |
2634 #else | |
2635 pData = p->pData; | |
2636 #endif | |
2637 walEncodeFrame(pWal, p->pgno, nDbsize, pData, aFrame); | |
2638 rc = sqlite3OsWrite(pWal->pWalFd, aFrame, sizeof(aFrame), iOffset); | |
2639 if( rc!=SQLITE_OK ){ | |
2640 return rc; | |
2641 } | |
2642 | |
2643 /* Write the page data */ | |
2644 rc = sqlite3OsWrite(pWal->pWalFd, pData, szPage, iOffset+sizeof(aFrame)); | |
2645 if( rc!=SQLITE_OK ){ | |
2646 return rc; | |
2647 } | |
2648 pLast = p; | 2815 pLast = p; |
| 2816 iOffset += szFrame; |
2649 } | 2817 } |
2650 | 2818 |
2651 /* Sync the log file if the 'isSync' flag was specified. */ | 2819 /* If this is the end of a transaction, then we might need to pad |
2652 if( sync_flags ){ | 2820 ** the transaction and/or sync the WAL file. |
2653 i64 iSegment = sqlite3OsSectorSize(pWal->pWalFd); | 2821 ** |
2654 i64 iOffset = walFrameOffset(iFrame+1, szPage); | 2822 ** Padding and syncing only occur if this set of frames completes a |
| 2823 ** transaction and if PRAGMA synchronous=FULL. If synchronous==NORMAL |
| 2824 ** or synchronous==OFF, then no padding or syncing is needed. |
| 2825 ** |
| 2826 ** If SQLITE_IOCAP_POWERSAFE_OVERWRITE is defined, then padding is not |
| 2827 ** needed and only the sync is done. If padding is needed, then the |
| 2828 ** final frame is repeated (with its commit mark) until the next sector |
| 2829 ** boundary is crossed. Only the part of the WAL prior to the last |
| 2830 ** sector boundary is synced; the part of the last frame that extends |
| 2831 ** past the sector boundary is written after the sync. |
| 2832 */ |
| 2833 if( isCommit && (sync_flags & WAL_SYNC_TRANSACTIONS)!=0 ){ |
| 2834 if( pWal->padToSectorBoundary ){ |
| 2835 int sectorSize = sqlite3SectorSize(pWal->pWalFd); |
| 2836 w.iSyncPoint = ((iOffset+sectorSize-1)/sectorSize)*sectorSize; |
| 2837 while( iOffset<w.iSyncPoint ){ |
| 2838 rc = walWriteOneFrame(&w, pLast, nTruncate, iOffset); |
| 2839 if( rc ) return rc; |
| 2840 iOffset += szFrame; |
| 2841 nExtra++; |
| 2842 } |
| 2843 }else{ |
| 2844 rc = sqlite3OsSync(w.pFd, sync_flags & SQLITE_SYNC_MASK); |
| 2845 } |
| 2846 } |
2655 | 2847 |
2656 assert( isCommit ); | 2848 /* If this frame set completes the first transaction in the WAL and |
2657 assert( iSegment>0 ); | 2849 ** if PRAGMA journal_size_limit is set, then truncate the WAL to the |
2658 | 2850 ** journal size limit, if possible. |
2659 iSegment = (((iOffset+iSegment-1)/iSegment) * iSegment); | 2851 */ |
2660 while( iOffset<iSegment ){ | 2852 if( isCommit && pWal->truncateOnCommit && pWal->mxWalSize>=0 ){ |
2661 void *pData; | 2853 i64 sz = pWal->mxWalSize; |
2662 #if defined(SQLITE_HAS_CODEC) | 2854 if( walFrameOffset(iFrame+nExtra+1, szPage)>pWal->mxWalSize ){ |
2663 if( (pData = sqlite3PagerCodec(pLast))==0 ) return SQLITE_NOMEM; | 2855 sz = walFrameOffset(iFrame+nExtra+1, szPage); |
2664 #else | |
2665 pData = pLast->pData; | |
2666 #endif | |
2667 walEncodeFrame(pWal, pLast->pgno, nTruncate, pData, aFrame); | |
2668 /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL */ | |
2669 rc = sqlite3OsWrite(pWal->pWalFd, aFrame, sizeof(aFrame), iOffset); | |
2670 if( rc!=SQLITE_OK ){ | |
2671 return rc; | |
2672 } | |
2673 iOffset += WAL_FRAME_HDRSIZE; | |
2674 rc = sqlite3OsWrite(pWal->pWalFd, pData, szPage, iOffset); | |
2675 if( rc!=SQLITE_OK ){ | |
2676 return rc; | |
2677 } | |
2678 nLast++; | |
2679 iOffset += szPage; | |
2680 } | 2856 } |
2681 | 2857 walLimitSize(pWal, sz); |
2682 rc = sqlite3OsSync(pWal->pWalFd, sync_flags); | 2858 pWal->truncateOnCommit = 0; |
2683 } | 2859 } |
2684 | 2860 |
2685 /* Append data to the wal-index. It is not necessary to lock the | 2861 /* Append data to the wal-index. It is not necessary to lock the |
2686 ** wal-index to do this as the SQLITE_SHM_WRITE lock held on the wal-index | 2862 ** wal-index to do this as the SQLITE_SHM_WRITE lock held on the wal-index |
2687 ** guarantees that there are no other writers, and no data that may | 2863 ** guarantees that there are no other writers, and no data that may |
2688 ** be in use by existing readers is being overwritten. | 2864 ** be in use by existing readers is being overwritten. |
2689 */ | 2865 */ |
2690 iFrame = pWal->hdr.mxFrame; | 2866 iFrame = pWal->hdr.mxFrame; |
2691 for(p=pList; p && rc==SQLITE_OK; p=p->pDirty){ | 2867 for(p=pList; p && rc==SQLITE_OK; p=p->pDirty){ |
2692 iFrame++; | 2868 iFrame++; |
2693 rc = walIndexAppend(pWal, iFrame, p->pgno); | 2869 rc = walIndexAppend(pWal, iFrame, p->pgno); |
2694 } | 2870 } |
2695 while( nLast>0 && rc==SQLITE_OK ){ | 2871 while( rc==SQLITE_OK && nExtra>0 ){ |
2696 iFrame++; | 2872 iFrame++; |
2697 nLast--; | 2873 nExtra--; |
2698 rc = walIndexAppend(pWal, iFrame, pLast->pgno); | 2874 rc = walIndexAppend(pWal, iFrame, pLast->pgno); |
2699 } | 2875 } |
2700 | 2876 |
2701 if( rc==SQLITE_OK ){ | 2877 if( rc==SQLITE_OK ){ |
2702 /* Update the private copy of the header. */ | 2878 /* Update the private copy of the header. */ |
2703 pWal->hdr.szPage = (u16)((szPage&0xff00) | (szPage>>16)); | 2879 pWal->hdr.szPage = (u16)((szPage&0xff00) | (szPage>>16)); |
2704 testcase( szPage<=32768 ); | 2880 testcase( szPage<=32768 ); |
2705 testcase( szPage>=65536 ); | 2881 testcase( szPage>=65536 ); |
2706 pWal->hdr.mxFrame = iFrame; | 2882 pWal->hdr.mxFrame = iFrame; |
2707 if( isCommit ){ | 2883 if( isCommit ){ |
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2740 int *pnLog, /* OUT: Number of frames in WAL */ | 2916 int *pnLog, /* OUT: Number of frames in WAL */ |
2741 int *pnCkpt /* OUT: Number of backfilled frames in WAL */ | 2917 int *pnCkpt /* OUT: Number of backfilled frames in WAL */ |
2742 ){ | 2918 ){ |
2743 int rc; /* Return code */ | 2919 int rc; /* Return code */ |
2744 int isChanged = 0; /* True if a new wal-index header is loaded */ | 2920 int isChanged = 0; /* True if a new wal-index header is loaded */ |
2745 int eMode2 = eMode; /* Mode to pass to walCheckpoint() */ | 2921 int eMode2 = eMode; /* Mode to pass to walCheckpoint() */ |
2746 | 2922 |
2747 assert( pWal->ckptLock==0 ); | 2923 assert( pWal->ckptLock==0 ); |
2748 assert( pWal->writeLock==0 ); | 2924 assert( pWal->writeLock==0 ); |
2749 | 2925 |
| 2926 if( pWal->readOnly ) return SQLITE_READONLY; |
2750 WALTRACE(("WAL%p: checkpoint begins\n", pWal)); | 2927 WALTRACE(("WAL%p: checkpoint begins\n", pWal)); |
2751 rc = walLockExclusive(pWal, WAL_CKPT_LOCK, 1); | 2928 rc = walLockExclusive(pWal, WAL_CKPT_LOCK, 1); |
2752 if( rc ){ | 2929 if( rc ){ |
2753 /* Usually this is SQLITE_BUSY meaning that another thread or process | 2930 /* Usually this is SQLITE_BUSY meaning that another thread or process |
2754 ** is already running a checkpoint, or maybe a recovery. But it might | 2931 ** is already running a checkpoint, or maybe a recovery. But it might |
2755 ** also be SQLITE_IOERR. */ | 2932 ** also be SQLITE_IOERR. */ |
2756 return rc; | 2933 return rc; |
2757 } | 2934 } |
2758 pWal->ckptLock = 1; | 2935 pWal->ckptLock = 1; |
2759 | 2936 |
(...skipping 12 matching lines...) Expand all Loading... |
2772 pWal->writeLock = 1; | 2949 pWal->writeLock = 1; |
2773 }else if( rc==SQLITE_BUSY ){ | 2950 }else if( rc==SQLITE_BUSY ){ |
2774 eMode2 = SQLITE_CHECKPOINT_PASSIVE; | 2951 eMode2 = SQLITE_CHECKPOINT_PASSIVE; |
2775 rc = SQLITE_OK; | 2952 rc = SQLITE_OK; |
2776 } | 2953 } |
2777 } | 2954 } |
2778 | 2955 |
2779 /* Read the wal-index header. */ | 2956 /* Read the wal-index header. */ |
2780 if( rc==SQLITE_OK ){ | 2957 if( rc==SQLITE_OK ){ |
2781 rc = walIndexReadHdr(pWal, &isChanged); | 2958 rc = walIndexReadHdr(pWal, &isChanged); |
| 2959 if( isChanged && pWal->pDbFd->pMethods->iVersion>=3 ){ |
| 2960 sqlite3OsUnfetch(pWal->pDbFd, 0, 0); |
| 2961 } |
2782 } | 2962 } |
2783 | 2963 |
2784 /* Copy data from the log to the database file. */ | 2964 /* Copy data from the log to the database file. */ |
2785 if( rc==SQLITE_OK ){ | 2965 if( rc==SQLITE_OK ){ |
2786 if( pWal->hdr.mxFrame && walPagesize(pWal)!=nBuf ){ | 2966 if( pWal->hdr.mxFrame && walPagesize(pWal)!=nBuf ){ |
2787 rc = SQLITE_CORRUPT_BKPT; | 2967 rc = SQLITE_CORRUPT_BKPT; |
2788 }else{ | 2968 }else{ |
2789 rc = walCheckpoint(pWal, eMode2, xBusy, pBusyArg, sync_flags, zBuf); | 2969 rc = walCheckpoint(pWal, eMode2, xBusy, pBusyArg, sync_flags, zBuf); |
2790 } | 2970 } |
2791 | 2971 |
(...skipping 99 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2891 | 3071 |
2892 /* | 3072 /* |
2893 ** Return true if the argument is non-NULL and the WAL module is using | 3073 ** Return true if the argument is non-NULL and the WAL module is using |
2894 ** heap-memory for the wal-index. Otherwise, if the argument is NULL or the | 3074 ** heap-memory for the wal-index. Otherwise, if the argument is NULL or the |
2895 ** WAL module is using shared-memory, return false. | 3075 ** WAL module is using shared-memory, return false. |
2896 */ | 3076 */ |
2897 int sqlite3WalHeapMemory(Wal *pWal){ | 3077 int sqlite3WalHeapMemory(Wal *pWal){ |
2898 return (pWal && pWal->exclusiveMode==WAL_HEAPMEMORY_MODE ); | 3078 return (pWal && pWal->exclusiveMode==WAL_HEAPMEMORY_MODE ); |
2899 } | 3079 } |
2900 | 3080 |
| 3081 #ifdef SQLITE_ENABLE_ZIPVFS |
| 3082 /* |
| 3083 ** If the argument is not NULL, it points to a Wal object that holds a |
| 3084 ** read-lock. This function returns the database page-size if it is known, |
| 3085 ** or zero if it is not (or if pWal is NULL). |
| 3086 */ |
| 3087 int sqlite3WalFramesize(Wal *pWal){ |
| 3088 assert( pWal==0 || pWal->readLock>=0 ); |
| 3089 return (pWal ? pWal->szPage : 0); |
| 3090 } |
| 3091 #endif |
| 3092 |
2901 #endif /* #ifndef SQLITE_OMIT_WAL */ | 3093 #endif /* #ifndef SQLITE_OMIT_WAL */ |
OLD | NEW |