OLD | NEW |
1 /* | 1 /* |
2 ** 2010 February 1 | 2 ** 2010 February 1 |
3 ** | 3 ** |
4 ** The author disclaims copyright to this source code. In place of | 4 ** The author disclaims copyright to this source code. In place of |
5 ** a legal notice, here is a blessing: | 5 ** a legal notice, here is a blessing: |
6 ** | 6 ** |
7 ** May you do good and not evil. | 7 ** May you do good and not evil. |
8 ** May you find forgiveness for yourself and forgive others. | 8 ** May you find forgiveness for yourself and forgive others. |
9 ** May you share freely, never taking more than you give. | 9 ** May you share freely, never taking more than you give. |
10 ** | 10 ** |
(...skipping 254 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
265 ** Similarly, if a client successfully reads a wal-index header (i.e. the | 265 ** Similarly, if a client successfully reads a wal-index header (i.e. the |
266 ** checksum test is successful) and finds that the version field is not | 266 ** checksum test is successful) and finds that the version field is not |
267 ** WALINDEX_MAX_VERSION, then no read-transaction is opened and SQLite | 267 ** WALINDEX_MAX_VERSION, then no read-transaction is opened and SQLite |
268 ** returns SQLITE_CANTOPEN. | 268 ** returns SQLITE_CANTOPEN. |
269 */ | 269 */ |
270 #define WAL_MAX_VERSION 3007000 | 270 #define WAL_MAX_VERSION 3007000 |
271 #define WALINDEX_MAX_VERSION 3007000 | 271 #define WALINDEX_MAX_VERSION 3007000 |
272 | 272 |
273 /* | 273 /* |
274 ** Indices of various locking bytes. WAL_NREADER is the number | 274 ** Indices of various locking bytes. WAL_NREADER is the number |
275 ** of available reader locks and should be at least 3. | 275 ** of available reader locks and should be at least 3. The default |
| 276 ** is SQLITE_SHM_NLOCK==8 and WAL_NREADER==5. |
276 */ | 277 */ |
277 #define WAL_WRITE_LOCK 0 | 278 #define WAL_WRITE_LOCK 0 |
278 #define WAL_ALL_BUT_WRITE 1 | 279 #define WAL_ALL_BUT_WRITE 1 |
279 #define WAL_CKPT_LOCK 1 | 280 #define WAL_CKPT_LOCK 1 |
280 #define WAL_RECOVER_LOCK 2 | 281 #define WAL_RECOVER_LOCK 2 |
281 #define WAL_READ_LOCK(I) (3+(I)) | 282 #define WAL_READ_LOCK(I) (3+(I)) |
282 #define WAL_NREADER (SQLITE_SHM_NLOCK-3) | 283 #define WAL_NREADER (SQLITE_SHM_NLOCK-3) |
283 | 284 |
284 | 285 |
285 /* Object declarations */ | 286 /* Object declarations */ |
286 typedef struct WalIndexHdr WalIndexHdr; | 287 typedef struct WalIndexHdr WalIndexHdr; |
287 typedef struct WalIterator WalIterator; | 288 typedef struct WalIterator WalIterator; |
288 typedef struct WalCkptInfo WalCkptInfo; | 289 typedef struct WalCkptInfo WalCkptInfo; |
289 | 290 |
290 | 291 |
291 /* | 292 /* |
292 ** The following object holds a copy of the wal-index header content. | 293 ** The following object holds a copy of the wal-index header content. |
293 ** | 294 ** |
294 ** The actual header in the wal-index consists of two copies of this | 295 ** The actual header in the wal-index consists of two copies of this |
295 ** object. | 296 ** object followed by one instance of the WalCkptInfo object. |
| 297 ** For all versions of SQLite through 3.10.0 and probably beyond, |
| 298 ** the locking bytes (WalCkptInfo.aLock) start at offset 120 and |
| 299 ** the total header size is 136 bytes. |
296 ** | 300 ** |
297 ** The szPage value can be any power of 2 between 512 and 32768, inclusive. | 301 ** The szPage value can be any power of 2 between 512 and 32768, inclusive. |
298 ** Or it can be 1 to represent a 65536-byte page. The latter case was | 302 ** Or it can be 1 to represent a 65536-byte page. The latter case was |
299 ** added in 3.7.1 when support for 64K pages was added. | 303 ** added in 3.7.1 when support for 64K pages was added. |
300 */ | 304 */ |
301 struct WalIndexHdr { | 305 struct WalIndexHdr { |
302 u32 iVersion; /* Wal-index version */ | 306 u32 iVersion; /* Wal-index version */ |
303 u32 unused; /* Unused (padding) field */ | 307 u32 unused; /* Unused (padding) field */ |
304 u32 iChange; /* Counter incremented each transaction */ | 308 u32 iChange; /* Counter incremented each transaction */ |
305 u8 isInit; /* 1 when initialized */ | 309 u8 isInit; /* 1 when initialized */ |
(...skipping 12 matching lines...) Expand all Loading... |
318 ** information used by checkpoint. | 322 ** information used by checkpoint. |
319 ** | 323 ** |
320 ** nBackfill is the number of frames in the WAL that have been written | 324 ** nBackfill is the number of frames in the WAL that have been written |
321 ** back into the database. (We call the act of moving content from WAL to | 325 ** back into the database. (We call the act of moving content from WAL to |
322 ** database "backfilling".) The nBackfill number is never greater than | 326 ** database "backfilling".) The nBackfill number is never greater than |
323 ** WalIndexHdr.mxFrame. nBackfill can only be increased by threads | 327 ** WalIndexHdr.mxFrame. nBackfill can only be increased by threads |
324 ** holding the WAL_CKPT_LOCK lock (which includes a recovery thread). | 328 ** holding the WAL_CKPT_LOCK lock (which includes a recovery thread). |
325 ** However, a WAL_WRITE_LOCK thread can move the value of nBackfill from | 329 ** However, a WAL_WRITE_LOCK thread can move the value of nBackfill from |
326 ** mxFrame back to zero when the WAL is reset. | 330 ** mxFrame back to zero when the WAL is reset. |
327 ** | 331 ** |
| 332 ** nBackfillAttempted is the largest value of nBackfill that a checkpoint |
| 333 ** has attempted to achieve. Normally nBackfill==nBackfillAttempted, however |
| 334 ** the nBackfillAttempted is set before any backfilling is done and the |
| 335 ** nBackfill is only set after all backfilling completes. So if a checkpoint |
| 336 ** crashes, nBackfillAttempted might be larger than nBackfill. The |
| 337 ** WalIndexHdr.mxFrame must never be less than nBackfillAttempted. |
| 338 ** |
| 339 ** The aLock[] field is a set of bytes used for locking. These bytes should |
| 340 ** never be read or written. |
| 341 ** |
328 ** There is one entry in aReadMark[] for each reader lock. If a reader | 342 ** There is one entry in aReadMark[] for each reader lock. If a reader |
329 ** holds read-lock K, then the value in aReadMark[K] is no greater than | 343 ** holds read-lock K, then the value in aReadMark[K] is no greater than |
330 ** the mxFrame for that reader. The value READMARK_NOT_USED (0xffffffff) | 344 ** the mxFrame for that reader. The value READMARK_NOT_USED (0xffffffff) |
331 ** for any aReadMark[] means that entry is unused. aReadMark[0] is | 345 ** for any aReadMark[] means that entry is unused. aReadMark[0] is |
332 ** a special case; its value is never used and it exists as a place-holder | 346 ** a special case; its value is never used and it exists as a place-holder |
333 ** to avoid having to offset aReadMark[] indexes by one. Readers holding | 347 ** to avoid having to offset aReadMark[] indexes by one. Readers holding |
334 ** WAL_READ_LOCK(0) always ignore the entire WAL and read all content | 348 ** WAL_READ_LOCK(0) always ignore the entire WAL and read all content |
335 ** directly from the database. | 349 ** directly from the database. |
336 ** | 350 ** |
337 ** The value of aReadMark[K] may only be changed by a thread that | 351 ** The value of aReadMark[K] may only be changed by a thread that |
(...skipping 19 matching lines...) Expand all Loading... |
357 ** (in other words, if there are no WAL_READ_LOCK(i) where i>0) then | 371 ** (in other words, if there are no WAL_READ_LOCK(i) where i>0) then |
358 ** the writer will first "reset" the WAL back to the beginning and start | 372 ** the writer will first "reset" the WAL back to the beginning and start |
359 ** writing new content beginning at frame 1. | 373 ** writing new content beginning at frame 1. |
360 ** | 374 ** |
361 ** We assume that 32-bit loads are atomic and so no locks are needed in | 375 ** We assume that 32-bit loads are atomic and so no locks are needed in |
362 ** order to read from any aReadMark[] entries. | 376 ** order to read from any aReadMark[] entries. |
363 */ | 377 */ |
364 struct WalCkptInfo { | 378 struct WalCkptInfo { |
365 u32 nBackfill; /* Number of WAL frames backfilled into DB */ | 379 u32 nBackfill; /* Number of WAL frames backfilled into DB */ |
366 u32 aReadMark[WAL_NREADER]; /* Reader marks */ | 380 u32 aReadMark[WAL_NREADER]; /* Reader marks */ |
| 381 u8 aLock[SQLITE_SHM_NLOCK]; /* Reserved space for locks */ |
| 382 u32 nBackfillAttempted; /* WAL frames perhaps written, or maybe not */ |
| 383 u32 notUsed0; /* Available for future enhancements */ |
367 }; | 384 }; |
368 #define READMARK_NOT_USED 0xffffffff | 385 #define READMARK_NOT_USED 0xffffffff |
369 | 386 |
370 | 387 |
371 /* A block of WALINDEX_LOCK_RESERVED bytes beginning at | 388 /* A block of WALINDEX_LOCK_RESERVED bytes beginning at |
372 ** WALINDEX_LOCK_OFFSET is reserved for locks. Since some systems | 389 ** WALINDEX_LOCK_OFFSET is reserved for locks. Since some systems |
373 ** only support mandatory file-locks, we do not read or write data | 390 ** only support mandatory file-locks, we do not read or write data |
374 ** from the region of the file on which locks are applied. | 391 ** from the region of the file on which locks are applied. |
375 */ | 392 */ |
376 #define WALINDEX_LOCK_OFFSET (sizeof(WalIndexHdr)*2 + sizeof(WalCkptInfo)) | 393 #define WALINDEX_LOCK_OFFSET (sizeof(WalIndexHdr)*2+offsetof(WalCkptInfo,aLock)) |
377 #define WALINDEX_LOCK_RESERVED 16 | 394 #define WALINDEX_HDR_SIZE (sizeof(WalIndexHdr)*2+sizeof(WalCkptInfo)) |
378 #define WALINDEX_HDR_SIZE (WALINDEX_LOCK_OFFSET+WALINDEX_LOCK_RESERVED) | |
379 | 395 |
380 /* Size of header before each frame in wal */ | 396 /* Size of header before each frame in wal */ |
381 #define WAL_FRAME_HDRSIZE 24 | 397 #define WAL_FRAME_HDRSIZE 24 |
382 | 398 |
383 /* Size of write ahead log header, including checksum. */ | 399 /* Size of write ahead log header, including checksum. */ |
384 /* #define WAL_HDRSIZE 24 */ | 400 /* #define WAL_HDRSIZE 24 */ |
385 #define WAL_HDRSIZE 32 | 401 #define WAL_HDRSIZE 32 |
386 | 402 |
387 /* WAL magic value. Either this value, or the same value with the least | 403 /* WAL magic value. Either this value, or the same value with the least |
388 ** significant bit also set (WAL_MAGIC | 0x00000001) is stored in 32-bit | 404 ** significant bit also set (WAL_MAGIC | 0x00000001) is stored in 32-bit |
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
421 i16 readLock; /* Which read lock is being held. -1 for none */ | 437 i16 readLock; /* Which read lock is being held. -1 for none */ |
422 u8 syncFlags; /* Flags to use to sync header writes */ | 438 u8 syncFlags; /* Flags to use to sync header writes */ |
423 u8 exclusiveMode; /* Non-zero if connection is in exclusive mode */ | 439 u8 exclusiveMode; /* Non-zero if connection is in exclusive mode */ |
424 u8 writeLock; /* True if in a write transaction */ | 440 u8 writeLock; /* True if in a write transaction */ |
425 u8 ckptLock; /* True if holding a checkpoint lock */ | 441 u8 ckptLock; /* True if holding a checkpoint lock */ |
426 u8 readOnly; /* WAL_RDWR, WAL_RDONLY, or WAL_SHM_RDONLY */ | 442 u8 readOnly; /* WAL_RDWR, WAL_RDONLY, or WAL_SHM_RDONLY */ |
427 u8 truncateOnCommit; /* True to truncate WAL file on commit */ | 443 u8 truncateOnCommit; /* True to truncate WAL file on commit */ |
428 u8 syncHeader; /* Fsync the WAL header if true */ | 444 u8 syncHeader; /* Fsync the WAL header if true */ |
429 u8 padToSectorBoundary; /* Pad transactions out to the next sector */ | 445 u8 padToSectorBoundary; /* Pad transactions out to the next sector */ |
430 WalIndexHdr hdr; /* Wal-index header for current transaction */ | 446 WalIndexHdr hdr; /* Wal-index header for current transaction */ |
| 447 u32 minFrame; /* Ignore wal frames before this one */ |
431 const char *zWalName; /* Name of WAL file */ | 448 const char *zWalName; /* Name of WAL file */ |
432 u32 nCkpt; /* Checkpoint sequence counter in the wal-header */ | 449 u32 nCkpt; /* Checkpoint sequence counter in the wal-header */ |
433 #ifdef SQLITE_DEBUG | 450 #ifdef SQLITE_DEBUG |
434 u8 lockError; /* True if a locking error has occurred */ | 451 u8 lockError; /* True if a locking error has occurred */ |
435 #endif | 452 #endif |
| 453 #ifdef SQLITE_ENABLE_SNAPSHOT |
| 454 WalIndexHdr *pSnapshot; /* Start transaction here if not NULL */ |
| 455 #endif |
436 }; | 456 }; |
437 | 457 |
438 /* | 458 /* |
439 ** Candidate values for Wal.exclusiveMode. | 459 ** Candidate values for Wal.exclusiveMode. |
440 */ | 460 */ |
441 #define WAL_NORMAL_MODE 0 | 461 #define WAL_NORMAL_MODE 0 |
442 #define WAL_EXCLUSIVE_MODE 1 | 462 #define WAL_EXCLUSIVE_MODE 1 |
443 #define WAL_HEAPMEMORY_MODE 2 | 463 #define WAL_HEAPMEMORY_MODE 2 |
444 | 464 |
445 /* | 465 /* |
(...skipping 69 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
515 ** page and SQLITE_OK is returned. If an error (an OOM or VFS error) occurs, | 535 ** page and SQLITE_OK is returned. If an error (an OOM or VFS error) occurs, |
516 ** then an SQLite error code is returned and *ppPage is set to 0. | 536 ** then an SQLite error code is returned and *ppPage is set to 0. |
517 */ | 537 */ |
518 static int walIndexPage(Wal *pWal, int iPage, volatile u32 **ppPage){ | 538 static int walIndexPage(Wal *pWal, int iPage, volatile u32 **ppPage){ |
519 int rc = SQLITE_OK; | 539 int rc = SQLITE_OK; |
520 | 540 |
521 /* Enlarge the pWal->apWiData[] array if required */ | 541 /* Enlarge the pWal->apWiData[] array if required */ |
522 if( pWal->nWiData<=iPage ){ | 542 if( pWal->nWiData<=iPage ){ |
523 int nByte = sizeof(u32*)*(iPage+1); | 543 int nByte = sizeof(u32*)*(iPage+1); |
524 volatile u32 **apNew; | 544 volatile u32 **apNew; |
525 apNew = (volatile u32 **)sqlite3_realloc((void *)pWal->apWiData, nByte); | 545 apNew = (volatile u32 **)sqlite3_realloc64((void *)pWal->apWiData, nByte); |
526 if( !apNew ){ | 546 if( !apNew ){ |
527 *ppPage = 0; | 547 *ppPage = 0; |
528 return SQLITE_NOMEM; | 548 return SQLITE_NOMEM; |
529 } | 549 } |
530 memset((void*)&apNew[pWal->nWiData], 0, | 550 memset((void*)&apNew[pWal->nWiData], 0, |
531 sizeof(u32*)*(iPage+1-pWal->nWiData)); | 551 sizeof(u32*)*(iPage+1-pWal->nWiData)); |
532 pWal->apWiData = apNew; | 552 pWal->apWiData = apNew; |
533 pWal->nWiData = iPage+1; | 553 pWal->nWiData = iPage+1; |
534 } | 554 } |
535 | 555 |
(...skipping 105 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
641 ** The checksum on pWal->hdr is updated before it is written. | 661 ** The checksum on pWal->hdr is updated before it is written. |
642 */ | 662 */ |
643 static void walIndexWriteHdr(Wal *pWal){ | 663 static void walIndexWriteHdr(Wal *pWal){ |
644 volatile WalIndexHdr *aHdr = walIndexHdr(pWal); | 664 volatile WalIndexHdr *aHdr = walIndexHdr(pWal); |
645 const int nCksum = offsetof(WalIndexHdr, aCksum); | 665 const int nCksum = offsetof(WalIndexHdr, aCksum); |
646 | 666 |
647 assert( pWal->writeLock ); | 667 assert( pWal->writeLock ); |
648 pWal->hdr.isInit = 1; | 668 pWal->hdr.isInit = 1; |
649 pWal->hdr.iVersion = WALINDEX_MAX_VERSION; | 669 pWal->hdr.iVersion = WALINDEX_MAX_VERSION; |
650 walChecksumBytes(1, (u8*)&pWal->hdr, nCksum, 0, pWal->hdr.aCksum); | 670 walChecksumBytes(1, (u8*)&pWal->hdr, nCksum, 0, pWal->hdr.aCksum); |
651 memcpy((void *)&aHdr[1], (void *)&pWal->hdr, sizeof(WalIndexHdr)); | 671 memcpy((void*)&aHdr[1], (const void*)&pWal->hdr, sizeof(WalIndexHdr)); |
652 walShmBarrier(pWal); | 672 walShmBarrier(pWal); |
653 memcpy((void *)&aHdr[0], (void *)&pWal->hdr, sizeof(WalIndexHdr)); | 673 memcpy((void*)&aHdr[0], (const void*)&pWal->hdr, sizeof(WalIndexHdr)); |
654 } | 674 } |
655 | 675 |
656 /* | 676 /* |
657 ** This function encodes a single frame header and writes it to a buffer | 677 ** This function encodes a single frame header and writes it to a buffer |
658 ** supplied by the caller. A frame-header is made up of a series of | 678 ** supplied by the caller. A frame-header is made up of a series of |
659 ** 4-byte big-endian integers, as follows: | 679 ** 4-byte big-endian integers, as follows: |
660 ** | 680 ** |
661 ** 0: Page number. | 681 ** 0: Page number. |
662 ** 4: For commit records, the size of the database image in pages | 682 ** 4: For commit records, the size of the database image in pages |
663 ** after the commit. For all other records, zero. | 683 ** after the commit. For all other records, zero. |
(...skipping 280 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
944 ** frame numbers greater than pWal->hdr.mxFrame. | 964 ** frame numbers greater than pWal->hdr.mxFrame. |
945 */ | 965 */ |
946 nByte = (int)((char *)aHash - (char *)&aPgno[iLimit+1]); | 966 nByte = (int)((char *)aHash - (char *)&aPgno[iLimit+1]); |
947 memset((void *)&aPgno[iLimit+1], 0, nByte); | 967 memset((void *)&aPgno[iLimit+1], 0, nByte); |
948 | 968 |
949 #ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT | 969 #ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT |
950 /* Verify that every entry in the mapping region is still reachable | 970 /* Verify that every entry in the mapping region is still reachable |
951 ** via the hash table even after the cleanup. | 971 ** via the hash table even after the cleanup. |
952 */ | 972 */ |
953 if( iLimit ){ | 973 if( iLimit ){ |
954 int i; /* Loop counter */ | 974 int j; /* Loop counter */ |
955 int iKey; /* Hash key */ | 975 int iKey; /* Hash key */ |
956 for(i=1; i<=iLimit; i++){ | 976 for(j=1; j<=iLimit; j++){ |
957 for(iKey=walHash(aPgno[i]); aHash[iKey]; iKey=walNextHash(iKey)){ | 977 for(iKey=walHash(aPgno[j]); aHash[iKey]; iKey=walNextHash(iKey)){ |
958 if( aHash[iKey]==i ) break; | 978 if( aHash[iKey]==j ) break; |
959 } | 979 } |
960 assert( aHash[iKey]==i ); | 980 assert( aHash[iKey]==j ); |
961 } | 981 } |
962 } | 982 } |
963 #endif /* SQLITE_ENABLE_EXPENSIVE_ASSERT */ | 983 #endif /* SQLITE_ENABLE_EXPENSIVE_ASSERT */ |
964 } | 984 } |
965 | 985 |
966 | 986 |
967 /* | 987 /* |
968 ** Set an entry in the wal-index that will map database page number | 988 ** Set an entry in the wal-index that will map database page number |
969 ** pPage into WAL frame iFrame. | 989 ** pPage into WAL frame iFrame. |
970 */ | 990 */ |
(...skipping 168 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1139 /* Verify that the version number on the WAL format is one that | 1159 /* Verify that the version number on the WAL format is one that |
1140 ** we are able to understand */ | 1160 ** we are able to understand */ |
1141 version = sqlite3Get4byte(&aBuf[4]); | 1161 version = sqlite3Get4byte(&aBuf[4]); |
1142 if( version!=WAL_MAX_VERSION ){ | 1162 if( version!=WAL_MAX_VERSION ){ |
1143 rc = SQLITE_CANTOPEN_BKPT; | 1163 rc = SQLITE_CANTOPEN_BKPT; |
1144 goto finished; | 1164 goto finished; |
1145 } | 1165 } |
1146 | 1166 |
1147 /* Malloc a buffer to read frames into. */ | 1167 /* Malloc a buffer to read frames into. */ |
1148 szFrame = szPage + WAL_FRAME_HDRSIZE; | 1168 szFrame = szPage + WAL_FRAME_HDRSIZE; |
1149 aFrame = (u8 *)sqlite3_malloc(szFrame); | 1169 aFrame = (u8 *)sqlite3_malloc64(szFrame); |
1150 if( !aFrame ){ | 1170 if( !aFrame ){ |
1151 rc = SQLITE_NOMEM; | 1171 rc = SQLITE_NOMEM; |
1152 goto recovery_error; | 1172 goto recovery_error; |
1153 } | 1173 } |
1154 aData = &aFrame[WAL_FRAME_HDRSIZE]; | 1174 aData = &aFrame[WAL_FRAME_HDRSIZE]; |
1155 | 1175 |
1156 /* Read all frames from the log file. */ | 1176 /* Read all frames from the log file. */ |
1157 iFrame = 0; | 1177 iFrame = 0; |
1158 for(iOffset=WAL_HDRSIZE; (iOffset+szFrame)<=nSize; iOffset+=szFrame){ | 1178 for(iOffset=WAL_HDRSIZE; (iOffset+szFrame)<=nSize; iOffset+=szFrame){ |
1159 u32 pgno; /* Database page number for frame */ | 1179 u32 pgno; /* Database page number for frame */ |
(...skipping 30 matching lines...) Expand all Loading... |
1190 pWal->hdr.aFrameCksum[0] = aFrameCksum[0]; | 1210 pWal->hdr.aFrameCksum[0] = aFrameCksum[0]; |
1191 pWal->hdr.aFrameCksum[1] = aFrameCksum[1]; | 1211 pWal->hdr.aFrameCksum[1] = aFrameCksum[1]; |
1192 walIndexWriteHdr(pWal); | 1212 walIndexWriteHdr(pWal); |
1193 | 1213 |
1194 /* Reset the checkpoint-header. This is safe because this thread is | 1214 /* Reset the checkpoint-header. This is safe because this thread is |
1195 ** currently holding locks that exclude all other readers, writers and | 1215 ** currently holding locks that exclude all other readers, writers and |
1196 ** checkpointers. | 1216 ** checkpointers. |
1197 */ | 1217 */ |
1198 pInfo = walCkptInfo(pWal); | 1218 pInfo = walCkptInfo(pWal); |
1199 pInfo->nBackfill = 0; | 1219 pInfo->nBackfill = 0; |
| 1220 pInfo->nBackfillAttempted = pWal->hdr.mxFrame; |
1200 pInfo->aReadMark[0] = 0; | 1221 pInfo->aReadMark[0] = 0; |
1201 for(i=1; i<WAL_NREADER; i++) pInfo->aReadMark[i] = READMARK_NOT_USED; | 1222 for(i=1; i<WAL_NREADER; i++) pInfo->aReadMark[i] = READMARK_NOT_USED; |
1202 if( pWal->hdr.mxFrame ) pInfo->aReadMark[1] = pWal->hdr.mxFrame; | 1223 if( pWal->hdr.mxFrame ) pInfo->aReadMark[1] = pWal->hdr.mxFrame; |
1203 | 1224 |
1204 /* If more than one frame was recovered from the log file, report an | 1225 /* If more than one frame was recovered from the log file, report an |
1205 ** event via sqlite3_log(). This is to help with identifying performance | 1226 ** event via sqlite3_log(). This is to help with identifying performance |
1206 ** problems caused by applications routinely shutting down without | 1227 ** problems caused by applications routinely shutting down without |
1207 ** checkpointing the log file. | 1228 ** checkpointing the log file. |
1208 */ | 1229 */ |
1209 if( pWal->hdr.nPage ){ | 1230 if( pWal->hdr.nPage ){ |
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1261 int rc; /* Return Code */ | 1282 int rc; /* Return Code */ |
1262 Wal *pRet; /* Object to allocate and return */ | 1283 Wal *pRet; /* Object to allocate and return */ |
1263 int flags; /* Flags passed to OsOpen() */ | 1284 int flags; /* Flags passed to OsOpen() */ |
1264 | 1285 |
1265 assert( zWalName && zWalName[0] ); | 1286 assert( zWalName && zWalName[0] ); |
1266 assert( pDbFd ); | 1287 assert( pDbFd ); |
1267 | 1288 |
1268 /* In the amalgamation, the os_unix.c and os_win.c source files come before | 1289 /* In the amalgamation, the os_unix.c and os_win.c source files come before |
1269 ** this source file. Verify that the #defines of the locking byte offsets | 1290 ** this source file. Verify that the #defines of the locking byte offsets |
1270 ** in os_unix.c and os_win.c agree with the WALINDEX_LOCK_OFFSET value. | 1291 ** in os_unix.c and os_win.c agree with the WALINDEX_LOCK_OFFSET value. |
| 1292 ** For that matter, if the lock offset ever changes from its initial design |
| 1293 ** value of 120, we need to know that so there is an assert() to check it. |
1271 */ | 1294 */ |
| 1295 assert( 120==WALINDEX_LOCK_OFFSET ); |
| 1296 assert( 136==WALINDEX_HDR_SIZE ); |
1272 #ifdef WIN_SHM_BASE | 1297 #ifdef WIN_SHM_BASE |
1273 assert( WIN_SHM_BASE==WALINDEX_LOCK_OFFSET ); | 1298 assert( WIN_SHM_BASE==WALINDEX_LOCK_OFFSET ); |
1274 #endif | 1299 #endif |
1275 #ifdef UNIX_SHM_BASE | 1300 #ifdef UNIX_SHM_BASE |
1276 assert( UNIX_SHM_BASE==WALINDEX_LOCK_OFFSET ); | 1301 assert( UNIX_SHM_BASE==WALINDEX_LOCK_OFFSET ); |
1277 #endif | 1302 #endif |
1278 | 1303 |
1279 | 1304 |
1280 /* Allocate an instance of struct Wal to return. */ | 1305 /* Allocate an instance of struct Wal to return. */ |
1281 *ppWal = 0; | 1306 *ppWal = 0; |
(...skipping 170 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1452 ){ | 1477 ){ |
1453 struct Sublist { | 1478 struct Sublist { |
1454 int nList; /* Number of elements in aList */ | 1479 int nList; /* Number of elements in aList */ |
1455 ht_slot *aList; /* Pointer to sub-list content */ | 1480 ht_slot *aList; /* Pointer to sub-list content */ |
1456 }; | 1481 }; |
1457 | 1482 |
1458 const int nList = *pnList; /* Size of input list */ | 1483 const int nList = *pnList; /* Size of input list */ |
1459 int nMerge = 0; /* Number of elements in list aMerge */ | 1484 int nMerge = 0; /* Number of elements in list aMerge */ |
1460 ht_slot *aMerge = 0; /* List to be merged */ | 1485 ht_slot *aMerge = 0; /* List to be merged */ |
1461 int iList; /* Index into input list */ | 1486 int iList; /* Index into input list */ |
1462 int iSub = 0; /* Index into aSub array */ | 1487 u32 iSub = 0; /* Index into aSub array */ |
1463 struct Sublist aSub[13]; /* Array of sub-lists */ | 1488 struct Sublist aSub[13]; /* Array of sub-lists */ |
1464 | 1489 |
1465 memset(aSub, 0, sizeof(aSub)); | 1490 memset(aSub, 0, sizeof(aSub)); |
1466 assert( nList<=HASHTABLE_NPAGE && nList>0 ); | 1491 assert( nList<=HASHTABLE_NPAGE && nList>0 ); |
1467 assert( HASHTABLE_NPAGE==(1<<(ArraySize(aSub)-1)) ); | 1492 assert( HASHTABLE_NPAGE==(1<<(ArraySize(aSub)-1)) ); |
1468 | 1493 |
1469 for(iList=0; iList<nList; iList++){ | 1494 for(iList=0; iList<nList; iList++){ |
1470 nMerge = 1; | 1495 nMerge = 1; |
1471 aMerge = &aList[iList]; | 1496 aMerge = &aList[iList]; |
1472 for(iSub=0; iList & (1<<iSub); iSub++){ | 1497 for(iSub=0; iList & (1<<iSub); iSub++){ |
1473 struct Sublist *p = &aSub[iSub]; | 1498 struct Sublist *p; |
| 1499 assert( iSub<ArraySize(aSub) ); |
| 1500 p = &aSub[iSub]; |
1474 assert( p->aList && p->nList<=(1<<iSub) ); | 1501 assert( p->aList && p->nList<=(1<<iSub) ); |
1475 assert( p->aList==&aList[iList&~((2<<iSub)-1)] ); | 1502 assert( p->aList==&aList[iList&~((2<<iSub)-1)] ); |
1476 walMerge(aContent, p->aList, p->nList, &aMerge, &nMerge, aBuffer); | 1503 walMerge(aContent, p->aList, p->nList, &aMerge, &nMerge, aBuffer); |
1477 } | 1504 } |
1478 aSub[iSub].aList = aMerge; | 1505 aSub[iSub].aList = aMerge; |
1479 aSub[iSub].nList = nMerge; | 1506 aSub[iSub].nList = nMerge; |
1480 } | 1507 } |
1481 | 1508 |
1482 for(iSub++; iSub<ArraySize(aSub); iSub++){ | 1509 for(iSub++; iSub<ArraySize(aSub); iSub++){ |
1483 if( nList & (1<<iSub) ){ | 1510 if( nList & (1<<iSub) ){ |
1484 struct Sublist *p = &aSub[iSub]; | 1511 struct Sublist *p; |
| 1512 assert( iSub<ArraySize(aSub) ); |
| 1513 p = &aSub[iSub]; |
1485 assert( p->nList<=(1<<iSub) ); | 1514 assert( p->nList<=(1<<iSub) ); |
1486 assert( p->aList==&aList[nList&~((2<<iSub)-1)] ); | 1515 assert( p->aList==&aList[nList&~((2<<iSub)-1)] ); |
1487 walMerge(aContent, p->aList, p->nList, &aMerge, &nMerge, aBuffer); | 1516 walMerge(aContent, p->aList, p->nList, &aMerge, &nMerge, aBuffer); |
1488 } | 1517 } |
1489 } | 1518 } |
1490 assert( aMerge==aList ); | 1519 assert( aMerge==aList ); |
1491 *pnList = nMerge; | 1520 *pnList = nMerge; |
1492 | 1521 |
1493 #ifdef SQLITE_DEBUG | 1522 #ifdef SQLITE_DEBUG |
1494 { | 1523 { |
1495 int i; | 1524 int i; |
1496 for(i=1; i<*pnList; i++){ | 1525 for(i=1; i<*pnList; i++){ |
1497 assert( aContent[aList[i]] > aContent[aList[i-1]] ); | 1526 assert( aContent[aList[i]] > aContent[aList[i-1]] ); |
1498 } | 1527 } |
1499 } | 1528 } |
1500 #endif | 1529 #endif |
1501 } | 1530 } |
1502 | 1531 |
1503 /* | 1532 /* |
1504 ** Free an iterator allocated by walIteratorInit(). | 1533 ** Free an iterator allocated by walIteratorInit(). |
1505 */ | 1534 */ |
1506 static void walIteratorFree(WalIterator *p){ | 1535 static void walIteratorFree(WalIterator *p){ |
1507 sqlite3ScratchFree(p); | 1536 sqlite3_free(p); |
1508 } | 1537 } |
1509 | 1538 |
1510 /* | 1539 /* |
1511 ** Construct a WalIterator object that can be used to loop over all | 1540 ** Construct a WalIterator object that can be used to loop over all |
1512 ** pages in the WAL in ascending order. The caller must hold the checkpoint | 1541 ** pages in the WAL in ascending order. The caller must hold the checkpoint |
1513 ** lock. | 1542 ** lock. |
1514 ** | 1543 ** |
1515 ** On success, make *pp point to the newly allocated WalIterator object | 1544 ** On success, make *pp point to the newly allocated WalIterator object |
1516 ** and return SQLITE_OK. Otherwise, return an error code. If this routine | 1545 ** and return SQLITE_OK. Otherwise, return an error code. If this routine |
1517 ** returns an error, the value of *pp is undefined. | 1546 ** returns an error, the value of *pp is undefined. |
(...skipping 14 matching lines...) Expand all Loading... |
1532 ** it only runs if there is actually content in the log (mxFrame>0). | 1561 ** it only runs if there is actually content in the log (mxFrame>0). |
1533 */ | 1562 */ |
1534 assert( pWal->ckptLock && pWal->hdr.mxFrame>0 ); | 1563 assert( pWal->ckptLock && pWal->hdr.mxFrame>0 ); |
1535 iLast = pWal->hdr.mxFrame; | 1564 iLast = pWal->hdr.mxFrame; |
1536 | 1565 |
1537 /* Allocate space for the WalIterator object. */ | 1566 /* Allocate space for the WalIterator object. */ |
1538 nSegment = walFramePage(iLast) + 1; | 1567 nSegment = walFramePage(iLast) + 1; |
1539 nByte = sizeof(WalIterator) | 1568 nByte = sizeof(WalIterator) |
1540 + (nSegment-1)*sizeof(struct WalSegment) | 1569 + (nSegment-1)*sizeof(struct WalSegment) |
1541 + iLast*sizeof(ht_slot); | 1570 + iLast*sizeof(ht_slot); |
1542 p = (WalIterator *)sqlite3ScratchMalloc(nByte); | 1571 p = (WalIterator *)sqlite3_malloc64(nByte); |
1543 if( !p ){ | 1572 if( !p ){ |
1544 return SQLITE_NOMEM; | 1573 return SQLITE_NOMEM; |
1545 } | 1574 } |
1546 memset(p, 0, nByte); | 1575 memset(p, 0, nByte); |
1547 p->nSegment = nSegment; | 1576 p->nSegment = nSegment; |
1548 | 1577 |
1549 /* Allocate temporary space used by the merge-sort routine. This block | 1578 /* Allocate temporary space used by the merge-sort routine. This block |
1550 ** of memory will be freed before this function returns. | 1579 ** of memory will be freed before this function returns. |
1551 */ | 1580 */ |
1552 aTmp = (ht_slot *)sqlite3ScratchMalloc( | 1581 aTmp = (ht_slot *)sqlite3_malloc64( |
1553 sizeof(ht_slot) * (iLast>HASHTABLE_NPAGE?HASHTABLE_NPAGE:iLast) | 1582 sizeof(ht_slot) * (iLast>HASHTABLE_NPAGE?HASHTABLE_NPAGE:iLast) |
1554 ); | 1583 ); |
1555 if( !aTmp ){ | 1584 if( !aTmp ){ |
1556 rc = SQLITE_NOMEM; | 1585 rc = SQLITE_NOMEM; |
1557 } | 1586 } |
1558 | 1587 |
1559 for(i=0; rc==SQLITE_OK && i<nSegment; i++){ | 1588 for(i=0; rc==SQLITE_OK && i<nSegment; i++){ |
1560 volatile ht_slot *aHash; | 1589 volatile ht_slot *aHash; |
1561 u32 iZero; | 1590 u32 iZero; |
1562 volatile u32 *aPgno; | 1591 volatile u32 *aPgno; |
(...skipping 16 matching lines...) Expand all Loading... |
1579 for(j=0; j<nEntry; j++){ | 1608 for(j=0; j<nEntry; j++){ |
1580 aIndex[j] = (ht_slot)j; | 1609 aIndex[j] = (ht_slot)j; |
1581 } | 1610 } |
1582 walMergesort((u32 *)aPgno, aTmp, aIndex, &nEntry); | 1611 walMergesort((u32 *)aPgno, aTmp, aIndex, &nEntry); |
1583 p->aSegment[i].iZero = iZero; | 1612 p->aSegment[i].iZero = iZero; |
1584 p->aSegment[i].nEntry = nEntry; | 1613 p->aSegment[i].nEntry = nEntry; |
1585 p->aSegment[i].aIndex = aIndex; | 1614 p->aSegment[i].aIndex = aIndex; |
1586 p->aSegment[i].aPgno = (u32 *)aPgno; | 1615 p->aSegment[i].aPgno = (u32 *)aPgno; |
1587 } | 1616 } |
1588 } | 1617 } |
1589 sqlite3ScratchFree(aTmp); | 1618 sqlite3_free(aTmp); |
1590 | 1619 |
1591 if( rc!=SQLITE_OK ){ | 1620 if( rc!=SQLITE_OK ){ |
1592 walIteratorFree(p); | 1621 walIteratorFree(p); |
1593 } | 1622 } |
1594 *pp = p; | 1623 *pp = p; |
1595 return rc; | 1624 return rc; |
1596 } | 1625 } |
1597 | 1626 |
1598 /* | 1627 /* |
1599 ** Attempt to obtain the exclusive WAL lock defined by parameters lockIdx and | 1628 ** Attempt to obtain the exclusive WAL lock defined by parameters lockIdx and |
(...skipping 17 matching lines...) Expand all Loading... |
1617 | 1646 |
1618 /* | 1647 /* |
1619 ** The cache of the wal-index header must be valid to call this function. | 1648 ** The cache of the wal-index header must be valid to call this function. |
1620 ** Return the page-size in bytes used by the database. | 1649 ** Return the page-size in bytes used by the database. |
1621 */ | 1650 */ |
1622 static int walPagesize(Wal *pWal){ | 1651 static int walPagesize(Wal *pWal){ |
1623 return (pWal->hdr.szPage&0xfe00) + ((pWal->hdr.szPage&0x0001)<<16); | 1652 return (pWal->hdr.szPage&0xfe00) + ((pWal->hdr.szPage&0x0001)<<16); |
1624 } | 1653 } |
1625 | 1654 |
1626 /* | 1655 /* |
| 1656 ** The following is guaranteed when this function is called: |
| 1657 ** |
| 1658 ** a) the WRITER lock is held, |
| 1659 ** b) the entire log file has been checkpointed, and |
| 1660 ** c) any existing readers are reading exclusively from the database |
| 1661 ** file - there are no readers that may attempt to read a frame from |
| 1662 ** the log file. |
| 1663 ** |
| 1664 ** This function updates the shared-memory structures so that the next |
| 1665 ** client to write to the database (which may be this one) does so by |
| 1666 ** writing frames into the start of the log file. |
| 1667 ** |
| 1668 ** The value of parameter salt1 is used as the aSalt[1] value in the |
| 1669 ** new wal-index header. It should be passed a pseudo-random value (i.e. |
| 1670 ** one obtained from sqlite3_randomness()). |
| 1671 */ |
| 1672 static void walRestartHdr(Wal *pWal, u32 salt1){ |
| 1673 volatile WalCkptInfo *pInfo = walCkptInfo(pWal); |
| 1674 int i; /* Loop counter */ |
| 1675 u32 *aSalt = pWal->hdr.aSalt; /* Big-endian salt values */ |
| 1676 pWal->nCkpt++; |
| 1677 pWal->hdr.mxFrame = 0; |
| 1678 sqlite3Put4byte((u8*)&aSalt[0], 1 + sqlite3Get4byte((u8*)&aSalt[0])); |
| 1679 memcpy(&pWal->hdr.aSalt[1], &salt1, 4); |
| 1680 walIndexWriteHdr(pWal); |
| 1681 pInfo->nBackfill = 0; |
| 1682 pInfo->nBackfillAttempted = 0; |
| 1683 pInfo->aReadMark[1] = 0; |
| 1684 for(i=2; i<WAL_NREADER; i++) pInfo->aReadMark[i] = READMARK_NOT_USED; |
| 1685 assert( pInfo->aReadMark[0]==0 ); |
| 1686 } |
| 1687 |
| 1688 /* |
1627 ** Copy as much content as we can from the WAL back into the database file | 1689 ** Copy as much content as we can from the WAL back into the database file |
1628 ** in response to an sqlite3_wal_checkpoint() request or the equivalent. | 1690 ** in response to an sqlite3_wal_checkpoint() request or the equivalent. |
1629 ** | 1691 ** |
1630 ** The amount of information copied from WAL to database might be limited | 1692 ** The amount of information copied from WAL to database might be limited |
1631 ** by active readers. This routine will never overwrite a database page | 1693 ** by active readers. This routine will never overwrite a database page |
1632 ** that a concurrent reader might be using. | 1694 ** that a concurrent reader might be using. |
1633 ** | 1695 ** |
1634 ** All I/O barrier operations (a.k.a fsyncs) occur in this routine when | 1696 ** All I/O barrier operations (a.k.a fsyncs) occur in this routine when |
1635 ** SQLite is in WAL-mode in synchronous=NORMAL. That means that if | 1697 ** SQLite is in WAL-mode in synchronous=NORMAL. That means that if |
1636 ** checkpoints are always run by a background thread or background | 1698 ** checkpoints are always run by a background thread or background |
(...skipping 13 matching lines...) Expand all Loading... |
1650 ** (A WAL reset or recovery will revert nBackfill to zero, but not increase | 1712 ** (A WAL reset or recovery will revert nBackfill to zero, but not increase |
1651 ** its value.) | 1713 ** its value.) |
1652 ** | 1714 ** |
1653 ** The caller must be holding sufficient locks to ensure that no other | 1715 ** The caller must be holding sufficient locks to ensure that no other |
1654 ** checkpoint is running (in any other thread or process) at the same | 1716 ** checkpoint is running (in any other thread or process) at the same |
1655 ** time. | 1717 ** time. |
1656 */ | 1718 */ |
1657 static int walCheckpoint( | 1719 static int walCheckpoint( |
1658 Wal *pWal, /* Wal connection */ | 1720 Wal *pWal, /* Wal connection */ |
1659 int eMode, /* One of PASSIVE, FULL or RESTART */ | 1721 int eMode, /* One of PASSIVE, FULL, RESTART or TRUNCATE */ |
1660 int (*xBusyCall)(void*), /* Function to call when busy */ | 1722 int (*xBusy)(void*), /* Function to call when busy */ |
1661 void *pBusyArg, /* Context argument for xBusyHandler */ | 1723 void *pBusyArg, /* Context argument for xBusyHandler */ |
1662 int sync_flags, /* Flags for OsSync() (or 0) */ | 1724 int sync_flags, /* Flags for OsSync() (or 0) */ |
1663 u8 *zBuf /* Temporary buffer to use */ | 1725 u8 *zBuf /* Temporary buffer to use */ |
1664 ){ | 1726 ){ |
1665 int rc; /* Return code */ | 1727 int rc = SQLITE_OK; /* Return code */ |
1666 int szPage; /* Database page-size */ | 1728 int szPage; /* Database page-size */ |
1667 WalIterator *pIter = 0; /* Wal iterator context */ | 1729 WalIterator *pIter = 0; /* Wal iterator context */ |
1668 u32 iDbpage = 0; /* Next database page to write */ | 1730 u32 iDbpage = 0; /* Next database page to write */ |
1669 u32 iFrame = 0; /* Wal frame containing data for iDbpage */ | 1731 u32 iFrame = 0; /* Wal frame containing data for iDbpage */ |
1670 u32 mxSafeFrame; /* Max frame that can be backfilled */ | 1732 u32 mxSafeFrame; /* Max frame that can be backfilled */ |
1671 u32 mxPage; /* Max database page to write */ | 1733 u32 mxPage; /* Max database page to write */ |
1672 int i; /* Loop counter */ | 1734 int i; /* Loop counter */ |
1673 volatile WalCkptInfo *pInfo; /* The checkpoint status information */ | 1735 volatile WalCkptInfo *pInfo; /* The checkpoint status information */ |
1674 int (*xBusy)(void*) = 0; /* Function to call when waiting for locks */ | |
1675 | 1736 |
1676 szPage = walPagesize(pWal); | 1737 szPage = walPagesize(pWal); |
1677 testcase( szPage<=32768 ); | 1738 testcase( szPage<=32768 ); |
1678 testcase( szPage>=65536 ); | 1739 testcase( szPage>=65536 ); |
1679 pInfo = walCkptInfo(pWal); | 1740 pInfo = walCkptInfo(pWal); |
1680 if( pInfo->nBackfill>=pWal->hdr.mxFrame ) return SQLITE_OK; | 1741 if( pInfo->nBackfill<pWal->hdr.mxFrame ){ |
1681 | 1742 |
1682 /* Allocate the iterator */ | 1743 /* Allocate the iterator */ |
1683 rc = walIteratorInit(pWal, &pIter); | 1744 rc = walIteratorInit(pWal, &pIter); |
1684 if( rc!=SQLITE_OK ){ | 1745 if( rc!=SQLITE_OK ){ |
1685 return rc; | 1746 return rc; |
1686 } | 1747 } |
1687 assert( pIter ); | 1748 assert( pIter ); |
1688 | 1749 |
1689 if( eMode!=SQLITE_CHECKPOINT_PASSIVE ) xBusy = xBusyCall; | 1750 /* EVIDENCE-OF: R-62920-47450 The busy-handler callback is never invoked |
| 1751 ** in the SQLITE_CHECKPOINT_PASSIVE mode. */ |
| 1752 assert( eMode!=SQLITE_CHECKPOINT_PASSIVE || xBusy==0 ); |
1690 | 1753 |
1691 /* Compute in mxSafeFrame the index of the last frame of the WAL that is | 1754 /* Compute in mxSafeFrame the index of the last frame of the WAL that is |
1692 ** safe to write into the database. Frames beyond mxSafeFrame might | 1755 ** safe to write into the database. Frames beyond mxSafeFrame might |
1693 ** overwrite database pages that are in use by active readers and thus | 1756 ** overwrite database pages that are in use by active readers and thus |
1694 ** cannot be backfilled from the WAL. | 1757 ** cannot be backfilled from the WAL. |
1695 */ | 1758 */ |
1696 mxSafeFrame = pWal->hdr.mxFrame; | 1759 mxSafeFrame = pWal->hdr.mxFrame; |
1697 mxPage = pWal->hdr.nPage; | 1760 mxPage = pWal->hdr.nPage; |
1698 for(i=1; i<WAL_NREADER; i++){ | 1761 for(i=1; i<WAL_NREADER; i++){ |
1699 u32 y = pInfo->aReadMark[i]; | 1762 /* Thread-sanitizer reports that the following is an unsafe read, |
1700 if( mxSafeFrame>y ){ | 1763 ** as some other thread may be in the process of updating the value |
1701 assert( y<=pWal->hdr.mxFrame ); | 1764 ** of the aReadMark[] slot. The assumption here is that if that is |
1702 rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(i), 1); | 1765 ** happening, the other client may only be increasing the value, |
| 1766 ** not decreasing it. So assuming that either the "old" or |
| 1767 ** "new" version of the value is read, and not some arbitrary value |
| 1768 ** that would never be written by a real client, things are still |
| 1769 ** safe. */ |
| 1770 u32 y = pInfo->aReadMark[i]; |
| 1771 if( mxSafeFrame>y ){ |
| 1772 assert( y<=pWal->hdr.mxFrame ); |
| 1773 rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(i), 1); |
| 1774 if( rc==SQLITE_OK ){ |
| 1775 pInfo->aReadMark[i] = (i==1 ? mxSafeFrame : READMARK_NOT_USED); |
| 1776 walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1); |
| 1777 }else if( rc==SQLITE_BUSY ){ |
| 1778 mxSafeFrame = y; |
| 1779 xBusy = 0; |
| 1780 }else{ |
| 1781 goto walcheckpoint_out; |
| 1782 } |
| 1783 } |
| 1784 } |
| 1785 |
| 1786 if( pInfo->nBackfill<mxSafeFrame |
| 1787 && (rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(0),1))==SQLITE_OK |
| 1788 ){ |
| 1789 i64 nSize; /* Current size of database file */ |
| 1790 u32 nBackfill = pInfo->nBackfill; |
| 1791 |
| 1792 pInfo->nBackfillAttempted = mxSafeFrame; |
| 1793 |
| 1794 /* Sync the WAL to disk */ |
| 1795 if( sync_flags ){ |
| 1796 rc = sqlite3OsSync(pWal->pWalFd, sync_flags); |
| 1797 } |
| 1798 |
| 1799 /* If the database may grow as a result of this checkpoint, hint |
| 1800 ** about the eventual size of the db file to the VFS layer. |
| 1801 */ |
1703 if( rc==SQLITE_OK ){ | 1802 if( rc==SQLITE_OK ){ |
1704 pInfo->aReadMark[i] = (i==1 ? mxSafeFrame : READMARK_NOT_USED); | 1803 i64 nReq = ((i64)mxPage * szPage); |
1705 walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1); | 1804 rc = sqlite3OsFileSize(pWal->pDbFd, &nSize); |
1706 }else if( rc==SQLITE_BUSY ){ | 1805 if( rc==SQLITE_OK && nSize<nReq ){ |
1707 mxSafeFrame = y; | 1806 sqlite3OsFileControlHint(pWal->pDbFd, SQLITE_FCNTL_SIZE_HINT, &nReq); |
1708 xBusy = 0; | 1807 } |
1709 }else{ | |
1710 goto walcheckpoint_out; | |
1711 } | 1808 } |
| 1809 |
| 1810 |
| 1811 /* Iterate through the contents of the WAL, copying data to the db file */ |
| 1812 while( rc==SQLITE_OK && 0==walIteratorNext(pIter, &iDbpage, &iFrame) ){ |
| 1813 i64 iOffset; |
| 1814 assert( walFramePgno(pWal, iFrame)==iDbpage ); |
| 1815 if( iFrame<=nBackfill || iFrame>mxSafeFrame || iDbpage>mxPage ){ |
| 1816 continue; |
| 1817 } |
| 1818 iOffset = walFrameOffset(iFrame, szPage) + WAL_FRAME_HDRSIZE; |
| 1819 /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL file */ |
| 1820 rc = sqlite3OsRead(pWal->pWalFd, zBuf, szPage, iOffset); |
| 1821 if( rc!=SQLITE_OK ) break; |
| 1822 iOffset = (iDbpage-1)*(i64)szPage; |
| 1823 testcase( IS_BIG_INT(iOffset) ); |
| 1824 rc = sqlite3OsWrite(pWal->pDbFd, zBuf, szPage, iOffset); |
| 1825 if( rc!=SQLITE_OK ) break; |
| 1826 } |
| 1827 |
| 1828 /* If work was actually accomplished... */ |
| 1829 if( rc==SQLITE_OK ){ |
| 1830 if( mxSafeFrame==walIndexHdr(pWal)->mxFrame ){ |
| 1831 i64 szDb = pWal->hdr.nPage*(i64)szPage; |
| 1832 testcase( IS_BIG_INT(szDb) ); |
| 1833 rc = sqlite3OsTruncate(pWal->pDbFd, szDb); |
| 1834 if( rc==SQLITE_OK && sync_flags ){ |
| 1835 rc = sqlite3OsSync(pWal->pDbFd, sync_flags); |
| 1836 } |
| 1837 } |
| 1838 if( rc==SQLITE_OK ){ |
| 1839 pInfo->nBackfill = mxSafeFrame; |
| 1840 } |
| 1841 } |
| 1842 |
| 1843 /* Release the reader lock held while backfilling */ |
| 1844 walUnlockExclusive(pWal, WAL_READ_LOCK(0), 1); |
| 1845 } |
| 1846 |
| 1847 if( rc==SQLITE_BUSY ){ |
| 1848 /* Reset the return code so as not to report a checkpoint failure |
| 1849 ** just because there are active readers. */ |
| 1850 rc = SQLITE_OK; |
1712 } | 1851 } |
1713 } | 1852 } |
1714 | 1853 |
1715 if( pInfo->nBackfill<mxSafeFrame | 1854 /* If this is an SQLITE_CHECKPOINT_RESTART or TRUNCATE operation, and the |
1716 && (rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(0), 1))==SQLITE_OK | 1855 ** entire wal file has been copied into the database file, then block |
1717 ){ | 1856 ** until all readers have finished using the wal file. This ensures that |
1718 i64 nSize; /* Current size of database file */ | 1857 ** the next process to write to the database restarts the wal file. |
1719 u32 nBackfill = pInfo->nBackfill; | |
1720 | |
1721 /* Sync the WAL to disk */ | |
1722 if( sync_flags ){ | |
1723 rc = sqlite3OsSync(pWal->pWalFd, sync_flags); | |
1724 } | |
1725 | |
1726 /* If the database may grow as a result of this checkpoint, hint | |
1727 ** about the eventual size of the db file to the VFS layer. | |
1728 */ | |
1729 if( rc==SQLITE_OK ){ | |
1730 i64 nReq = ((i64)mxPage * szPage); | |
1731 rc = sqlite3OsFileSize(pWal->pDbFd, &nSize); | |
1732 if( rc==SQLITE_OK && nSize<nReq ){ | |
1733 sqlite3OsFileControlHint(pWal->pDbFd, SQLITE_FCNTL_SIZE_HINT, &nReq); | |
1734 } | |
1735 } | |
1736 | |
1737 | |
1738 /* Iterate through the contents of the WAL, copying data to the db file. */ | |
1739 while( rc==SQLITE_OK && 0==walIteratorNext(pIter, &iDbpage, &iFrame) ){ | |
1740 i64 iOffset; | |
1741 assert( walFramePgno(pWal, iFrame)==iDbpage ); | |
1742 if( iFrame<=nBackfill || iFrame>mxSafeFrame || iDbpage>mxPage ) continue; | |
1743 iOffset = walFrameOffset(iFrame, szPage) + WAL_FRAME_HDRSIZE; | |
1744 /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL file */ | |
1745 rc = sqlite3OsRead(pWal->pWalFd, zBuf, szPage, iOffset); | |
1746 if( rc!=SQLITE_OK ) break; | |
1747 iOffset = (iDbpage-1)*(i64)szPage; | |
1748 testcase( IS_BIG_INT(iOffset) ); | |
1749 rc = sqlite3OsWrite(pWal->pDbFd, zBuf, szPage, iOffset); | |
1750 if( rc!=SQLITE_OK ) break; | |
1751 } | |
1752 | |
1753 /* If work was actually accomplished... */ | |
1754 if( rc==SQLITE_OK ){ | |
1755 if( mxSafeFrame==walIndexHdr(pWal)->mxFrame ){ | |
1756 i64 szDb = pWal->hdr.nPage*(i64)szPage; | |
1757 testcase( IS_BIG_INT(szDb) ); | |
1758 rc = sqlite3OsTruncate(pWal->pDbFd, szDb); | |
1759 if( rc==SQLITE_OK && sync_flags ){ | |
1760 rc = sqlite3OsSync(pWal->pDbFd, sync_flags); | |
1761 } | |
1762 } | |
1763 if( rc==SQLITE_OK ){ | |
1764 pInfo->nBackfill = mxSafeFrame; | |
1765 } | |
1766 } | |
1767 | |
1768 /* Release the reader lock held while backfilling */ | |
1769 walUnlockExclusive(pWal, WAL_READ_LOCK(0), 1); | |
1770 } | |
1771 | |
1772 if( rc==SQLITE_BUSY ){ | |
1773 /* Reset the return code so as not to report a checkpoint failure | |
1774 ** just because there are active readers. */ | |
1775 rc = SQLITE_OK; | |
1776 } | |
1777 | |
1778 /* If this is an SQLITE_CHECKPOINT_RESTART operation, and the entire wal | |
1779 ** file has been copied into the database file, then block until all | |
1780 ** readers have finished using the wal file. This ensures that the next | |
1781 ** process to write to the database restarts the wal file. | |
1782 */ | 1858 */ |
1783 if( rc==SQLITE_OK && eMode!=SQLITE_CHECKPOINT_PASSIVE ){ | 1859 if( rc==SQLITE_OK && eMode!=SQLITE_CHECKPOINT_PASSIVE ){ |
1784 assert( pWal->writeLock ); | 1860 assert( pWal->writeLock ); |
1785 if( pInfo->nBackfill<pWal->hdr.mxFrame ){ | 1861 if( pInfo->nBackfill<pWal->hdr.mxFrame ){ |
1786 rc = SQLITE_BUSY; | 1862 rc = SQLITE_BUSY; |
1787 }else if( eMode==SQLITE_CHECKPOINT_RESTART ){ | 1863 }else if( eMode>=SQLITE_CHECKPOINT_RESTART ){ |
1788 assert( mxSafeFrame==pWal->hdr.mxFrame ); | 1864 u32 salt1; |
| 1865 sqlite3_randomness(4, &salt1); |
| 1866 assert( pInfo->nBackfill==pWal->hdr.mxFrame ); |
1789 rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(1), WAL_NREADER-1); | 1867 rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(1), WAL_NREADER-1); |
1790 if( rc==SQLITE_OK ){ | 1868 if( rc==SQLITE_OK ){ |
| 1869 if( eMode==SQLITE_CHECKPOINT_TRUNCATE ){ |
| 1870 /* IMPLEMENTATION-OF: R-44699-57140 This mode works the same way as |
| 1871 ** SQLITE_CHECKPOINT_RESTART with the addition that it also |
| 1872 ** truncates the log file to zero bytes just prior to a |
| 1873 ** successful return. |
| 1874 ** |
| 1875 ** In theory, it might be safe to do this without updating the |
| 1876 ** wal-index header in shared memory, as all subsequent reader or |
| 1877 ** writer clients should see that the entire log file has been |
| 1878 ** checkpointed and behave accordingly. This seems unsafe though, |
| 1879 ** as it would leave the system in a state where the contents of |
| 1880 ** the wal-index header do not match the contents of the |
| 1881 ** file-system. To avoid this, update the wal-index header to |
| 1882 ** indicate that the log file contains zero valid frames. */ |
| 1883 walRestartHdr(pWal, salt1); |
| 1884 rc = sqlite3OsTruncate(pWal->pWalFd, 0); |
| 1885 } |
1791 walUnlockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1); | 1886 walUnlockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1); |
1792 } | 1887 } |
1793 } | 1888 } |
1794 } | 1889 } |
1795 | 1890 |
1796 walcheckpoint_out: | 1891 walcheckpoint_out: |
1797 walIteratorFree(pIter); | 1892 walIteratorFree(pIter); |
1798 return rc; | 1893 return rc; |
1799 } | 1894 } |
1800 | 1895 |
(...skipping 271 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2072 ** update values of the aReadMark[] array in the header, but if it does | 2167 ** update values of the aReadMark[] array in the header, but if it does |
2073 ** so it takes care to hold an exclusive lock on the corresponding | 2168 ** so it takes care to hold an exclusive lock on the corresponding |
2074 ** WAL_READ_LOCK() while changing values. | 2169 ** WAL_READ_LOCK() while changing values. |
2075 */ | 2170 */ |
2076 static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){ | 2171 static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){ |
2077 volatile WalCkptInfo *pInfo; /* Checkpoint information in wal-index */ | 2172 volatile WalCkptInfo *pInfo; /* Checkpoint information in wal-index */ |
2078 u32 mxReadMark; /* Largest aReadMark[] value */ | 2173 u32 mxReadMark; /* Largest aReadMark[] value */ |
2079 int mxI; /* Index of largest aReadMark[] value */ | 2174 int mxI; /* Index of largest aReadMark[] value */ |
2080 int i; /* Loop counter */ | 2175 int i; /* Loop counter */ |
2081 int rc = SQLITE_OK; /* Return code */ | 2176 int rc = SQLITE_OK; /* Return code */ |
| 2177 u32 mxFrame; /* Wal frame to lock to */ |
2082 | 2178 |
2083 assert( pWal->readLock<0 ); /* Not currently locked */ | 2179 assert( pWal->readLock<0 ); /* Not currently locked */ |
2084 | 2180 |
2085 /* Take steps to avoid spinning forever if there is a protocol error. | 2181 /* Take steps to avoid spinning forever if there is a protocol error. |
2086 ** | 2182 ** |
2087 ** Circumstances that cause a RETRY should only last for the briefest | 2183 ** Circumstances that cause a RETRY should only last for the briefest |
2088 ** instants of time. No I/O or other system calls are done while the | 2184 ** instants of time. No I/O or other system calls are done while the |
2089 ** locks are held, so the locks should not be held for very long. But | 2185 ** locks are held, so the locks should not be held for very long. But |
2090 ** if we are unlucky, another process that is holding a lock might get | 2186 ** if we are unlucky, another process that is holding a lock might get |
2091 ** paged out or take a page-fault that is time-consuming to resolve, | 2187 ** paged out or take a page-fault that is time-consuming to resolve, |
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2135 }else if( rc==SQLITE_BUSY ){ | 2231 }else if( rc==SQLITE_BUSY ){ |
2136 rc = SQLITE_BUSY_RECOVERY; | 2232 rc = SQLITE_BUSY_RECOVERY; |
2137 } | 2233 } |
2138 } | 2234 } |
2139 if( rc!=SQLITE_OK ){ | 2235 if( rc!=SQLITE_OK ){ |
2140 return rc; | 2236 return rc; |
2141 } | 2237 } |
2142 } | 2238 } |
2143 | 2239 |
2144 pInfo = walCkptInfo(pWal); | 2240 pInfo = walCkptInfo(pWal); |
2145 if( !useWal && pInfo->nBackfill==pWal->hdr.mxFrame ){ | 2241 if( !useWal && pInfo->nBackfill==pWal->hdr.mxFrame |
| 2242 #ifdef SQLITE_ENABLE_SNAPSHOT |
| 2243 && (pWal->pSnapshot==0 || pWal->hdr.mxFrame==0 |
| 2244 || 0==memcmp(&pWal->hdr, pWal->pSnapshot, sizeof(WalIndexHdr))) |
| 2245 #endif |
| 2246 ){ |
2146 /* The WAL has been completely backfilled (or it is empty) | 2247 /* The WAL has been completely backfilled (or it is empty) |
2147 ** and can be safely ignored. | 2248 ** and can be safely ignored. |
2148 */ | 2249 */ |
2149 rc = walLockShared(pWal, WAL_READ_LOCK(0)); | 2250 rc = walLockShared(pWal, WAL_READ_LOCK(0)); |
2150 walShmBarrier(pWal); | 2251 walShmBarrier(pWal); |
2151 if( rc==SQLITE_OK ){ | 2252 if( rc==SQLITE_OK ){ |
2152 if( memcmp((void *)walIndexHdr(pWal), &pWal->hdr, sizeof(WalIndexHdr)) ){ | 2253 if( memcmp((void *)walIndexHdr(pWal), &pWal->hdr, sizeof(WalIndexHdr)) ){ |
2153 /* It is not safe to allow the reader to continue here if frames | 2254 /* It is not safe to allow the reader to continue here if frames |
2154 ** may have been appended to the log before READ_LOCK(0) was obtained. | 2255 ** may have been appended to the log before READ_LOCK(0) was obtained. |
2155 ** When holding READ_LOCK(0), the reader ignores the entire log file, | 2256 ** When holding READ_LOCK(0), the reader ignores the entire log file, |
(...skipping 17 matching lines...) Expand all Loading... |
2173 } | 2274 } |
2174 } | 2275 } |
2175 | 2276 |
2176 /* If we get this far, it means that the reader will want to use | 2277 /* If we get this far, it means that the reader will want to use |
2177 ** the WAL to get at content from recent commits. The job now is | 2278 ** the WAL to get at content from recent commits. The job now is |
2178 ** to select one of the aReadMark[] entries that is closest to | 2279 ** to select one of the aReadMark[] entries that is closest to |
2179 ** but not exceeding pWal->hdr.mxFrame and lock that entry. | 2280 ** but not exceeding pWal->hdr.mxFrame and lock that entry. |
2180 */ | 2281 */ |
2181 mxReadMark = 0; | 2282 mxReadMark = 0; |
2182 mxI = 0; | 2283 mxI = 0; |
| 2284 mxFrame = pWal->hdr.mxFrame; |
| 2285 #ifdef SQLITE_ENABLE_SNAPSHOT |
| 2286 if( pWal->pSnapshot && pWal->pSnapshot->mxFrame<mxFrame ){ |
| 2287 mxFrame = pWal->pSnapshot->mxFrame; |
| 2288 } |
| 2289 #endif |
2183 for(i=1; i<WAL_NREADER; i++){ | 2290 for(i=1; i<WAL_NREADER; i++){ |
2184 u32 thisMark = pInfo->aReadMark[i]; | 2291 u32 thisMark = pInfo->aReadMark[i]; |
2185 if( mxReadMark<=thisMark && thisMark<=pWal->hdr.mxFrame ){ | 2292 if( mxReadMark<=thisMark && thisMark<=mxFrame ){ |
2186 assert( thisMark!=READMARK_NOT_USED ); | 2293 assert( thisMark!=READMARK_NOT_USED ); |
2187 mxReadMark = thisMark; | 2294 mxReadMark = thisMark; |
2188 mxI = i; | 2295 mxI = i; |
2189 } | 2296 } |
2190 } | 2297 } |
2191 /* There was once an "if" here. The extra "{" is to preserve indentation. */ | 2298 if( (pWal->readOnly & WAL_SHM_RDONLY)==0 |
2192 { | 2299 && (mxReadMark<mxFrame || mxI==0) |
2193 if( (pWal->readOnly & WAL_SHM_RDONLY)==0 | 2300 ){ |
2194 && (mxReadMark<pWal->hdr.mxFrame || mxI==0) | 2301 for(i=1; i<WAL_NREADER; i++){ |
2195 ){ | 2302 rc = walLockExclusive(pWal, WAL_READ_LOCK(i), 1); |
2196 for(i=1; i<WAL_NREADER; i++){ | 2303 if( rc==SQLITE_OK ){ |
2197 rc = walLockExclusive(pWal, WAL_READ_LOCK(i), 1); | 2304 mxReadMark = pInfo->aReadMark[i] = mxFrame; |
2198 if( rc==SQLITE_OK ){ | 2305 mxI = i; |
2199 mxReadMark = pInfo->aReadMark[i] = pWal->hdr.mxFrame; | 2306 walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1); |
2200 mxI = i; | 2307 break; |
2201 walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1); | 2308 }else if( rc!=SQLITE_BUSY ){ |
2202 break; | 2309 return rc; |
2203 }else if( rc!=SQLITE_BUSY ){ | |
2204 return rc; | |
2205 } | |
2206 } | 2310 } |
2207 } | 2311 } |
2208 if( mxI==0 ){ | 2312 } |
2209 assert( rc==SQLITE_BUSY || (pWal->readOnly & WAL_SHM_RDONLY)!=0 ); | 2313 if( mxI==0 ){ |
2210 return rc==SQLITE_BUSY ? WAL_RETRY : SQLITE_READONLY_CANTLOCK; | 2314 assert( rc==SQLITE_BUSY || (pWal->readOnly & WAL_SHM_RDONLY)!=0 ); |
2211 } | 2315 return rc==SQLITE_BUSY ? WAL_RETRY : SQLITE_READONLY_CANTLOCK; |
| 2316 } |
2212 | 2317 |
2213 rc = walLockShared(pWal, WAL_READ_LOCK(mxI)); | 2318 rc = walLockShared(pWal, WAL_READ_LOCK(mxI)); |
2214 if( rc ){ | 2319 if( rc ){ |
2215 return rc==SQLITE_BUSY ? WAL_RETRY : rc; | 2320 return rc==SQLITE_BUSY ? WAL_RETRY : rc; |
2216 } | 2321 } |
2217 /* Now that the read-lock has been obtained, check that neither the | 2322 /* Now that the read-lock has been obtained, check that neither the |
2218 ** value in the aReadMark[] array nor the contents of the wal-index | 2323 ** value in the aReadMark[] array nor the contents of the wal-index |
2219 ** header have changed. | 2324 ** header have changed. |
2220 ** | 2325 ** |
2221 ** It is necessary to check that the wal-index header did not change | 2326 ** It is necessary to check that the wal-index header did not change |
2222 ** between the time it was read and when the shared-lock | 2327 ** between the time it was read and when the shared-lock |
2223 ** on WAL_READ_LOCK(mxI) was obtained to account for the possibility | 2328 ** on WAL_READ_LOCK(mxI) was obtained to account for the possibility |
2224 ** that the log file may have been wrapped by a writer, or that frames | 2329 ** that the log file may have been wrapped by a writer, or that frames |
2225 ** that occur later in the log than pWal->hdr.mxFrame may have been | 2330 ** that occur later in the log than pWal->hdr.mxFrame may have been |
2226 ** copied into the database by a checkpointer. If either of these things | 2331 ** copied into the database by a checkpointer. If either of these things |
2227 ** happened, then reading the database with the current value of | 2332 ** happened, then reading the database with the current value of |
2228 ** pWal->hdr.mxFrame risks reading a corrupted snapshot. So, retry | 2333 ** pWal->hdr.mxFrame risks reading a corrupted snapshot. So, retry |
2229 ** instead. | 2334 ** instead. |
2230 ** | 2335 ** |
2231 ** This does not guarantee that the copy of the wal-index header is up to | 2336 ** Before checking that the live wal-index header has not changed |
2232 ** date before proceeding. That would not be possible without somehow | 2337 ** since it was read, set Wal.minFrame to the first frame in the wal |
2233 ** blocking writers. It only guarantees that a dangerous checkpoint or | 2338 ** file that has not yet been checkpointed. This client will not need |
2234 ** log-wrap (either of which would require an exclusive lock on | 2339 ** to read any frames earlier than minFrame from the wal file - they |
2235 ** WAL_READ_LOCK(mxI)) has not occurred since the snapshot was valid. | 2340 ** can be safely read directly from the database file. |
2236 */ | 2341 ** |
2237 walShmBarrier(pWal); | 2342 ** Because a ShmBarrier() call is made between taking the copy of |
2238 if( pInfo->aReadMark[mxI]!=mxReadMark | 2343 ** nBackfill and checking that the wal-header in shared-memory still |
2239 || memcmp((void *)walIndexHdr(pWal), &pWal->hdr, sizeof(WalIndexHdr)) | 2344 ** matches the one cached in pWal->hdr, it is guaranteed that the |
2240 ){ | 2345 ** checkpointer that set nBackfill was not working with a wal-index |
2241 walUnlockShared(pWal, WAL_READ_LOCK(mxI)); | 2346 ** header newer than that cached in pWal->hdr. If it were, that could |
2242 return WAL_RETRY; | 2347 ** cause a problem. The checkpointer could omit to checkpoint |
2243 }else{ | 2348 ** a version of page X that lies before pWal->minFrame (call that version |
2244 assert( mxReadMark<=pWal->hdr.mxFrame ); | 2349 ** A) on the basis that there is a newer version (version B) of the same |
2245 pWal->readLock = (i16)mxI; | 2350 ** page later in the wal file. But if version B happens to lie past |
2246 } | 2351 ** frame pWal->hdr.mxFrame - then the client would incorrectly assume |
| 2352 ** that it can read version A from the database file. However, since |
| 2353 ** we can guarantee that the checkpointer that set nBackfill could not |
| 2354 ** see any pages past pWal->hdr.mxFrame, this problem does not come up. |
| 2355 */ |
| 2356 pWal->minFrame = pInfo->nBackfill+1; |
| 2357 walShmBarrier(pWal); |
| 2358 if( pInfo->aReadMark[mxI]!=mxReadMark |
| 2359 || memcmp((void *)walIndexHdr(pWal), &pWal->hdr, sizeof(WalIndexHdr)) |
| 2360 ){ |
| 2361 walUnlockShared(pWal, WAL_READ_LOCK(mxI)); |
| 2362 return WAL_RETRY; |
| 2363 }else{ |
| 2364 assert( mxReadMark<=pWal->hdr.mxFrame ); |
| 2365 pWal->readLock = (i16)mxI; |
2247 } | 2366 } |
2248 return rc; | 2367 return rc; |
2249 } | 2368 } |
2250 | 2369 |
2251 /* | 2370 /* |
2252 ** Begin a read transaction on the database. | 2371 ** Begin a read transaction on the database. |
2253 ** | 2372 ** |
2254 ** This routine used to be called sqlite3OpenSnapshot() and with good reason: | 2373 ** This routine used to be called sqlite3OpenSnapshot() and with good reason: |
2255 ** it takes a snapshot of the state of the WAL and wal-index for the current | 2374 ** it takes a snapshot of the state of the WAL and wal-index for the current |
2256 ** instant in time. The current thread will continue to use this snapshot. | 2375 ** instant in time. The current thread will continue to use this snapshot. |
2257 ** Other threads might append new content to the WAL and wal-index but | 2376 ** Other threads might append new content to the WAL and wal-index but |
2258 ** that extra content is ignored by the current thread. | 2377 ** that extra content is ignored by the current thread. |
2259 ** | 2378 ** |
2260 ** If the database contents have changed since the previous read | 2379 ** If the database contents have changed since the previous read |
2261 ** transaction, then *pChanged is set to 1 before returning. The | 2380 ** transaction, then *pChanged is set to 1 before returning. The |
2262 ** Pager layer will use this to know that its cache is stale and | 2381 ** Pager layer will use this to know that its cache is stale and |
2263 ** needs to be flushed. | 2382 ** needs to be flushed. |
2264 */ | 2383 */ |
2265 int sqlite3WalBeginReadTransaction(Wal *pWal, int *pChanged){ | 2384 int sqlite3WalBeginReadTransaction(Wal *pWal, int *pChanged){ |
2266 int rc; /* Return code */ | 2385 int rc; /* Return code */ |
2267 int cnt = 0; /* Number of TryBeginRead attempts */ | 2386 int cnt = 0; /* Number of TryBeginRead attempts */ |
2268 | 2387 |
| 2388 #ifdef SQLITE_ENABLE_SNAPSHOT |
| 2389 int bChanged = 0; |
| 2390 WalIndexHdr *pSnapshot = pWal->pSnapshot; |
| 2391 if( pSnapshot && memcmp(pSnapshot, &pWal->hdr, sizeof(WalIndexHdr))!=0 ){ |
| 2392 bChanged = 1; |
| 2393 } |
| 2394 #endif |
| 2395 |
2269 do{ | 2396 do{ |
2270 rc = walTryBeginRead(pWal, pChanged, 0, ++cnt); | 2397 rc = walTryBeginRead(pWal, pChanged, 0, ++cnt); |
2271 }while( rc==WAL_RETRY ); | 2398 }while( rc==WAL_RETRY ); |
2272 testcase( (rc&0xff)==SQLITE_BUSY ); | 2399 testcase( (rc&0xff)==SQLITE_BUSY ); |
2273 testcase( (rc&0xff)==SQLITE_IOERR ); | 2400 testcase( (rc&0xff)==SQLITE_IOERR ); |
2274 testcase( rc==SQLITE_PROTOCOL ); | 2401 testcase( rc==SQLITE_PROTOCOL ); |
2275 testcase( rc==SQLITE_OK ); | 2402 testcase( rc==SQLITE_OK ); |
| 2403 |
| 2404 #ifdef SQLITE_ENABLE_SNAPSHOT |
| 2405 if( rc==SQLITE_OK ){ |
| 2406 if( pSnapshot && memcmp(pSnapshot, &pWal->hdr, sizeof(WalIndexHdr))!=0 ){ |
| 2407 /* At this point the client has a lock on an aReadMark[] slot holding |
| 2408 ** a value equal to or smaller than pSnapshot->mxFrame, but pWal->hdr |
| 2409 ** is populated with the wal-index header corresponding to the head |
| 2410 ** of the wal file. Verify that pSnapshot is still valid before |
| 2411 ** continuing. Reasons why pSnapshot might no longer be valid: |
| 2412 ** |
| 2413 ** (1) The WAL file has been reset since the snapshot was taken. |
| 2414 ** In this case, the salt will have changed. |
| 2415 ** |
| 2416 ** (2) A checkpoint has been attempted that wrote frames past |
| 2417 ** pSnapshot->mxFrame into the database file. Note that the |
| 2418 ** checkpoint need not have completed for this to cause problems. |
| 2419 */ |
| 2420 volatile WalCkptInfo *pInfo = walCkptInfo(pWal); |
| 2421 |
| 2422 assert( pWal->readLock>0 || pWal->hdr.mxFrame==0 ); |
| 2423 assert( pInfo->aReadMark[pWal->readLock]<=pSnapshot->mxFrame ); |
| 2424 |
| 2425 /* It is possible that there is a checkpointer thread running |
| 2426 ** concurrent with this code. If this is the case, it may be that the |
| 2427 ** checkpointer has already determined that it will checkpoint |
| 2428 ** snapshot X, where X is later in the wal file than pSnapshot, but |
| 2429 ** has not yet set the pInfo->nBackfillAttempted variable to indicate |
| 2430 ** its intent. To avoid the race condition this leads to, ensure that |
| 2431 ** there is no checkpointer process by taking a shared CKPT lock |
| 2432 ** before checking pInfo->nBackfillAttempted. */ |
| 2433 rc = walLockShared(pWal, WAL_CKPT_LOCK); |
| 2434 |
| 2435 if( rc==SQLITE_OK ){ |
| 2436 /* Check that the wal file has not been wrapped. Assuming that it has |
| 2437 ** not, also check that no checkpointer has attempted to checkpoint any |
| 2438 ** frames beyond pSnapshot->mxFrame. If either of these checks fails, |
| 2439 ** return SQLITE_BUSY_SNAPSHOT. Otherwise, overwrite pWal->hdr |
| 2440 ** with *pSnapshot and set *pChanged as appropriate for opening the |
| 2441 ** snapshot. */ |
| 2442 if( !memcmp(pSnapshot->aSalt, pWal->hdr.aSalt, sizeof(pWal->hdr.aSalt)) |
| 2443 && pSnapshot->mxFrame>=pInfo->nBackfillAttempted |
| 2444 ){ |
| 2445 assert( pWal->readLock>0 ); |
| 2446 memcpy(&pWal->hdr, pSnapshot, sizeof(WalIndexHdr)); |
| 2447 *pChanged = bChanged; |
| 2448 }else{ |
| 2449 rc = SQLITE_BUSY_SNAPSHOT; |
| 2450 } |
| 2451 |
| 2452 /* Release the shared CKPT lock obtained above. */ |
| 2453 walUnlockShared(pWal, WAL_CKPT_LOCK); |
| 2454 } |
| 2455 |
| 2456 |
| 2457 if( rc!=SQLITE_OK ){ |
| 2458 sqlite3WalEndReadTransaction(pWal); |
| 2459 } |
| 2460 } |
| 2461 } |
| 2462 #endif |
2276 return rc; | 2463 return rc; |
2277 } | 2464 } |
2278 | 2465 |
2279 /* | 2466 /* |
2280 ** Finish with a read transaction. All this does is release the | 2467 ** Finish with a read transaction. All this does is release the |
2281 ** read-lock. | 2468 ** read-lock. |
2282 */ | 2469 */ |
2283 void sqlite3WalEndReadTransaction(Wal *pWal){ | 2470 void sqlite3WalEndReadTransaction(Wal *pWal){ |
2284 sqlite3WalEndWriteTransaction(pWal); | 2471 sqlite3WalEndWriteTransaction(pWal); |
2285 if( pWal->readLock>=0 ){ | 2472 if( pWal->readLock>=0 ){ |
(...skipping 11 matching lines...) Expand all Loading... |
2297 ** error does occur, the final value of *piRead is undefined. | 2484 ** error does occur, the final value of *piRead is undefined. |
2298 */ | 2485 */ |
2299 int sqlite3WalFindFrame( | 2486 int sqlite3WalFindFrame( |
2300 Wal *pWal, /* WAL handle */ | 2487 Wal *pWal, /* WAL handle */ |
2301 Pgno pgno, /* Database page number to read data for */ | 2488 Pgno pgno, /* Database page number to read data for */ |
2302 u32 *piRead /* OUT: Frame number (or zero) */ | 2489 u32 *piRead /* OUT: Frame number (or zero) */ |
2303 ){ | 2490 ){ |
2304 u32 iRead = 0; /* If !=0, WAL frame to return data from */ | 2491 u32 iRead = 0; /* If !=0, WAL frame to return data from */ |
2305 u32 iLast = pWal->hdr.mxFrame; /* Last page in WAL for this reader */ | 2492 u32 iLast = pWal->hdr.mxFrame; /* Last page in WAL for this reader */ |
2306 int iHash; /* Used to loop through N hash tables */ | 2493 int iHash; /* Used to loop through N hash tables */ |
| 2494 int iMinHash; |
2307 | 2495 |
2308 /* This routine is only called from within a read transaction. */ | 2496 /* This routine is only called from within a read transaction. */ |
2309 assert( pWal->readLock>=0 || pWal->lockError ); | 2497 assert( pWal->readLock>=0 || pWal->lockError ); |
2310 | 2498 |
2311 /* If the "last page" field of the wal-index header snapshot is 0, then | 2499 /* If the "last page" field of the wal-index header snapshot is 0, then |
2312 ** no data will be read from the wal under any circumstances. Return early | 2500 ** no data will be read from the wal under any circumstances. Return early |
2313 ** in this case as an optimization. Likewise, if pWal->readLock==0, | 2501 ** in this case as an optimization. Likewise, if pWal->readLock==0, |
2314 ** then the WAL is ignored by the reader so return early, as if the | 2502 ** then the WAL is ignored by the reader so return early, as if the |
2315 ** WAL were empty. | 2503 ** WAL were empty. |
2316 */ | 2504 */ |
(...skipping 20 matching lines...) Expand all Loading... |
2337 ** loop of the following block is more stringent than would be required | 2525 ** loop of the following block is more stringent than would be required |
2338 ** if we had exclusive access to the hash-table: | 2526 ** if we had exclusive access to the hash-table: |
2339 ** | 2527 ** |
2340 ** (aPgno[iFrame]==pgno): | 2528 ** (aPgno[iFrame]==pgno): |
2341 ** This condition filters out normal hash-table collisions. | 2529 ** This condition filters out normal hash-table collisions. |
2342 ** | 2530 ** |
2343 ** (iFrame<=iLast): | 2531 ** (iFrame<=iLast): |
2344 ** This condition filters out entries that were added to the hash | 2532 ** This condition filters out entries that were added to the hash |
2345 ** table after the current read-transaction had started. | 2533 ** table after the current read-transaction had started. |
2346 */ | 2534 */ |
2347 for(iHash=walFramePage(iLast); iHash>=0 && iRead==0; iHash--){ | 2535 iMinHash = walFramePage(pWal->minFrame); |
| 2536 for(iHash=walFramePage(iLast); iHash>=iMinHash && iRead==0; iHash--){ |
2348 volatile ht_slot *aHash; /* Pointer to hash table */ | 2537 volatile ht_slot *aHash; /* Pointer to hash table */ |
2349 volatile u32 *aPgno; /* Pointer to array of page numbers */ | 2538 volatile u32 *aPgno; /* Pointer to array of page numbers */ |
2350 u32 iZero; /* Frame number corresponding to aPgno[0] */ | 2539 u32 iZero; /* Frame number corresponding to aPgno[0] */ |
2351 int iKey; /* Hash slot index */ | 2540 int iKey; /* Hash slot index */ |
2352 int nCollide; /* Number of hash collisions remaining */ | 2541 int nCollide; /* Number of hash collisions remaining */ |
2353 int rc; /* Error code */ | 2542 int rc; /* Error code */ |
2354 | 2543 |
2355 rc = walHashGet(pWal, iHash, &aHash, &aPgno, &iZero); | 2544 rc = walHashGet(pWal, iHash, &aHash, &aPgno, &iZero); |
2356 if( rc!=SQLITE_OK ){ | 2545 if( rc!=SQLITE_OK ){ |
2357 return rc; | 2546 return rc; |
2358 } | 2547 } |
2359 nCollide = HASHTABLE_NSLOT; | 2548 nCollide = HASHTABLE_NSLOT; |
2360 for(iKey=walHash(pgno); aHash[iKey]; iKey=walNextHash(iKey)){ | 2549 for(iKey=walHash(pgno); aHash[iKey]; iKey=walNextHash(iKey)){ |
2361 u32 iFrame = aHash[iKey] + iZero; | 2550 u32 iFrame = aHash[iKey] + iZero; |
2362 if( iFrame<=iLast && aPgno[aHash[iKey]]==pgno ){ | 2551 if( iFrame<=iLast && iFrame>=pWal->minFrame && aPgno[aHash[iKey]]==pgno ){ |
2363 /* assert( iFrame>iRead ); -- not true if there is corruption */ | 2552 assert( iFrame>iRead || CORRUPT_DB ); |
2364 iRead = iFrame; | 2553 iRead = iFrame; |
2365 } | 2554 } |
2366 if( (nCollide--)==0 ){ | 2555 if( (nCollide--)==0 ){ |
2367 return SQLITE_CORRUPT_BKPT; | 2556 return SQLITE_CORRUPT_BKPT; |
2368 } | 2557 } |
2369 } | 2558 } |
2370 } | 2559 } |
2371 | 2560 |
2372 #ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT | 2561 #ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT |
2373 /* If expensive assert() statements are available, do a linear search | 2562 /* If expensive assert() statements are available, do a linear search |
2374 ** of the wal-index file content. Make sure the results agree with the | 2563 ** of the wal-index file content. Make sure the results agree with the |
2375 ** result obtained using the hash indexes above. */ | 2564 ** result obtained using the hash indexes above. */ |
2376 { | 2565 { |
2377 u32 iRead2 = 0; | 2566 u32 iRead2 = 0; |
2378 u32 iTest; | 2567 u32 iTest; |
2379 for(iTest=iLast; iTest>0; iTest--){ | 2568 assert( pWal->minFrame>0 ); |
| 2569 for(iTest=iLast; iTest>=pWal->minFrame; iTest--){ |
2380 if( walFramePgno(pWal, iTest)==pgno ){ | 2570 if( walFramePgno(pWal, iTest)==pgno ){ |
2381 iRead2 = iTest; | 2571 iRead2 = iTest; |
2382 break; | 2572 break; |
2383 } | 2573 } |
2384 } | 2574 } |
2385 assert( iRead==iRead2 ); | 2575 assert( iRead==iRead2 ); |
2386 } | 2576 } |
2387 #endif | 2577 #endif |
2388 | 2578 |
2389 *piRead = iRead; | 2579 *piRead = iRead; |
(...skipping 176 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2566 if( aWalData[0]<pWal->hdr.mxFrame ){ | 2756 if( aWalData[0]<pWal->hdr.mxFrame ){ |
2567 pWal->hdr.mxFrame = aWalData[0]; | 2757 pWal->hdr.mxFrame = aWalData[0]; |
2568 pWal->hdr.aFrameCksum[0] = aWalData[1]; | 2758 pWal->hdr.aFrameCksum[0] = aWalData[1]; |
2569 pWal->hdr.aFrameCksum[1] = aWalData[2]; | 2759 pWal->hdr.aFrameCksum[1] = aWalData[2]; |
2570 walCleanupHash(pWal); | 2760 walCleanupHash(pWal); |
2571 } | 2761 } |
2572 | 2762 |
2573 return rc; | 2763 return rc; |
2574 } | 2764 } |
2575 | 2765 |
2576 | |
2577 /* | 2766 /* |
2578 ** This function is called just before writing a set of frames to the log | 2767 ** This function is called just before writing a set of frames to the log |
2579 ** file (see sqlite3WalFrames()). It checks to see if, instead of appending | 2768 ** file (see sqlite3WalFrames()). It checks to see if, instead of appending |
2580 ** to the current log file, it is possible to overwrite the start of the | 2769 ** to the current log file, it is possible to overwrite the start of the |
2581 ** existing log file with the new frames (i.e. "reset" the log). If so, | 2770 ** existing log file with the new frames (i.e. "reset" the log). If so, |
2582 ** it sets pWal->hdr.mxFrame to 0. Otherwise, pWal->hdr.mxFrame is left | 2771 ** it sets pWal->hdr.mxFrame to 0. Otherwise, pWal->hdr.mxFrame is left |
2583 ** unchanged. | 2772 ** unchanged. |
2584 ** | 2773 ** |
2585 ** SQLITE_OK is returned if no error is encountered (regardless of whether | 2774 ** SQLITE_OK is returned if no error is encountered (regardless of whether |
2586 ** or not pWal->hdr.mxFrame is modified). An SQLite error code is returned | 2775 ** or not pWal->hdr.mxFrame is modified). An SQLite error code is returned |
(...skipping 12 matching lines...) Expand all Loading... |
2599 rc = walLockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1); | 2788 rc = walLockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1); |
2600 if( rc==SQLITE_OK ){ | 2789 if( rc==SQLITE_OK ){ |
2601 /* If all readers are using WAL_READ_LOCK(0) (in other words if no | 2790 /* If all readers are using WAL_READ_LOCK(0) (in other words if no |
2602 ** readers are currently using the WAL), then the transaction's | 2791 ** readers are currently using the WAL), then the transaction's |
2603 ** frames will overwrite the start of the existing log. Update the | 2792 ** frames will overwrite the start of the existing log. Update the |
2604 ** wal-index header to reflect this. | 2793 ** wal-index header to reflect this. |
2605 ** | 2794 ** |
2606 ** In theory it would be Ok to update the cache of the header only | 2795 ** In theory it would be Ok to update the cache of the header only |
2607 ** at this point. But updating the actual wal-index header is also | 2796 ** at this point. But updating the actual wal-index header is also |
2608 ** safe and means there is no special case for sqlite3WalUndo() | 2797 ** safe and means there is no special case for sqlite3WalUndo() |
2609 ** to handle if this transaction is rolled back. | 2798 ** to handle if this transaction is rolled back. */ |
2610 */ | 2799 walRestartHdr(pWal, salt1); |
2611 int i; /* Loop counter */ | |
2612 u32 *aSalt = pWal->hdr.aSalt; /* Big-endian salt values */ | |
2613 | |
2614 pWal->nCkpt++; | |
2615 pWal->hdr.mxFrame = 0; | |
2616 sqlite3Put4byte((u8*)&aSalt[0], 1 + sqlite3Get4byte((u8*)&aSalt[0])); | |
2617 aSalt[1] = salt1; | |
2618 walIndexWriteHdr(pWal); | |
2619 pInfo->nBackfill = 0; | |
2620 pInfo->aReadMark[1] = 0; | |
2621 for(i=2; i<WAL_NREADER; i++) pInfo->aReadMark[i] = READMARK_NOT_USED; | |
2622 assert( pInfo->aReadMark[0]==0 ); | |
2623 walUnlockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1); | 2800 walUnlockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1); |
2624 }else if( rc!=SQLITE_BUSY ){ | 2801 }else if( rc!=SQLITE_BUSY ){ |
2625 return rc; | 2802 return rc; |
2626 } | 2803 } |
2627 } | 2804 } |
2628 walUnlockShared(pWal, WAL_READ_LOCK(0)); | 2805 walUnlockShared(pWal, WAL_READ_LOCK(0)); |
2629 pWal->readLock = -1; | 2806 pWal->readLock = -1; |
2630 cnt = 0; | 2807 cnt = 0; |
2631 do{ | 2808 do{ |
2632 int notUsed; | 2809 int notUsed; |
(...skipping 267 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2900 ** related interfaces. | 3077 ** related interfaces. |
2901 ** | 3078 ** |
2902 ** Obtain a CHECKPOINT lock and then backfill as much information as | 3079 ** Obtain a CHECKPOINT lock and then backfill as much information as |
2903 ** we can from WAL into the database. | 3080 ** we can from WAL into the database. |
2904 ** | 3081 ** |
2905 ** If parameter xBusy is not NULL, it is a pointer to a busy-handler | 3082 ** If parameter xBusy is not NULL, it is a pointer to a busy-handler |
2906 ** callback. In this case this function runs a blocking checkpoint. | 3083 ** callback. In this case this function runs a blocking checkpoint. |
2907 */ | 3084 */ |
2908 int sqlite3WalCheckpoint( | 3085 int sqlite3WalCheckpoint( |
2909 Wal *pWal, /* Wal connection */ | 3086 Wal *pWal, /* Wal connection */ |
2910 int eMode, /* PASSIVE, FULL or RESTART */ | 3087 int eMode, /* PASSIVE, FULL, RESTART, or TRUNCATE */ |
2911 int (*xBusy)(void*), /* Function to call when busy */ | 3088 int (*xBusy)(void*), /* Function to call when busy */ |
2912 void *pBusyArg, /* Context argument for xBusyHandler */ | 3089 void *pBusyArg, /* Context argument for xBusyHandler */ |
2913 int sync_flags, /* Flags to sync db file with (or 0) */ | 3090 int sync_flags, /* Flags to sync db file with (or 0) */ |
2914 int nBuf, /* Size of temporary buffer */ | 3091 int nBuf, /* Size of temporary buffer */ |
2915 u8 *zBuf, /* Temporary buffer to use */ | 3092 u8 *zBuf, /* Temporary buffer to use */ |
2916 int *pnLog, /* OUT: Number of frames in WAL */ | 3093 int *pnLog, /* OUT: Number of frames in WAL */ |
2917 int *pnCkpt /* OUT: Number of backfilled frames in WAL */ | 3094 int *pnCkpt /* OUT: Number of backfilled frames in WAL */ |
2918 ){ | 3095 ){ |
2919 int rc; /* Return code */ | 3096 int rc; /* Return code */ |
2920 int isChanged = 0; /* True if a new wal-index header is loaded */ | 3097 int isChanged = 0; /* True if a new wal-index header is loaded */ |
2921 int eMode2 = eMode; /* Mode to pass to walCheckpoint() */ | 3098 int eMode2 = eMode; /* Mode to pass to walCheckpoint() */ |
| 3099 int (*xBusy2)(void*) = xBusy; /* Busy handler for eMode2 */ |
2922 | 3100 |
2923 assert( pWal->ckptLock==0 ); | 3101 assert( pWal->ckptLock==0 ); |
2924 assert( pWal->writeLock==0 ); | 3102 assert( pWal->writeLock==0 ); |
2925 | 3103 |
| 3104 /* EVIDENCE-OF: R-62920-47450 The busy-handler callback is never invoked |
| 3105 ** in the SQLITE_CHECKPOINT_PASSIVE mode. */ |
| 3106 assert( eMode!=SQLITE_CHECKPOINT_PASSIVE || xBusy==0 ); |
| 3107 |
2926 if( pWal->readOnly ) return SQLITE_READONLY; | 3108 if( pWal->readOnly ) return SQLITE_READONLY; |
2927 WALTRACE(("WAL%p: checkpoint begins\n", pWal)); | 3109 WALTRACE(("WAL%p: checkpoint begins\n", pWal)); |
| 3110 |
| 3111 /* IMPLEMENTATION-OF: R-62028-47212 All calls obtain an exclusive |
| 3112 ** "checkpoint" lock on the database file. */ |
2928 rc = walLockExclusive(pWal, WAL_CKPT_LOCK, 1); | 3113 rc = walLockExclusive(pWal, WAL_CKPT_LOCK, 1); |
2929 if( rc ){ | 3114 if( rc ){ |
2930 /* Usually this is SQLITE_BUSY meaning that another thread or process | 3115 /* EVIDENCE-OF: R-10421-19736 If any other process is running a |
2931 ** is already running a checkpoint, or maybe a recovery. But it might | 3116 ** checkpoint operation at the same time, the lock cannot be obtained and |
2932 ** also be SQLITE_IOERR. */ | 3117 ** SQLITE_BUSY is returned. |
| 3118 ** EVIDENCE-OF: R-53820-33897 Even if there is a busy-handler configured, |
| 3119 ** it will not be invoked in this case. |
| 3120 */ |
| 3121 testcase( rc==SQLITE_BUSY ); |
| 3122 testcase( xBusy!=0 ); |
2933 return rc; | 3123 return rc; |
2934 } | 3124 } |
2935 pWal->ckptLock = 1; | 3125 pWal->ckptLock = 1; |
2936 | 3126 |
2937 /* If this is a blocking-checkpoint, then obtain the write-lock as well | 3127 /* IMPLEMENTATION-OF: R-59782-36818 The SQLITE_CHECKPOINT_FULL, RESTART and |
2938 ** to prevent any writers from running while the checkpoint is underway. | 3128 ** TRUNCATE modes also obtain the exclusive "writer" lock on the database |
2939 ** This has to be done before the call to walIndexReadHdr() below. | 3129 ** file. |
2940 ** | 3130 ** |
2941 ** If the writer lock cannot be obtained, then a passive checkpoint is | 3131 ** EVIDENCE-OF: R-60642-04082 If the writer lock cannot be obtained |
2942 ** run instead. Since the checkpointer is not holding the writer lock, | 3132 ** immediately, and a busy-handler is configured, it is invoked and the |
2943 ** there is no point in blocking waiting for any readers. Assuming no | 3133 ** writer lock retried until either the busy-handler returns 0 or the |
2944 ** other error occurs, this function will return SQLITE_BUSY to the caller. | 3134 ** lock is successfully obtained. |
2945 */ | 3135 */ |
2946 if( eMode!=SQLITE_CHECKPOINT_PASSIVE ){ | 3136 if( eMode!=SQLITE_CHECKPOINT_PASSIVE ){ |
2947 rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_WRITE_LOCK, 1); | 3137 rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_WRITE_LOCK, 1); |
2948 if( rc==SQLITE_OK ){ | 3138 if( rc==SQLITE_OK ){ |
2949 pWal->writeLock = 1; | 3139 pWal->writeLock = 1; |
2950 }else if( rc==SQLITE_BUSY ){ | 3140 }else if( rc==SQLITE_BUSY ){ |
2951 eMode2 = SQLITE_CHECKPOINT_PASSIVE; | 3141 eMode2 = SQLITE_CHECKPOINT_PASSIVE; |
| 3142 xBusy2 = 0; |
2952 rc = SQLITE_OK; | 3143 rc = SQLITE_OK; |
2953 } | 3144 } |
2954 } | 3145 } |
2955 | 3146 |
2956 /* Read the wal-index header. */ | 3147 /* Read the wal-index header. */ |
2957 if( rc==SQLITE_OK ){ | 3148 if( rc==SQLITE_OK ){ |
2958 rc = walIndexReadHdr(pWal, &isChanged); | 3149 rc = walIndexReadHdr(pWal, &isChanged); |
2959 if( isChanged && pWal->pDbFd->pMethods->iVersion>=3 ){ | 3150 if( isChanged && pWal->pDbFd->pMethods->iVersion>=3 ){ |
2960 sqlite3OsUnfetch(pWal->pDbFd, 0, 0); | 3151 sqlite3OsUnfetch(pWal->pDbFd, 0, 0); |
2961 } | 3152 } |
2962 } | 3153 } |
2963 | 3154 |
2964 /* Copy data from the log to the database file. */ | 3155 /* Copy data from the log to the database file. */ |
2965 if( rc==SQLITE_OK ){ | 3156 if( rc==SQLITE_OK ){ |
2966 if( pWal->hdr.mxFrame && walPagesize(pWal)!=nBuf ){ | 3157 if( pWal->hdr.mxFrame && walPagesize(pWal)!=nBuf ){ |
2967 rc = SQLITE_CORRUPT_BKPT; | 3158 rc = SQLITE_CORRUPT_BKPT; |
2968 }else{ | 3159 }else{ |
2969 rc = walCheckpoint(pWal, eMode2, xBusy, pBusyArg, sync_flags, zBuf); | 3160 rc = walCheckpoint(pWal, eMode2, xBusy2, pBusyArg, sync_flags, zBuf); |
2970 } | 3161 } |
2971 | 3162 |
2972 /* If no error occurred, set the output variables. */ | 3163 /* If no error occurred, set the output variables. */ |
2973 if( rc==SQLITE_OK || rc==SQLITE_BUSY ){ | 3164 if( rc==SQLITE_OK || rc==SQLITE_BUSY ){ |
2974 if( pnLog ) *pnLog = (int)pWal->hdr.mxFrame; | 3165 if( pnLog ) *pnLog = (int)pWal->hdr.mxFrame; |
2975 if( pnCkpt ) *pnCkpt = (int)(walCkptInfo(pWal)->nBackfill); | 3166 if( pnCkpt ) *pnCkpt = (int)(walCkptInfo(pWal)->nBackfill); |
2976 } | 3167 } |
2977 } | 3168 } |
2978 | 3169 |
2979 if( isChanged ){ | 3170 if( isChanged ){ |
(...skipping 91 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3071 | 3262 |
3072 /* | 3263 /* |
3073 ** Return true if the argument is non-NULL and the WAL module is using | 3264 ** Return true if the argument is non-NULL and the WAL module is using |
3074 ** heap-memory for the wal-index. Otherwise, if the argument is NULL or the | 3265 ** heap-memory for the wal-index. Otherwise, if the argument is NULL or the |
3075 ** WAL module is using shared-memory, return false. | 3266 ** WAL module is using shared-memory, return false. |
3076 */ | 3267 */ |
3077 int sqlite3WalHeapMemory(Wal *pWal){ | 3268 int sqlite3WalHeapMemory(Wal *pWal){ |
3078 return (pWal && pWal->exclusiveMode==WAL_HEAPMEMORY_MODE ); | 3269 return (pWal && pWal->exclusiveMode==WAL_HEAPMEMORY_MODE ); |
3079 } | 3270 } |
3080 | 3271 |
| 3272 #ifdef SQLITE_ENABLE_SNAPSHOT |
| 3273 /* Create a snapshot object. The content of a snapshot is opaque to |
| 3274 ** every other subsystem, so the WAL module can put whatever it needs |
| 3275 ** in the object. |
| 3276 */ |
| 3277 int sqlite3WalSnapshotGet(Wal *pWal, sqlite3_snapshot **ppSnapshot){ |
| 3278 int rc = SQLITE_OK; |
| 3279 WalIndexHdr *pRet; |
| 3280 |
| 3281 assert( pWal->readLock>=0 && pWal->writeLock==0 ); |
| 3282 |
| 3283 pRet = (WalIndexHdr*)sqlite3_malloc(sizeof(WalIndexHdr)); |
| 3284 if( pRet==0 ){ |
| 3285 rc = SQLITE_NOMEM; |
| 3286 }else{ |
| 3287 memcpy(pRet, &pWal->hdr, sizeof(WalIndexHdr)); |
| 3288 *ppSnapshot = (sqlite3_snapshot*)pRet; |
| 3289 } |
| 3290 |
| 3291 return rc; |
| 3292 } |
| 3293 |
| 3294 /* Record pSnapshot so the next read-transaction tries to open on it |
| 3295 */ |
| 3296 void sqlite3WalSnapshotOpen(Wal *pWal, sqlite3_snapshot *pSnapshot){ |
| 3297 pWal->pSnapshot = (WalIndexHdr*)pSnapshot; |
| 3298 } |
| 3299 #endif /* SQLITE_ENABLE_SNAPSHOT */ |
| 3300 |
3081 #ifdef SQLITE_ENABLE_ZIPVFS | 3301 #ifdef SQLITE_ENABLE_ZIPVFS |
3082 /* | 3302 /* |
3083 ** If the argument is not NULL, it points to a Wal object that holds a | 3303 ** If the argument is not NULL, it points to a Wal object that holds a |
3084 ** read-lock. This function returns the database page-size if it is known, | 3304 ** read-lock. This function returns the database page-size if it is known, |
3085 ** or zero if it is not (or if pWal is NULL). | 3305 ** or zero if it is not (or if pWal is NULL). |
3086 */ | 3306 */ |
3087 int sqlite3WalFramesize(Wal *pWal){ | 3307 int sqlite3WalFramesize(Wal *pWal){ |
3088 assert( pWal==0 || pWal->readLock>=0 ); | 3308 assert( pWal==0 || pWal->readLock>=0 ); |
3089 return (pWal ? pWal->szPage : 0); | 3309 return (pWal ? pWal->szPage : 0); |
3090 } | 3310 } |
3091 #endif | 3311 #endif |
3092 | 3312 |
| 3313 /* Return the sqlite3_file object for the WAL file |
| 3314 */ |
| 3315 sqlite3_file *sqlite3WalFile(Wal *pWal){ |
| 3316 return pWal->pWalFd; |
| 3317 } |
| 3318 |
3093 #endif /* #ifndef SQLITE_OMIT_WAL */ | 3319 #endif /* #ifndef SQLITE_OMIT_WAL */ |
OLD | NEW |