Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(989)

Side by Side Diff: third_party/sqlite/src/pager.c

Issue 3108030: Move bundled copy of sqlite one level deeper to better separate it... (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/
Patch Set: Created 10 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « third_party/sqlite/src/pager.h ('k') | third_party/sqlite/src/parse.y » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 /*
2 ** 2001 September 15
3 **
4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing:
6 **
7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give.
10 **
11 *************************************************************************
12 ** This is the implementation of the page cache subsystem or "pager".
13 **
14 ** The pager is used to access a database disk file. It implements
15 ** atomic commit and rollback through the use of a journal file that
16 ** is separate from the database file. The pager also implements file
17 ** locking to prevent two processes from writing the same database
18 ** file simultaneously, or one process from reading the database while
19 ** another is writing.
20 **
21 ** @(#) $Id: pager.c,v 1.629 2009/08/10 17:48:57 drh Exp $
22 */
23 #ifndef SQLITE_OMIT_DISKIO
24 #include "sqliteInt.h"
25
26 /*
27 ** Macros for troubleshooting. Normally turned off
28 */
#if 0
int sqlite3PagerTrace=1;  /* Non-zero switches pager tracing on */
#define sqlite3DebugPrintf printf
#define PAGERTRACE(ARGS) if( sqlite3PagerTrace ){ sqlite3DebugPrintf ARGS; }
#else
#define PAGERTRACE(ARGS)  /* Tracing is compiled out: expands to nothing */
#endif
36
37 /*
38 ** The following two macros are used within the PAGERTRACE() macros above
39 ** to print out file-descriptors.
40 **
41 ** PAGERID() takes a pointer to a Pager struct as its argument. The
42 ** associated file-descriptor is returned. FILEHANDLEID() takes an sqlite3_file
43 ** struct as its argument.
44 */
#define PAGERID(p) ((int)((p)->fd))   /* p: Pager*; its database file handle as an int, for trace output */
#define FILEHANDLEID(fd) ((int)(fd))  /* fd: sqlite3_file*; the handle itself as an int, for trace output */
47
48 /*
49 ** The page cache as a whole is always in one of the following
50 ** states:
51 **
52 ** PAGER_UNLOCK The page cache is not currently reading or
53 ** writing the database file. There is no
54 ** data held in memory. This is the initial
55 ** state.
56 **
57 ** PAGER_SHARED The page cache is reading the database.
58 ** Writing is not permitted. There can be
59 ** multiple readers accessing the same database
60 ** file at the same time.
61 **
62 ** PAGER_RESERVED This process has reserved the database for writing
63 ** but has not yet made any changes. Only one process
64 ** at a time can reserve the database. The original
65 ** database file has not been modified so other
66 ** processes may still be reading the on-disk
67 ** database file.
68 **
69 ** PAGER_EXCLUSIVE The page cache is writing the database.
70 ** Access is exclusive. No other processes or
71 ** threads can be reading or writing while one
72 ** process is writing.
73 **
74 ** PAGER_SYNCED The pager moves to this state from PAGER_EXCLUSIVE
75 ** after all dirty pages have been written to the
76 ** database file and the file has been synced to
77 ** disk. All that remains to do is to remove or
78 ** truncate the journal file and the transaction
79 ** will be committed.
80 **
81 ** The page cache comes up in PAGER_UNLOCK. The first time a
82 ** sqlite3PagerGet() occurs, the state transitions to PAGER_SHARED.
83 ** After all pages have been released using sqlite_page_unref(),
84 ** the state transitions back to PAGER_UNLOCK. The first time
85 ** that sqlite3PagerWrite() is called, the state transitions to
86 ** PAGER_RESERVED. (Note that sqlite3PagerWrite() can only be
87 ** called on an outstanding page which means that the pager must
88 ** be in PAGER_SHARED before it transitions to PAGER_RESERVED.)
89 ** PAGER_RESERVED means that there is an open rollback journal.
90 ** The transition to PAGER_EXCLUSIVE occurs before any changes
91 ** are made to the database file, though writes to the rollback
92 ** journal occur with just PAGER_RESERVED. After an sqlite3PagerRollback()
93 ** or sqlite3PagerCommitPhaseTwo(), the state can go back to PAGER_SHARED,
94 ** or it can stay at PAGER_EXCLUSIVE if we are in exclusive access mode.
95 */
#define PAGER_UNLOCK      0   /* Initial state */
#define PAGER_SHARED      1   /* Numerically equal to SHARED_LOCK */
#define PAGER_RESERVED    2   /* Numerically equal to RESERVED_LOCK */
#define PAGER_EXCLUSIVE   4   /* Numerically equal to EXCLUSIVE_LOCK */
#define PAGER_SYNCED      5   /* Follows PAGER_EXCLUSIVE once the db file is synced */
101
102 /*
103 ** A macro used for invoking the codec if there is one
104 */
/*
** CODEC1(P,D,N,X,E): if pager P has a codec, run it on D; execute E when
** the codec returns 0.
** CODEC2(P,D,N,X,E,O): as CODEC1, but also store the resulting buffer in O
** (D itself when there is no codec).
**
** Every macro argument is parenthesized so that expression arguments such
** as "pBuf+4" are evaluated before the cast or member access is applied.
** The bodies deliberately stay plain if-statements rather than
** do{}while(0), because E may be a "break" or "continue" aimed at a loop
** in the caller.
*/
#ifdef SQLITE_HAS_CODEC
# define CODEC1(P,D,N,X,E) \
    if( (P)->xCodec && (P)->xCodec((P)->pCodec,(D),(N),(X))==0 ){ E; }
# define CODEC2(P,D,N,X,E,O) \
    if( (P)->xCodec==0 ){ O=(char*)(D); }else \
    if( (O=(char*)((P)->xCodec((P)->pCodec,(D),(N),(X))))==0 ){ E; }
#else
# define CODEC1(P,D,N,X,E) /* NO-OP */
# define CODEC2(P,D,N,X,E,O) O=(char*)(D)
#endif
115
116 /*
117 ** The maximum allowed sector size. 64KiB. If the xSectorSize() method
118 ** returns a value larger than this, then MAX_SECTOR_SIZE is used instead.
119 ** This could conceivably cause corruption following a power failure on
120 ** such a system. This is currently an undocumented limit.
121 */
#define MAX_SECTOR_SIZE 0x10000  /* 64KiB upper bound on the sector size */
123
124 /*
125 ** An instance of the following structure is allocated for each active
126 ** savepoint and statement transaction in the system. All such structures
127 ** are stored in the Pager.aSavepoint[] array, which is allocated and
128 ** resized using sqlite3Realloc().
129 **
130 ** When a savepoint is created, the PagerSavepoint.iHdrOffset field is
131 ** set to 0. If a journal-header is written into the main journal while
132 ** the savepoint is active, then iHdrOffset is set to the byte offset
133 ** immediately following the last journal record written into the main
134 ** journal before the journal-header. This is required during savepoint
135 ** rollback (see pagerPlaybackSavepoint()).
136 */
137 typedef struct PagerSavepoint PagerSavepoint;
138 struct PagerSavepoint {
139 i64 iOffset; /* Starting offset in main journal */
140 i64 iHdrOffset; /* See above */
141 Bitvec *pInSavepoint; /* Set of pages in this savepoint */
142 Pgno nOrig; /* Original number of pages in file */
143 Pgno iSubRec; /* Index of first record in sub-journal */
144 };
145
146 /*
147 ** An open page cache is an instance of the following structure.
148 **
149 ** errCode
150 **
151 ** Pager.errCode may be set to SQLITE_IOERR, SQLITE_CORRUPT,
152 ** or SQLITE_FULL. Once one of the first two errors occurs, it persists
153 ** and is returned as the result of every major pager API call. The
154 ** SQLITE_FULL return code is slightly different. It persists only until the
155 ** next successful rollback is performed on the pager cache. Also,
156 ** SQLITE_FULL does not affect the sqlite3PagerGet() and sqlite3PagerLookup()
157 ** APIs, they may still be used successfully.
158 **
159 ** dbSizeValid, dbSize, dbOrigSize, dbFileSize
160 **
161 ** Managing the size of the database file in pages is a little complicated.
162 ** The variable Pager.dbSize contains the number of pages that the database
163 ** image currently contains. As the database image grows or shrinks this
164 ** variable is updated. The variable Pager.dbFileSize contains the number
165 ** of pages in the database file. This may be different from Pager.dbSize
166 ** if some pages have been appended to the database image but not yet written
167 ** out from the cache to the actual file on disk. Or if the image has been
168 ** truncated by an incremental-vacuum operation. The Pager.dbOrigSize variable
169 ** contains the number of pages in the database image when the current
170 ** transaction was opened. The contents of all three of these variables is
171 ** only guaranteed to be correct if the boolean Pager.dbSizeValid is true.
172 **
173 ** TODO: Under what conditions is dbSizeValid set? Cleared?
174 **
175 ** changeCountDone
176 **
177 ** This boolean variable is used to make sure that the change-counter
178 ** (the 4-byte header field at byte offset 24 of the database file) is
179 ** not updated more often than necessary.
180 **
181 ** It is set to true when the change-counter field is updated, which
182 ** can only happen if an exclusive lock is held on the database file.
183 ** It is cleared (set to false) whenever an exclusive lock is
184 ** relinquished on the database file. Each time a transaction is committed,
185 ** the changeCountDone flag is inspected. If it is true, the work of
186 ** updating the change-counter is omitted for the current transaction.
187 **
188 ** This mechanism means that when running in exclusive mode, a connection
189 ** need only update the change-counter once, for the first transaction
190 ** committed.
191 **
192 ** dbModified
193 **
194 ** The dbModified flag is set whenever a database page is dirtied.
195 ** It is cleared at the end of each transaction.
196 **
197 ** It is used when committing or otherwise ending a transaction. If
198 ** the dbModified flag is clear then less work has to be done.
199 **
200 ** journalStarted
201 **
202 ** This flag is set whenever the main journal is synced.
203 **
204 ** The point of this flag is that it must be set after the
205 ** first journal header in a journal file has been synced to disk.
206 ** After this has happened, new pages appended to the database
207 ** do not need the PGHDR_NEED_SYNC flag set, as they do not need
208 ** to wait for a journal sync before they can be written out to
209 ** the database file (see function pager_write()).
210 **
211 ** setMaster
212 **
213 ** This variable is used to ensure that the master journal file name
214 ** (if any) is only written into the journal file once.
215 **
216 ** When committing a transaction, the master journal file name (if any)
217 ** may be written into the journal file while the pager is still in
218 ** PAGER_RESERVED state (see CommitPhaseOne() for the action). It
219 ** then attempts to upgrade to an exclusive lock. If this attempt
220 ** fails, then SQLITE_BUSY may be returned to the user and the user
221 ** may attempt to commit the transaction again later (calling
222 ** CommitPhaseOne() again). This flag is used to ensure that the
223 ** master journal name is only written to the journal file the first
224 ** time CommitPhaseOne() is called.
225 **
226 ** doNotSync
227 **
228 ** This variable is set and cleared by sqlite3PagerWrite().
229 **
230 ** needSync
231 **
232 ** TODO: It might be easier to set this variable in writeJournalHdr()
233 ** and writeMasterJournal() only. Change its meaning to "unsynced data
234 ** has been written to the journal".
235 **
236 ** subjInMemory
237 **
238 ** This is a boolean variable. If true, then any required sub-journal
239 ** is opened as an in-memory journal file. If false, then in-memory
240 ** sub-journals are only used for in-memory pager files.
241 */
242 struct Pager {
243 sqlite3_vfs *pVfs; /* OS functions to use for IO */
244 u8 exclusiveMode; /* Boolean. True if locking_mode==EXCLUSIVE */
245 u8 journalMode; /* On of the PAGER_JOURNALMODE_* values */
246 u8 useJournal; /* Use a rollback journal on this file */
247 u8 noReadlock; /* Do not bother to obtain readlocks */
248 u8 noSync; /* Do not sync the journal if true */
249 u8 fullSync; /* Do extra syncs of the journal for robustness */
250 u8 sync_flags; /* One of SYNC_NORMAL or SYNC_FULL */
251 u8 tempFile; /* zFilename is a temporary file */
252 u8 readOnly; /* True for a read-only database */
253 u8 memDb; /* True to inhibit all file I/O */
254
255 /* The following block contains those class members that are dynamically
256 ** modified during normal operations. The other variables in this structure
257 ** are either constant throughout the lifetime of the pager, or else
258 ** used to store configuration parameters that affect the way the pager
259 ** operates.
260 **
261 ** The 'state' variable is described in more detail along with the
262 ** descriptions of the values it may take - PAGER_UNLOCK etc. Many of the
263 ** other variables in this block are described in the comment directly
264 ** above this class definition.
265 */
266 u8 state; /* PAGER_UNLOCK, _SHARED, _RESERVED, etc. */
267 u8 dbModified; /* True if there are any changes to the Db */
268 u8 needSync; /* True if an fsync() is needed on the journal */
269 u8 journalStarted; /* True if header of journal is synced */
270 u8 changeCountDone; /* Set after incrementing the change-counter */
271 u8 setMaster; /* True if a m-j name has been written to jrnl */
272 u8 doNotSync; /* Boolean. While true, do not spill the cache */
273 u8 dbSizeValid; /* Set when dbSize is correct */
274 u8 subjInMemory; /* True to use in-memory sub-journals */
275 Pgno dbSize; /* Number of pages in the database */
276 Pgno dbOrigSize; /* dbSize before the current transaction */
277 Pgno dbFileSize; /* Number of pages in the database file */
278 int errCode; /* One of several kinds of errors */
279 int nRec; /* Pages journalled since last j-header written */
280 u32 cksumInit; /* Quasi-random value added to every checksum */
281 u32 nSubRec; /* Number of records written to sub-journal */
282 Bitvec *pInJournal; /* One bit for each page in the database file */
283 sqlite3_file *fd; /* File descriptor for database */
284 sqlite3_file *jfd; /* File descriptor for main journal */
285 sqlite3_file *sjfd; /* File descriptor for sub-journal */
286 i64 journalOff; /* Current write offset in the journal file */
287 i64 journalHdr; /* Byte offset to previous journal header */
288 PagerSavepoint *aSavepoint; /* Array of active savepoints */
289 int nSavepoint; /* Number of elements in aSavepoint[] */
290 char dbFileVers[16]; /* Changes whenever database file changes */
291 u32 sectorSize; /* Assumed sector size during rollback */
292
293 u16 nExtra; /* Add this many bytes to each in-memory page */
294 i16 nReserve; /* Number of unused bytes at end of each page */
295 u32 vfsFlags; /* Flags for sqlite3_vfs.xOpen() */
296 int pageSize; /* Number of bytes in a page */
297 Pgno mxPgno; /* Maximum allowed size of the database */
298 char *zFilename; /* Name of the database file */
299 char *zJournal; /* Name of the journal file */
300 int (*xBusyHandler)(void*); /* Function to call when busy */
301 void *pBusyHandlerArg; /* Context argument for xBusyHandler */
302 #ifdef SQLITE_TEST
303 int nHit, nMiss; /* Cache hits and missing */
304 int nRead, nWrite; /* Database pages read/written */
305 #endif
306 void (*xReiniter)(DbPage*); /* Call this routine when reloading pages */
307 #ifdef SQLITE_HAS_CODEC
308 void *(*xCodec)(void*,void*,Pgno,int); /* Routine for en/decoding data */
309 void (*xCodecSizeChng)(void*,int,int); /* Notify of page size changes */
310 void (*xCodecFree)(void*); /* Destructor for the codec */
311 void *pCodec; /* First argument to xCodec... methods */
312 #endif
313 char *pTmpSpace; /* Pager.pageSize bytes of space for tmp use */
314 i64 journalSizeLimit; /* Size limit for persistent journal files */
315 PCache *pPCache; /* Pointer to page cache object */
316 sqlite3_backup *pBackup; /* Pointer to list of ongoing backup processes */
317 };
318
319 /*
320 ** The following global variables hold counters used for
321 ** testing purposes only. These variables do not exist in
322 ** a non-testing build. These variables are not thread-safe.
323 */
#ifdef SQLITE_TEST
int sqlite3_pager_readdb_count = 0;    /* Full pages read from the database */
int sqlite3_pager_writedb_count = 0;   /* Full pages written to the database */
int sqlite3_pager_writej_count = 0;    /* Pages written to the journal */
# define PAGER_INCR(v)  v++
#else
# define PAGER_INCR(v)  /* Counters do not exist outside test builds */
#endif
332
333
334
335 /*
336 ** Journal files begin with the following magic string. The data
337 ** was obtained from /dev/random. It is used only as a sanity check.
338 **
339 ** Since version 2.8.0, the journal format contains additional sanity
340 ** checking information. If the power fails while the journal is being
341 ** written, semi-random garbage data might appear in the journal
342 ** file after power is restored. If an attempt is then made
343 ** to roll the journal back, the database could be corrupted. The additional
344 ** sanity checking data is an attempt to discover the garbage in the
345 ** journal and ignore it.
346 **
347 ** The sanity checking information for the new journal format consists
348 ** of a 32-bit checksum on each page of data. The checksum covers both
349 ** the page number and the pPager->pageSize bytes of data for the page.
350 ** This cksum is initialized to a 32-bit random value that appears in the
351 ** journal file right after the header. The random initializer is important,
352 ** because garbage data that appears at the end of a journal is likely
353 ** data that was once in other files that have now been deleted. If the
354 ** garbage data came from an obsolete journal file, the checksums might
355 ** be correct. But by initializing the checksum to random value which
356 ** is different for every journal, we minimize that risk.
357 */
static const unsigned char aJournalMagic[] = {
  0xd9, 0xd5, 0x05, 0xf9,   /* first half of the 8-byte magic string */
  0x20, 0xa1, 0x63, 0xd7,   /* second half */
};
361
362 /*
363 ** The size of each page record in the journal is given by
364 ** the following macro.
365 */
#define JOURNAL_PG_SZ(pPager) (((pPager)->pageSize) + 8)  /* page data plus 8 bytes per record; argument parenthesized so any pointer expression works */
367
368 /*
369 ** The journal header size for this pager. This is usually the same
370 ** size as a single disk sector. See also setSectorSize().
371 */
#define JOURNAL_HDR_SZ(pPager) ((pPager)->sectorSize)  /* argument parenthesized so any pointer expression works */
373
374 /*
375 ** The macro MEMDB is true if we are dealing with an in-memory database.
376 ** We do this as a macro so that if the SQLITE_OMIT_MEMORYDB macro is set,
377 ** the value of MEMDB will be a constant and the compiler will optimize
378 ** out code that would never execute.
379 */
#ifdef SQLITE_OMIT_MEMORYDB
# define MEMDB 0                  /* Compile-time constant */
#else
# define MEMDB (pPager->memDb)    /* Needs a variable named pPager in scope */
#endif
385
386 /*
387 ** The maximum legal page number is (2^31 - 1).
388 */
#define PAGER_MAX_PGNO 0x7fffffff  /* 2^31 - 1 */
390
391 /* Begin preload-cache.patch for Chromium */
392 /* See comments above the definition. */
393 int sqlite3PagerAcquire2(
394 Pager *pPager,
395 Pgno pgno,
396 DbPage **ppPage,
397 int noContent,
398 unsigned char *pDataToFill);
399 /* End preload-cache.patch for Chromium */
400
401 #ifndef NDEBUG
402 /*
403 ** Usage:
404 **
405 ** assert( assert_pager_state(pPager) );
406 */
407 static int assert_pager_state(Pager *pPager){
408
409 /* A temp-file is always in PAGER_EXCLUSIVE or PAGER_SYNCED state. */
410 assert( pPager->tempFile==0 || pPager->state>=PAGER_EXCLUSIVE );
411
412 /* The changeCountDone flag is always set for temp-files */
413 assert( pPager->tempFile==0 || pPager->changeCountDone );
414
415 return 1;
416 }
417 #endif
418
419 /*
420 ** Return true if it is necessary to write page *pPg into the sub-journal.
421 ** A page needs to be written into the sub-journal if there exists one
422 ** or more open savepoints for which:
423 **
424 ** * The page-number is less than or equal to PagerSavepoint.nOrig, and
425 ** * The bit corresponding to the page-number is not set in
426 ** PagerSavepoint.pInSavepoint.
427 */
428 static int subjRequiresPage(PgHdr *pPg){
429 Pgno pgno = pPg->pgno;
430 Pager *pPager = pPg->pPager;
431 int i;
432 for(i=0; i<pPager->nSavepoint; i++){
433 PagerSavepoint *p = &pPager->aSavepoint[i];
434 if( p->nOrig>=pgno && 0==sqlite3BitvecTest(p->pInSavepoint, pgno) ){
435 return 1;
436 }
437 }
438 return 0;
439 }
440
441 /*
442 ** Return true if the page is already in the journal file.
443 */
444 static int pageInJournal(PgHdr *pPg){
445 return sqlite3BitvecTest(pPg->pPager->pInJournal, pPg->pgno);
446 }
447
448 /*
449 ** Read a 32-bit integer from the given file descriptor. Store the integer
450 ** that is read in *pRes. Return SQLITE_OK if everything worked, or an
451 ** error code if something goes wrong.
452 **
453 ** All values are stored on disk as big-endian.
454 */
455 static int read32bits(sqlite3_file *fd, i64 offset, u32 *pRes){
456 unsigned char ac[4];
457 int rc = sqlite3OsRead(fd, ac, sizeof(ac), offset);
458 if( rc==SQLITE_OK ){
459 *pRes = sqlite3Get4byte(ac);
460 }
461 return rc;
462 }
463
464 /*
465 ** Write a 32-bit integer into a string buffer in big-endian byte order.
466 */
#define put32bits(A,B) sqlite3Put4byte((u8*)(A),(B))  /* arguments parenthesized so that pointer arithmetic in A happens before the cast */
468
469 /*
470 ** Write a 32-bit integer into the given file descriptor. Return SQLITE_OK
471 ** on success or an error code if something goes wrong.
472 */
473 static int write32bits(sqlite3_file *fd, i64 offset, u32 val){
474 char ac[4];
475 put32bits(ac, val);
476 return sqlite3OsWrite(fd, ac, 4, offset);
477 }
478
479 /*
480 ** The argument to this macro is a file descriptor (type sqlite3_file*).
481 ** Return 0 if it is not open, or non-zero (but not 1) if it is.
482 **
483 ** This is so that expressions can be written as:
484 **
485 ** if( isOpen(pPager->jfd) ){ ...
486 **
487 ** instead of
488 **
489 ** if( pPager->jfd->pMethods ){ ...
490 */
#define isOpen(pFile) ((pFile)->pMethods)  /* yields the pMethods pointer itself: null when closed */
492
493 /*
494 ** If file pFd is open, call sqlite3OsUnlock() on it.
495 */
496 static int osUnlock(sqlite3_file *pFd, int eLock){
497 if( !isOpen(pFd) ){
498 return SQLITE_OK;
499 }
500 return sqlite3OsUnlock(pFd, eLock);
501 }
502
503 /*
504 ** This function determines whether or not the atomic-write optimization
505 ** can be used with this pager. The optimization can be used if:
506 **
507 ** (a) the value returned by OsDeviceCharacteristics() indicates that
508 ** a database page may be written atomically, and
509 ** (b) the value returned by OsSectorSize() is less than or equal
510 ** to the page size.
511 **
512 ** The optimization is also always enabled for temporary files. It is
513 ** an error to call this function if pPager is opened on an in-memory
514 ** database.
515 **
516 ** If the optimization cannot be used, 0 is returned. If it can be used,
517 ** then the value returned is the size of the journal file when it
518 ** contains rollback data for exactly one page.
519 */
#ifdef SQLITE_ENABLE_ATOMIC_WRITE
static int jrnlBufferSize(Pager *pPager){
  assert( !MEMDB );
  if( !pPager->tempFile ){
    int dc;                           /* Device characteristics */
    int nSector;                      /* Sector size */
    int szPage;                       /* Page size */

    assert( isOpen(pPager->fd) );
    dc = sqlite3OsDeviceCharacteristics(pPager->fd);
    nSector = pPager->sectorSize;
    szPage = pPager->pageSize;

    /* The SQLITE_IOCAP_ATOMICnnn bits equal (nnn>>8), so (szPage>>8) is
    ** the capability bit for an atomic write of exactly one page. */
    assert(SQLITE_IOCAP_ATOMIC512==(512>>8));
    assert(SQLITE_IOCAP_ATOMIC64K==(65536>>8));

    /* The optimization needs BOTH an atomic page write and a sector size
    ** that does not exceed the page size. Give up if either requirement
    ** fails. The parentheses matter here: the previous form,
    ** 0==((dc&mask) || nSector>szPage), gave up only when both
    ** requirements' tests came out false, and so left the optimization
    ** enabled whenever the sector was larger than the page. */
    if( 0==(dc&(SQLITE_IOCAP_ATOMIC|(szPage>>8))) || nSector>szPage ){
      return 0;
    }
  }

  /* Size of a journal that holds one header and a single page record. */
  return JOURNAL_HDR_SZ(pPager) + JOURNAL_PG_SZ(pPager);
}
#endif
543
544 /*
545 ** If SQLITE_CHECK_PAGES is defined then we do some sanity checking
546 ** on the cache using a hash function. This is used for testing
547 ** and debugging only.
548 */
#ifdef SQLITE_CHECK_PAGES
/*
** Compute a 32-bit hash over the nByte bytes starting at pData.
*/
static u32 pager_datahash(int nByte, unsigned char *pData){
  u32 h = 0;                          /* Running hash value */
  const unsigned char *pByte = pData; /* Next byte to fold in */
  int nLeft;                          /* Bytes still to be processed */
  for(nLeft=nByte; nLeft>0; nLeft--){
    h = (h*1039) + *pByte++;
  }
  return h;
}

/*
** Hash the content of page pPage (Pager.pageSize bytes of pData).
*/
static u32 pager_pagehash(PgHdr *pPage){
  unsigned char *aData = (unsigned char *)pPage->pData;
  return pager_datahash(pPage->pPager->pageSize, aData);
}

/*
** Record the current content hash of pPage in pPage->pageHash.
*/
static void pager_set_pagehash(PgHdr *pPage){
  pPage->pageHash = pager_pagehash(pPage);
}

/*
** CHECK_PAGE(x) takes a PgHdr*. When assert() is enabled it verifies that
** at least one of these holds: no hash has been recorded, the pager has
** an error code set, the page is dirty, or the recorded hash still
** matches the page content.
*/
#define CHECK_PAGE(x) checkPage(x)
static void checkPage(PgHdr *pPg){
  Pager *pPager = pPg->pPager;
  assert( !( pPg->pageHash
          && !pPager->errCode
          && !(pPg->flags&PGHDR_DIRTY)
          && pPg->pageHash!=pager_pagehash(pPg) ) );
}

#else
/* Page checking disabled: hashes are constant 0 and CHECK_PAGE is empty. */
#define pager_datahash(N,D) 0
#define pager_pagehash(P) 0
#define CHECK_PAGE(P)
#endif /* SQLITE_CHECK_PAGES */
585
586 /*
587 ** When this is called the journal file for pager pPager must be open.
588 ** This function attempts to read a master journal file name from the
589 ** end of the file and, if successful, copies it into memory supplied
590 ** by the caller. See comments above writeMasterJournal() for the format
591 ** used to store a master journal file name at the end of a journal file.
592 **
593 ** zMaster must point to a buffer of at least nMaster bytes allocated by
594 ** the caller. This should be sqlite3_vfs.mxPathname+1 (to ensure there is
595 ** enough space to write the master journal name). If the master journal
596 ** name in the journal is longer than nMaster bytes (including a
597 ** nul-terminator), then this is handled as if no master journal name
598 ** were present in the journal.
599 **
600 ** If a master journal file name is present at the end of the journal
601 ** file, then it is copied into the buffer pointed to by zMaster. A
602 ** nul-terminator byte is appended to the buffer following the master
603 ** journal file name.
604 **
605 ** If it is determined that no master journal file name is present
606 ** zMaster[0] is set to 0 and SQLITE_OK returned.
607 **
608 ** If an error occurs while reading from the journal file, an SQLite
609 ** error code is returned.
610 */
611 static int readMasterJournal(sqlite3_file *pJrnl, char *zMaster, u32 nMaster){
612 int rc; /* Return code */
613 u32 len; /* Length in bytes of master journal name */
614 i64 szJ; /* Total size in bytes of journal file pJrnl */
615 u32 cksum; /* MJ checksum value read from journal */
616 u32 u; /* Unsigned loop counter */
617 unsigned char aMagic[8]; /* A buffer to hold the magic header */
618 zMaster[0] = '\0';
619
620 if( SQLITE_OK!=(rc = sqlite3OsFileSize(pJrnl, &szJ))
621 || szJ<16
622 || SQLITE_OK!=(rc = read32bits(pJrnl, szJ-16, &len))
623 || len>=nMaster
624 || SQLITE_OK!=(rc = read32bits(pJrnl, szJ-12, &cksum))
625 || SQLITE_OK!=(rc = sqlite3OsRead(pJrnl, aMagic, 8, szJ-8))
626 || memcmp(aMagic, aJournalMagic, 8)
627 || SQLITE_OK!=(rc = sqlite3OsRead(pJrnl, zMaster, len, szJ-16-len))
628 ){
629 return rc;
630 }
631
632 /* See if the checksum matches the master journal name */
633 for(u=0; u<len; u++){
634 cksum -= zMaster[u];
635 }
636 if( cksum ){
637 /* If the checksum doesn't add up, then one or more of the disk sectors
638 ** containing the master journal filename is corrupted. This means
639 ** definitely roll back, so just return SQLITE_OK and report a (nul)
640 ** master-journal filename.
641 */
642 len = 0;
643 }
644 zMaster[len] = '\0';
645
646 return SQLITE_OK;
647 }
648
649 /*
650 ** Return the offset of the sector boundary at or immediately
651 ** following the value in pPager->journalOff, assuming a sector
652 ** size of pPager->sectorSize bytes.
653 **
654 ** i.e. for a sector size of 512:
655 **
656 ** Pager.journalOff Return value
657 ** ---------------------------------------
658 ** 0 0
659 ** 512 512
660 ** 100 512
661 ** 2000 2048
662 **
663 */
664 static i64 journalHdrOffset(Pager *pPager){
665 i64 offset = 0;
666 i64 c = pPager->journalOff;
667 if( c ){
668 offset = ((c-1)/JOURNAL_HDR_SZ(pPager) + 1) * JOURNAL_HDR_SZ(pPager);
669 }
670 assert( offset%JOURNAL_HDR_SZ(pPager)==0 );
671 assert( offset>=c );
672 assert( (offset-c)<JOURNAL_HDR_SZ(pPager) );
673 return offset;
674 }
675
676 /*
677 ** The journal file must be open when this function is called.
678 **
679 ** This function is a no-op if the journal file has not been written to
680 ** within the current transaction (i.e. if Pager.journalOff==0).
681 **
682 ** If doTruncate is non-zero or the Pager.journalSizeLimit variable is
683 ** set to 0, then truncate the journal file to zero bytes in size. Otherwise,
684 ** zero the 28-byte header at the start of the journal file. In either case,
685 ** if the pager is not in no-sync mode, sync the journal file immediately
686 ** after writing or truncating it.
687 **
688 ** If Pager.journalSizeLimit is set to a positive, non-zero value, and
689 ** following the truncation or zeroing described above the size of the
690 ** journal file in bytes is larger than this value, then truncate the
691 ** journal file to Pager.journalSizeLimit bytes. The journal file does
692 ** not need to be synced following this operation.
693 **
694 ** If an IO error occurs, abandon processing and return the IO error code.
695 ** Otherwise, return SQLITE_OK.
696 */
697 static int zeroJournalHdr(Pager *pPager, int doTruncate){
698 int rc = SQLITE_OK; /* Return code */
699 assert( isOpen(pPager->jfd) );
700 if( pPager->journalOff ){
701 const i64 iLimit = pPager->journalSizeLimit; /* Local cache of jsl */
702
703 IOTRACE(("JZEROHDR %p\n", pPager))
704 if( doTruncate || iLimit==0 ){
705 rc = sqlite3OsTruncate(pPager->jfd, 0);
706 }else{
707 static const char zeroHdr[28] = {0};
708 rc = sqlite3OsWrite(pPager->jfd, zeroHdr, sizeof(zeroHdr), 0);
709 }
710 if( rc==SQLITE_OK && !pPager->noSync ){
711 rc = sqlite3OsSync(pPager->jfd, SQLITE_SYNC_DATAONLY|pPager->sync_flags);
712 }
713
714 /* At this point the transaction is committed but the write lock
715 ** is still held on the file. If there is a size limit configured for
716 ** the persistent journal and the journal file currently consumes more
717 ** space than that limit allows for, truncate it now. There is no need
718 ** to sync the file following this operation.
719 */
720 if( rc==SQLITE_OK && iLimit>0 ){
721 i64 sz;
722 rc = sqlite3OsFileSize(pPager->jfd, &sz);
723 if( rc==SQLITE_OK && sz>iLimit ){
724 rc = sqlite3OsTruncate(pPager->jfd, iLimit);
725 }
726 }
727 }
728 return rc;
729 }
730
731 /*
732 ** The journal file must be open when this routine is called. A journal
733 ** header (JOURNAL_HDR_SZ bytes) is written into the journal file at the
734 ** current location.
735 **
736 ** The format for the journal header is as follows:
737 ** - 8 bytes: Magic identifying journal format.
738 ** - 4 bytes: Number of records in journal, or -1 if no-sync mode is on.
739 ** - 4 bytes: Random number used for page hash.
740 ** - 4 bytes: Initial database page count.
741 ** - 4 bytes: Sector size used by the process that wrote this journal.
742 ** - 4 bytes: Database page size.
743 **
744 ** Followed by (JOURNAL_HDR_SZ - 28) bytes of unused space.
745 */
746 static int writeJournalHdr(Pager *pPager){
747 int rc = SQLITE_OK; /* Return code */
748 char *zHeader = pPager->pTmpSpace; /* Temporary space used to build header */
749 u32 nHeader = pPager->pageSize; /* Size of buffer pointed to by zHeader */
750 u32 nWrite; /* Bytes of header sector written */
751 int ii; /* Loop counter */
752
753 assert( isOpen(pPager->jfd) ); /* Journal file must be open. */
754
755 if( nHeader>JOURNAL_HDR_SZ(pPager) ){
756 nHeader = JOURNAL_HDR_SZ(pPager);
757 }
758
759 /* If there are active savepoints and any of them were created
760 ** since the most recent journal header was written, update the
761 ** PagerSavepoint.iHdrOffset fields now.
762 */
763 for(ii=0; ii<pPager->nSavepoint; ii++){
764 if( pPager->aSavepoint[ii].iHdrOffset==0 ){
765 pPager->aSavepoint[ii].iHdrOffset = pPager->journalOff;
766 }
767 }
768
769 pPager->journalHdr = pPager->journalOff = journalHdrOffset(pPager);
770
771 /*
772 ** Write the nRec Field - the number of page records that follow this
773 ** journal header. Normally, zero is written to this value at this time.
774 ** After the records are added to the journal (and the journal synced,
775 ** if in full-sync mode), the zero is overwritten with the true number
776 ** of records (see syncJournal()).
777 **
778 ** A faster alternative is to write 0xFFFFFFFF to the nRec field. When
779 ** reading the journal this value tells SQLite to assume that the
780 ** rest of the journal file contains valid page records. This assumption
781 ** is dangerous, as if a failure occurred whilst writing to the journal
782 ** file it may contain some garbage data. There are two scenarios
783 ** where this risk can be ignored:
784 **
785 ** * When the pager is in no-sync mode. Corruption can follow a
786 ** power failure in this case anyway.
787 **
788 ** * When the SQLITE_IOCAP_SAFE_APPEND flag is set. This guarantees
789 ** that garbage data is never appended to the journal file.
790 */
791 assert( isOpen(pPager->fd) || pPager->noSync );
792 if( (pPager->noSync) || (pPager->journalMode==PAGER_JOURNALMODE_MEMORY)
793 || (sqlite3OsDeviceCharacteristics(pPager->fd)&SQLITE_IOCAP_SAFE_APPEND)
794 ){
795 memcpy(zHeader, aJournalMagic, sizeof(aJournalMagic));
796 put32bits(&zHeader[sizeof(aJournalMagic)], 0xffffffff);
797 }else{
798 memset(zHeader, 0, sizeof(aJournalMagic)+4);
799 }
800
801 /* The random check-hash initialiser */
802 sqlite3_randomness(sizeof(pPager->cksumInit), &pPager->cksumInit);
803 put32bits(&zHeader[sizeof(aJournalMagic)+4], pPager->cksumInit);
804 /* The initial database size */
805 put32bits(&zHeader[sizeof(aJournalMagic)+8], pPager->dbOrigSize);
806 /* The assumed sector size for this process */
807 put32bits(&zHeader[sizeof(aJournalMagic)+12], pPager->sectorSize);
808
809 /* The page size */
810 put32bits(&zHeader[sizeof(aJournalMagic)+16], pPager->pageSize);
811
812 /* Initializing the tail of the buffer is not necessary. Everything
813 ** works find if the following memset() is omitted. But initializing
814 ** the memory prevents valgrind from complaining, so we are willing to
815 ** take the performance hit.
816 */
817 memset(&zHeader[sizeof(aJournalMagic)+20], 0,
818 nHeader-(sizeof(aJournalMagic)+20));
819
820 /* In theory, it is only necessary to write the 28 bytes that the
821 ** journal header consumes to the journal file here. Then increment the
822 ** Pager.journalOff variable by JOURNAL_HDR_SZ so that the next
823 ** record is written to the following sector (leaving a gap in the file
824 ** that will be implicitly filled in by the OS).
825 **
826 ** However it has been discovered that on some systems this pattern can
827 ** be significantly slower than contiguously writing data to the file,
828 ** even if that means explicitly writing data to the block of
829 ** (JOURNAL_HDR_SZ - 28) bytes that will not be used. So that is what
830 ** is done.
831 **
832 ** The loop is required here in case the sector-size is larger than the
833 ** database page size. Since the zHeader buffer is only Pager.pageSize
834 ** bytes in size, more than one call to sqlite3OsWrite() may be required
835 ** to populate the entire journal header sector.
836 */
837 for(nWrite=0; rc==SQLITE_OK&&nWrite<JOURNAL_HDR_SZ(pPager); nWrite+=nHeader){
838 IOTRACE(("JHDR %p %lld %d\n", pPager, pPager->journalHdr, nHeader))
839 rc = sqlite3OsWrite(pPager->jfd, zHeader, nHeader, pPager->journalOff);
840 pPager->journalOff += nHeader;
841 }
842
843 return rc;
844 }
845
846 /*
847 ** The journal file must be open when this is called. A journal header file
848 ** (JOURNAL_HDR_SZ bytes) is read from the current location in the journal
849 ** file. The current location in the journal file is given by
850 ** pPager->journalOff. See comments above function writeJournalHdr() for
851 ** a description of the journal header format.
852 **
853 ** If the header is read successfully, *pNRec is set to the number of
854 ** page records following this header and *pDbSize is set to the size of the
855 ** database before the transaction began, in pages. Also, pPager->cksumInit
856 ** is set to the value read from the journal header. SQLITE_OK is returned
857 ** in this case.
858 **
859 ** If the journal header file appears to be corrupted, SQLITE_DONE is
860 ** returned and *pNRec and *PDbSize are undefined. If JOURNAL_HDR_SZ bytes
861 ** cannot be read from the journal file an error code is returned.
862 */
863 static int readJournalHdr(
864 Pager *pPager, /* Pager object */
865 int isHot,
866 i64 journalSize, /* Size of the open journal file in bytes */
867 u32 *pNRec, /* OUT: Value read from the nRec field */
868 u32 *pDbSize /* OUT: Value of original database size field */
869 ){
870 int rc; /* Return code */
871 unsigned char aMagic[8]; /* A buffer to hold the magic header */
872 i64 iHdrOff; /* Offset of journal header being read */
873
874 assert( isOpen(pPager->jfd) ); /* Journal file must be open. */
875
876 /* Advance Pager.journalOff to the start of the next sector. If the
877 ** journal file is too small for there to be a header stored at this
878 ** point, return SQLITE_DONE.
879 */
880 pPager->journalOff = journalHdrOffset(pPager);
881 if( pPager->journalOff+JOURNAL_HDR_SZ(pPager) > journalSize ){
882 return SQLITE_DONE;
883 }
884 iHdrOff = pPager->journalOff;
885
886 /* Read in the first 8 bytes of the journal header. If they do not match
887 ** the magic string found at the start of each journal header, return
888 ** SQLITE_DONE. If an IO error occurs, return an error code. Otherwise,
889 ** proceed.
890 */
891 if( isHot || iHdrOff!=pPager->journalHdr ){
892 rc = sqlite3OsRead(pPager->jfd, aMagic, sizeof(aMagic), iHdrOff);
893 if( rc ){
894 return rc;
895 }
896 if( memcmp(aMagic, aJournalMagic, sizeof(aMagic))!=0 ){
897 return SQLITE_DONE;
898 }
899 }
900
901 /* Read the first three 32-bit fields of the journal header: The nRec
902 ** field, the checksum-initializer and the database size at the start
903 ** of the transaction. Return an error code if anything goes wrong.
904 */
905 if( SQLITE_OK!=(rc = read32bits(pPager->jfd, iHdrOff+8, pNRec))
906 || SQLITE_OK!=(rc = read32bits(pPager->jfd, iHdrOff+12, &pPager->cksumInit))
907 || SQLITE_OK!=(rc = read32bits(pPager->jfd, iHdrOff+16, pDbSize))
908 ){
909 return rc;
910 }
911
912 if( pPager->journalOff==0 ){
913 u32 iPageSize; /* Page-size field of journal header */
914 u32 iSectorSize; /* Sector-size field of journal header */
915 u16 iPageSize16; /* Copy of iPageSize in 16-bit variable */
916
917 /* Read the page-size and sector-size journal header fields. */
918 if( SQLITE_OK!=(rc = read32bits(pPager->jfd, iHdrOff+20, &iSectorSize))
919 || SQLITE_OK!=(rc = read32bits(pPager->jfd, iHdrOff+24, &iPageSize))
920 ){
921 return rc;
922 }
923
924 /* Check that the values read from the page-size and sector-size fields
925 ** are within range. To be 'in range', both values need to be a power
926 ** of two greater than or equal to 512, and not greater than their
927 ** respective compile time maximum limits.
928 */
929 if( iPageSize<512 || iSectorSize<512
930 || iPageSize>SQLITE_MAX_PAGE_SIZE || iSectorSize>MAX_SECTOR_SIZE
931 || ((iPageSize-1)&iPageSize)!=0 || ((iSectorSize-1)&iSectorSize)!=0
932 ){
933 /* If the either the page-size or sector-size in the journal-header is
934 ** invalid, then the process that wrote the journal-header must have
935 ** crashed before the header was synced. In this case stop reading
936 ** the journal file here.
937 */
938 return SQLITE_DONE;
939 }
940
941 /* Update the page-size to match the value read from the journal.
942 ** Use a testcase() macro to make sure that malloc failure within
943 ** PagerSetPagesize() is tested.
944 */
945 iPageSize16 = (u16)iPageSize;
946 rc = sqlite3PagerSetPagesize(pPager, &iPageSize16, -1);
947 testcase( rc!=SQLITE_OK );
948 assert( rc!=SQLITE_OK || iPageSize16==(u16)iPageSize );
949
950 /* Update the assumed sector-size to match the value used by
951 ** the process that created this journal. If this journal was
952 ** created by a process other than this one, then this routine
953 ** is being called from within pager_playback(). The local value
954 ** of Pager.sectorSize is restored at the end of that routine.
955 */
956 pPager->sectorSize = iSectorSize;
957 }
958
959 pPager->journalOff += JOURNAL_HDR_SZ(pPager);
960 return rc;
961 }
962
963
964 /*
965 ** Write the supplied master journal name into the journal file for pager
966 ** pPager at the current location. The master journal name must be the last
967 ** thing written to a journal file. If the pager is in full-sync mode, the
968 ** journal file descriptor is advanced to the next sector boundary before
969 ** anything is written. The format is:
970 **
971 ** + 4 bytes: PAGER_MJ_PGNO.
972 ** + N bytes: Master journal filename in utf-8.
973 ** + 4 bytes: N (length of master journal name in bytes, no nul-terminator).
974 ** + 4 bytes: Master journal name checksum.
975 ** + 8 bytes: aJournalMagic[].
976 **
977 ** The master journal page checksum is the sum of the bytes in the master
978 ** journal name, where each byte is interpreted as a signed 8-bit integer.
979 **
980 ** If zMaster is a NULL pointer (occurs for a single database transaction),
981 ** this call is a no-op.
982 */
983 static int writeMasterJournal(Pager *pPager, const char *zMaster){
984 int rc; /* Return code */
985 int nMaster; /* Length of string zMaster */
986 i64 iHdrOff; /* Offset of header in journal file */
987 i64 jrnlSize; /* Size of journal file on disk */
988 u32 cksum = 0; /* Checksum of string zMaster */
989
990 if( !zMaster || pPager->setMaster
991 || pPager->journalMode==PAGER_JOURNALMODE_MEMORY
992 || pPager->journalMode==PAGER_JOURNALMODE_OFF
993 ){
994 return SQLITE_OK;
995 }
996 pPager->setMaster = 1;
997 assert( isOpen(pPager->jfd) );
998
999 /* Calculate the length in bytes and the checksum of zMaster */
1000 for(nMaster=0; zMaster[nMaster]; nMaster++){
1001 cksum += zMaster[nMaster];
1002 }
1003
1004 /* If in full-sync mode, advance to the next disk sector before writing
1005 ** the master journal name. This is in case the previous page written to
1006 ** the journal has already been synced.
1007 */
1008 if( pPager->fullSync ){
1009 pPager->journalOff = journalHdrOffset(pPager);
1010 }
1011 iHdrOff = pPager->journalOff;
1012
1013 /* Write the master journal data to the end of the journal file. If
1014 ** an error occurs, return the error code to the caller.
1015 */
1016 if( (0 != (rc = write32bits(pPager->jfd, iHdrOff, PAGER_MJ_PGNO(pPager))))
1017 || (0 != (rc = sqlite3OsWrite(pPager->jfd, zMaster, nMaster, iHdrOff+4)))
1018 || (0 != (rc = write32bits(pPager->jfd, iHdrOff+4+nMaster, nMaster)))
1019 || (0 != (rc = write32bits(pPager->jfd, iHdrOff+4+nMaster+4, cksum)))
1020 || (0 != (rc = sqlite3OsWrite(pPager->jfd, aJournalMagic, 8, iHdrOff+4+nMaste r+8)))
1021 ){
1022 return rc;
1023 }
1024 pPager->journalOff += (nMaster+20);
1025 pPager->needSync = !pPager->noSync;
1026
1027 /* If the pager is in peristent-journal mode, then the physical
1028 ** journal-file may extend past the end of the master-journal name
1029 ** and 8 bytes of magic data just written to the file. This is
1030 ** dangerous because the code to rollback a hot-journal file
1031 ** will not be able to find the master-journal name to determine
1032 ** whether or not the journal is hot.
1033 **
1034 ** Easiest thing to do in this scenario is to truncate the journal
1035 ** file to the required size.
1036 */
1037 if( SQLITE_OK==(rc = sqlite3OsFileSize(pPager->jfd, &jrnlSize))
1038 && jrnlSize>pPager->journalOff
1039 ){
1040 rc = sqlite3OsTruncate(pPager->jfd, pPager->journalOff);
1041 }
1042 return rc;
1043 }
1044
1045 /*
1046 ** Find a page in the hash table given its page number. Return
1047 ** a pointer to the page or NULL if the requested page is not
1048 ** already in memory.
1049 */
1050 static PgHdr *pager_lookup(Pager *pPager, Pgno pgno){
1051 PgHdr *p; /* Return value */
1052
1053 /* It is not possible for a call to PcacheFetch() with createFlag==0 to
1054 ** fail, since no attempt to allocate dynamic memory will be made.
1055 */
1056 (void)sqlite3PcacheFetch(pPager->pPCache, pgno, 0, &p);
1057 return p;
1058 }
1059
1060 /*
1061 ** Unless the pager is in error-state, discard all in-memory pages. If
1062 ** the pager is in error-state, then this call is a no-op.
1063 **
1064 ** TODO: Why can we not reset the pager while in error state?
1065 */
1066 static void pager_reset(Pager *pPager){
1067 if( SQLITE_OK==pPager->errCode ){
1068 sqlite3BackupRestart(pPager->pBackup);
1069 sqlite3PcacheClear(pPager->pPCache);
1070 pPager->dbSizeValid = 0;
1071 }
1072 }
1073
1074 /*
1075 ** Free all structures in the Pager.aSavepoint[] array and set both
1076 ** Pager.aSavepoint and Pager.nSavepoint to zero. Close the sub-journal
1077 ** if it is open and the pager is not in exclusive mode.
1078 */
1079 static void releaseAllSavepoints(Pager *pPager){
1080 int ii; /* Iterator for looping through Pager.aSavepoint */
1081 for(ii=0; ii<pPager->nSavepoint; ii++){
1082 sqlite3BitvecDestroy(pPager->aSavepoint[ii].pInSavepoint);
1083 }
1084 if( !pPager->exclusiveMode || sqlite3IsMemJournal(pPager->sjfd) ){
1085 sqlite3OsClose(pPager->sjfd);
1086 }
1087 sqlite3_free(pPager->aSavepoint);
1088 pPager->aSavepoint = 0;
1089 pPager->nSavepoint = 0;
1090 pPager->nSubRec = 0;
1091 }
1092
1093 /*
1094 ** Set the bit number pgno in the PagerSavepoint.pInSavepoint
1095 ** bitvecs of all open savepoints. Return SQLITE_OK if successful
1096 ** or SQLITE_NOMEM if a malloc failure occurs.
1097 */
1098 static int addToSavepointBitvecs(Pager *pPager, Pgno pgno){
1099 int ii; /* Loop counter */
1100 int rc = SQLITE_OK; /* Result code */
1101
1102 for(ii=0; ii<pPager->nSavepoint; ii++){
1103 PagerSavepoint *p = &pPager->aSavepoint[ii];
1104 if( pgno<=p->nOrig ){
1105 rc |= sqlite3BitvecSet(p->pInSavepoint, pgno);
1106 testcase( rc==SQLITE_NOMEM );
1107 assert( rc==SQLITE_OK || rc==SQLITE_NOMEM );
1108 }
1109 }
1110 return rc;
1111 }
1112
1113 /*
1114 ** Unlock the database file. This function is a no-op if the pager
1115 ** is in exclusive mode.
1116 **
1117 ** If the pager is currently in error state, discard the contents of
1118 ** the cache and reset the Pager structure internal state. If there is
1119 ** an open journal-file, then the next time a shared-lock is obtained
1120 ** on the pager file (by this or any other process), it will be
1121 ** treated as a hot-journal and rolled back.
1122 */
1123 static void pager_unlock(Pager *pPager){
1124 if( !pPager->exclusiveMode ){
1125 int rc; /* Return code */
1126
1127 /* Always close the journal file when dropping the database lock.
1128 ** Otherwise, another connection with journal_mode=delete might
1129 ** delete the file out from under us.
1130 */
1131 sqlite3OsClose(pPager->jfd);
1132 sqlite3BitvecDestroy(pPager->pInJournal);
1133 pPager->pInJournal = 0;
1134 releaseAllSavepoints(pPager);
1135
1136 /* If the file is unlocked, somebody else might change it. The
1137 ** values stored in Pager.dbSize etc. might become invalid if
1138 ** this happens. TODO: Really, this doesn't need to be cleared
1139 ** until the change-counter check fails in PagerSharedLock().
1140 */
1141 pPager->dbSizeValid = 0;
1142
1143 rc = osUnlock(pPager->fd, NO_LOCK);
1144 if( rc ){
1145 pPager->errCode = rc;
1146 }
1147 IOTRACE(("UNLOCK %p\n", pPager))
1148
1149 /* If Pager.errCode is set, the contents of the pager cache cannot be
1150 ** trusted. Now that the pager file is unlocked, the contents of the
1151 ** cache can be discarded and the error code safely cleared.
1152 */
1153 if( pPager->errCode ){
1154 if( rc==SQLITE_OK ){
1155 pPager->errCode = SQLITE_OK;
1156 }
1157 pager_reset(pPager);
1158 }
1159
1160 pPager->changeCountDone = 0;
1161 pPager->state = PAGER_UNLOCK;
1162 }
1163 }
1164
1165 /*
1166 ** This function should be called when an IOERR, CORRUPT or FULL error
1167 ** may have occurred. The first argument is a pointer to the pager
1168 ** structure, the second the error-code about to be returned by a pager
1169 ** API function. The value returned is a copy of the second argument
1170 ** to this function.
1171 **
1172 ** If the second argument is SQLITE_IOERR, SQLITE_CORRUPT, or SQLITE_FULL
1173 ** the error becomes persistent. Until the persisten error is cleared,
1174 ** subsequent API calls on this Pager will immediately return the same
1175 ** error code.
1176 **
1177 ** A persistent error indicates that the contents of the pager-cache
1178 ** cannot be trusted. This state can be cleared by completely discarding
1179 ** the contents of the pager-cache. If a transaction was active when
1180 ** the persistent error occurred, then the rollback journal may need
1181 ** to be replayed to restore the contents of the database file (as if
1182 ** it were a hot-journal).
1183 */
1184 static int pager_error(Pager *pPager, int rc){
1185 int rc2 = rc & 0xff;
1186 assert( rc==SQLITE_OK || !MEMDB );
1187 assert(
1188 pPager->errCode==SQLITE_FULL ||
1189 pPager->errCode==SQLITE_OK ||
1190 (pPager->errCode & 0xff)==SQLITE_IOERR
1191 );
1192 if( rc2==SQLITE_FULL || rc2==SQLITE_IOERR ){
1193 pPager->errCode = rc;
1194 }
1195 return rc;
1196 }
1197
1198 /*
1199 ** Execute a rollback if a transaction is active and unlock the
1200 ** database file.
1201 **
1202 ** If the pager has already entered the error state, do not attempt
1203 ** the rollback at this time. Instead, pager_unlock() is called. The
1204 ** call to pager_unlock() will discard all in-memory pages, unlock
1205 ** the database file and clear the error state. If this means that
1206 ** there is a hot-journal left in the file-system, the next connection
1207 ** to obtain a shared lock on the pager (which may be this one) will
1208 ** roll it back.
1209 **
1210 ** If the pager has not already entered the error state, but an IO or
1211 ** malloc error occurs during a rollback, then this will itself cause
1212 ** the pager to enter the error state. Which will be cleared by the
1213 ** call to pager_unlock(), as described above.
1214 */
1215 static void pagerUnlockAndRollback(Pager *pPager){
1216 if( pPager->errCode==SQLITE_OK && pPager->state>=PAGER_RESERVED ){
1217 sqlite3BeginBenignMalloc();
1218 sqlite3PagerRollback(pPager);
1219 sqlite3EndBenignMalloc();
1220 }
1221 pager_unlock(pPager);
1222 }
1223
1224 /*
1225 ** This routine ends a transaction. A transaction is usually ended by
1226 ** either a COMMIT or a ROLLBACK operation. This routine may be called
1227 ** after rollback of a hot-journal, or if an error occurs while opening
1228 ** the journal file or writing the very first journal-header of a
1229 ** database transaction.
1230 **
1231 ** If the pager is in PAGER_SHARED or PAGER_UNLOCK state when this
1232 ** routine is called, it is a no-op (returns SQLITE_OK).
1233 **
1234 ** Otherwise, any active savepoints are released.
1235 **
1236 ** If the journal file is open, then it is "finalized". Once a journal
1237 ** file has been finalized it is not possible to use it to roll back a
1238 ** transaction. Nor will it be considered to be a hot-journal by this
1239 ** or any other database connection. Exactly how a journal is finalized
1240 ** depends on whether or not the pager is running in exclusive mode and
1241 ** the current journal-mode (Pager.journalMode value), as follows:
1242 **
1243 ** journalMode==MEMORY
1244 ** Journal file descriptor is simply closed. This destroys an
1245 ** in-memory journal.
1246 **
1247 ** journalMode==TRUNCATE
1248 ** Journal file is truncated to zero bytes in size.
1249 **
1250 ** journalMode==PERSIST
1251 ** The first 28 bytes of the journal file are zeroed. This invalidates
1252 ** the first journal header in the file, and hence the entire journal
1253 ** file. An invalid journal file cannot be rolled back.
1254 **
1255 ** journalMode==DELETE
1256 ** The journal file is closed and deleted using sqlite3OsDelete().
1257 **
1258 ** If the pager is running in exclusive mode, this method of finalizing
1259 ** the journal file is never used. Instead, if the journalMode is
1260 ** DELETE and the pager is in exclusive mode, the method described under
1261 ** journalMode==PERSIST is used instead.
1262 **
1263 ** After the journal is finalized, if running in non-exclusive mode, the
1264 ** pager moves to PAGER_SHARED state (and downgrades the lock on the
1265 ** database file accordingly).
1266 **
1267 ** If the pager is running in exclusive mode and is in PAGER_SYNCED state,
1268 ** it moves to PAGER_EXCLUSIVE. No locks are downgraded when running in
1269 ** exclusive mode.
1270 **
1271 ** SQLITE_OK is returned if no error occurs. If an error occurs during
1272 ** any of the IO operations to finalize the journal file or unlock the
1273 ** database then the IO error code is returned to the user. If the
1274 ** operation to finalize the journal file fails, then the code still
1275 ** tries to unlock the database file if not in exclusive mode. If the
1276 ** unlock operation fails as well, then the first error code related
1277 ** to the first error encountered (the journal finalization one) is
1278 ** returned.
1279 */
1280 static int pager_end_transaction(Pager *pPager, int hasMaster){
1281 int rc = SQLITE_OK; /* Error code from journal finalization operation */
1282 int rc2 = SQLITE_OK; /* Error code from db file unlock operation */
1283
1284 if( pPager->state<PAGER_RESERVED ){
1285 return SQLITE_OK;
1286 }
1287 releaseAllSavepoints(pPager);
1288
1289 assert( isOpen(pPager->jfd) || pPager->pInJournal==0 );
1290 if( isOpen(pPager->jfd) ){
1291
1292 /* Finalize the journal file. */
1293 if( sqlite3IsMemJournal(pPager->jfd) ){
1294 assert( pPager->journalMode==PAGER_JOURNALMODE_MEMORY );
1295 sqlite3OsClose(pPager->jfd);
1296 }else if( pPager->journalMode==PAGER_JOURNALMODE_TRUNCATE ){
1297 if( pPager->journalOff==0 ){
1298 rc = SQLITE_OK;
1299 }else{
1300 rc = sqlite3OsTruncate(pPager->jfd, 0);
1301 }
1302 pPager->journalOff = 0;
1303 pPager->journalStarted = 0;
1304 }else if( pPager->exclusiveMode
1305 || pPager->journalMode==PAGER_JOURNALMODE_PERSIST
1306 ){
1307 rc = zeroJournalHdr(pPager, hasMaster);
1308 pager_error(pPager, rc);
1309 pPager->journalOff = 0;
1310 pPager->journalStarted = 0;
1311 }else{
1312 /* This branch may be executed with Pager.journalMode==MEMORY if
1313 ** a hot-journal was just rolled back. In this case the journal
1314 ** file should be closed and deleted. If this connection writes to
1315 ** the database file, it will do so using an in-memory journal. */
1316 assert( pPager->journalMode==PAGER_JOURNALMODE_DELETE
1317 || pPager->journalMode==PAGER_JOURNALMODE_MEMORY
1318 );
1319 sqlite3OsClose(pPager->jfd);
1320 if( !pPager->tempFile ){
1321 rc = sqlite3OsDelete(pPager->pVfs, pPager->zJournal, 0);
1322 }
1323 }
1324
1325 #ifdef SQLITE_CHECK_PAGES
1326 sqlite3PcacheIterateDirty(pPager->pPCache, pager_set_pagehash);
1327 #endif
1328
1329 sqlite3PcacheCleanAll(pPager->pPCache);
1330 sqlite3BitvecDestroy(pPager->pInJournal);
1331 pPager->pInJournal = 0;
1332 pPager->nRec = 0;
1333 }
1334
1335 if( !pPager->exclusiveMode ){
1336 rc2 = osUnlock(pPager->fd, SHARED_LOCK);
1337 pPager->state = PAGER_SHARED;
1338 pPager->changeCountDone = 0;
1339 }else if( pPager->state==PAGER_SYNCED ){
1340 pPager->state = PAGER_EXCLUSIVE;
1341 }
1342 pPager->setMaster = 0;
1343 pPager->needSync = 0;
1344 pPager->dbModified = 0;
1345
1346 /* TODO: Is this optimal? Why is the db size invalidated here
1347 ** when the database file is not unlocked? */
1348 pPager->dbOrigSize = 0;
1349 sqlite3PcacheTruncate(pPager->pPCache, pPager->dbSize);
1350 if( !MEMDB ){
1351 pPager->dbSizeValid = 0;
1352 }
1353
1354 return (rc==SQLITE_OK?rc2:rc);
1355 }
1356
1357 /*
1358 ** Parameter aData must point to a buffer of pPager->pageSize bytes
1359 ** of data. Compute and return a checksum based ont the contents of the
1360 ** page of data and the current value of pPager->cksumInit.
1361 **
1362 ** This is not a real checksum. It is really just the sum of the
1363 ** random initial value (pPager->cksumInit) and every 200th byte
1364 ** of the page data, starting with byte offset (pPager->pageSize%200).
1365 ** Each byte is interpreted as an 8-bit unsigned integer.
1366 **
1367 ** Changing the formula used to compute this checksum results in an
1368 ** incompatible journal file format.
1369 **
1370 ** If journal corruption occurs due to a power failure, the most likely
1371 ** scenario is that one end or the other of the record will be changed.
1372 ** It is much less likely that the two ends of the journal record will be
1373 ** correct and the middle be corrupt. Thus, this "checksum" scheme,
1374 ** though fast and simple, catches the mostly likely kind of corruption.
1375 */
1376 static u32 pager_cksum(Pager *pPager, const u8 *aData){
1377 u32 cksum = pPager->cksumInit; /* Checksum value to return */
1378 int i = pPager->pageSize-200; /* Loop counter */
1379 while( i>0 ){
1380 cksum += aData[i];
1381 i -= 200;
1382 }
1383 return cksum;
1384 }
1385
1386 /*
1387 ** Read a single page from either the journal file (if isMainJrnl==1) or
1388 ** from the sub-journal (if isMainJrnl==0) and playback that page.
1389 ** The page begins at offset *pOffset into the file. The *pOffset
1390 ** value is increased to the start of the next page in the journal.
1391 **
1392 ** The isMainJrnl flag is true if this is the main rollback journal and
1393 ** false for the statement journal. The main rollback journal uses
1394 ** checksums - the statement journal does not.
1395 **
1396 ** If the page number of the page record read from the (sub-)journal file
1397 ** is greater than the current value of Pager.dbSize, then playback is
1398 ** skipped and SQLITE_OK is returned.
1399 **
1400 ** If pDone is not NULL, then it is a record of pages that have already
1401 ** been played back. If the page at *pOffset has already been played back
1402 ** (if the corresponding pDone bit is set) then skip the playback.
1403 ** Make sure the pDone bit corresponding to the *pOffset page is set
1404 ** prior to returning.
1405 **
1406 ** If the page record is successfully read from the (sub-)journal file
1407 ** and played back, then SQLITE_OK is returned. If an IO error occurs
1408 ** while reading the record from the (sub-)journal file or while writing
1409 ** to the database file, then the IO error code is returned. If data
1410 ** is successfully read from the (sub-)journal file but appears to be
1411 ** corrupted, SQLITE_DONE is returned. Data is considered corrupted in
1412 ** two circumstances:
1413 **
1414 ** * If the record page-number is illegal (0 or PAGER_MJ_PGNO), or
1415 ** * If the record is being rolled back from the main journal file
1416 ** and the checksum field does not match the record content.
1417 **
1418 ** Neither of these two scenarios are possible during a savepoint rollback.
1419 **
1420 ** If this is a savepoint rollback, then memory may have to be dynamically
1421 ** allocated by this function. If this is the case and an allocation fails,
1422 ** SQLITE_NOMEM is returned.
1423 */
1424 static int pager_playback_one_page(
1425 Pager *pPager, /* The pager being played back */
1426 int isMainJrnl, /* 1 -> main journal. 0 -> sub-journal. */
1427 int isUnsync, /* True if reading from unsynced main journal */
1428 i64 *pOffset, /* Offset of record to playback */
1429 int isSavepnt, /* True for a savepoint rollback */
1430 Bitvec *pDone /* Bitvec of pages already played back */
1431 ){
1432 int rc;
1433 PgHdr *pPg; /* An existing page in the cache */
1434 Pgno pgno; /* The page number of a page in journal */
1435 u32 cksum; /* Checksum used for sanity checking */
1436 u8 *aData; /* Temporary storage for the page */
1437 sqlite3_file *jfd; /* The file descriptor for the journal file */
1438
1439 assert( (isMainJrnl&~1)==0 ); /* isMainJrnl is 0 or 1 */
1440 assert( (isSavepnt&~1)==0 ); /* isSavepnt is 0 or 1 */
1441 assert( isMainJrnl || pDone ); /* pDone always used on sub-journals */
1442 assert( isSavepnt || pDone==0 ); /* pDone never used on non-savepoint */
1443
1444 aData = (u8*)pPager->pTmpSpace;
1445 assert( aData ); /* Temp storage must have already been allocated */
1446
1447 /* Read the page number and page data from the journal or sub-journal
1448 ** file. Return an error code to the caller if an IO error occurs.
1449 */
1450 jfd = isMainJrnl ? pPager->jfd : pPager->sjfd;
1451 rc = read32bits(jfd, *pOffset, &pgno);
1452 if( rc!=SQLITE_OK ) return rc;
1453 rc = sqlite3OsRead(jfd, aData, pPager->pageSize, (*pOffset)+4);
1454 if( rc!=SQLITE_OK ) return rc;
1455 *pOffset += pPager->pageSize + 4 + isMainJrnl*4;
1456
1457 /* Sanity checking on the page. This is more important that I originally
1458 ** thought. If a power failure occurs while the journal is being written,
1459 ** it could cause invalid data to be written into the journal. We need to
1460 ** detect this invalid data (with high probability) and ignore it.
1461 */
1462 if( pgno==0 || pgno==PAGER_MJ_PGNO(pPager) ){
1463 assert( !isSavepnt );
1464 return SQLITE_DONE;
1465 }
1466 if( pgno>(Pgno)pPager->dbSize || sqlite3BitvecTest(pDone, pgno) ){
1467 return SQLITE_OK;
1468 }
1469 if( isMainJrnl ){
1470 rc = read32bits(jfd, (*pOffset)-4, &cksum);
1471 if( rc ) return rc;
1472 if( !isSavepnt && pager_cksum(pPager, aData)!=cksum ){
1473 return SQLITE_DONE;
1474 }
1475 }
1476
1477 if( pDone && (rc = sqlite3BitvecSet(pDone, pgno))!=SQLITE_OK ){
1478 return rc;
1479 }
1480
1481 assert( pPager->state==PAGER_RESERVED || pPager->state>=PAGER_EXCLUSIVE );
1482
1483 /* If the pager is in RESERVED state, then there must be a copy of this
1484 ** page in the pager cache. In this case just update the pager cache,
1485 ** not the database file. The page is left marked dirty in this case.
1486 **
1487 ** An exception to the above rule: If the database is in no-sync mode
1488 ** and a page is moved during an incremental vacuum then the page may
1489 ** not be in the pager cache. Later: if a malloc() or IO error occurs
1490 ** during a Movepage() call, then the page may not be in the cache
1491 ** either. So the condition described in the above paragraph is not
1492 ** assert()able.
1493 **
1494 ** If in EXCLUSIVE state, then we update the pager cache if it exists
1495 ** and the main file. The page is then marked not dirty.
1496 **
1497 ** Ticket #1171: The statement journal might contain page content that is
1498 ** different from the page content at the start of the transaction.
1499 ** This occurs when a page is changed prior to the start of a statement
1500 ** then changed again within the statement. When rolling back such a
1501 ** statement we must not write to the original database unless we know
1502 ** for certain that original page contents are synced into the main rollback
1503 ** journal. Otherwise, a power loss might leave modified data in the
1504 ** database file without an entry in the rollback journal that can
1505 ** restore the database to its original form. Two conditions must be
1506 ** met before writing to the database files. (1) the database must be
1507 ** locked. (2) we know that the original page content is fully synced
1508 ** in the main journal either because the page is not in cache or else
1509 ** the page is marked as needSync==0.
1510 **
1511 ** 2008-04-14: When attempting to vacuum a corrupt database file, it
1512 ** is possible to fail a statement on a database that does not yet exist.
1513 ** Do not attempt to write if database file has never been opened.
1514 */
1515 pPg = pager_lookup(pPager, pgno);
1516 assert( pPg || !MEMDB );
1517 PAGERTRACE(("PLAYBACK %d page %d hash(%08x) %s\n",
1518 PAGERID(pPager), pgno, pager_datahash(pPager->pageSize, aData),
1519 (isMainJrnl?"main-journal":"sub-journal")
1520 ));
1521 if( (pPager->state>=PAGER_EXCLUSIVE)
1522 && (pPg==0 || 0==(pPg->flags&PGHDR_NEED_SYNC))
1523 && isOpen(pPager->fd)
1524 && !isUnsync
1525 ){
1526 i64 ofst = (pgno-1)*(i64)pPager->pageSize;
1527 rc = sqlite3OsWrite(pPager->fd, aData, pPager->pageSize, ofst);
1528 if( pgno>pPager->dbFileSize ){
1529 pPager->dbFileSize = pgno;
1530 }
1531 if( pPager->pBackup ){
1532 CODEC1(pPager, aData, pgno, 3, rc=SQLITE_NOMEM);
1533 sqlite3BackupUpdate(pPager->pBackup, pgno, aData);
1534 CODEC1(pPager, aData, pgno, 0, rc=SQLITE_NOMEM);
1535 }
1536 }else if( !isMainJrnl && pPg==0 ){
1537 /* If this is a rollback of a savepoint and data was not written to
1538 ** the database and the page is not in-memory, there is a potential
1539 ** problem. When the page is next fetched by the b-tree layer, it
1540 ** will be read from the database file, which may or may not be
1541 ** current.
1542 **
1543 ** There are a couple of different ways this can happen. All are quite
1544 ** obscure. When running in synchronous mode, this can only happen
1545 ** if the page is on the free-list at the start of the transaction, then
1546 ** populated, then moved using sqlite3PagerMovepage().
1547 **
1548 ** The solution is to add an in-memory page to the cache containing
1549 ** the data just read from the sub-journal. Mark the page as dirty
1550 ** and if the pager requires a journal-sync, then mark the page as
1551 ** requiring a journal-sync before it is written.
1552 */
1553 assert( isSavepnt );
1554 if( (rc = sqlite3PagerAcquire(pPager, pgno, &pPg, 1))!=SQLITE_OK ){
1555 return rc;
1556 }
1557 pPg->flags &= ~PGHDR_NEED_READ;
1558 sqlite3PcacheMakeDirty(pPg);
1559 }
1560 if( pPg ){
1561 /* No page should ever be explicitly rolled back that is in use, except
1562 ** for page 1 which is held in use in order to keep the lock on the
1563 ** database active. However such a page may be rolled back as a result
1564 ** of an internal error resulting in an automatic call to
1565 ** sqlite3PagerRollback().
1566 */
1567 void *pData;
1568 pData = pPg->pData;
1569 memcpy(pData, aData, pPager->pageSize);
1570 pPager->xReiniter(pPg);
1571 if( isMainJrnl && (!isSavepnt || *pOffset<=pPager->journalHdr) ){
1572 /* If the contents of this page were just restored from the main
1573 ** journal file, then its content must be as they were when the
1574 ** transaction was first opened. In this case we can mark the page
1575 ** as clean, since there will be no need to write it out to the database.
1576 **
1577 ** There is one exception to this rule. If the page is being rolled
1578 ** back as part of a savepoint (or statement) rollback from an
1579 ** unsynced portion of the main journal file, then it is not safe
1580 ** to mark the page as clean. This is because marking the page as
1581 ** clean will clear the PGHDR_NEED_SYNC flag. Since the page is
1582 ** already in the journal file (recorded in Pager.pInJournal) and
1583 ** the PGHDR_NEED_SYNC flag is cleared, if the page is written to
1584 ** again within this transaction, it will be marked as dirty but
1585 ** the PGHDR_NEED_SYNC flag will not be set. It could then potentially
1586 ** be written out into the database file before its journal file
1587 ** segment is synced. If a crash occurs during or following this,
1588 ** database corruption may ensue.
1589 */
1590 sqlite3PcacheMakeClean(pPg);
1591 }
1592 #ifdef SQLITE_CHECK_PAGES
1593 pPg->pageHash = pager_pagehash(pPg);
1594 #endif
1595 /* If this was page 1, then restore the value of Pager.dbFileVers.
1596 ** Do this before any decoding. */
1597 if( pgno==1 ){
1598 memcpy(&pPager->dbFileVers, &((u8*)pData)[24],sizeof(pPager->dbFileVers));
1599 }
1600
1601 /* Decode the page just read from disk */
1602 CODEC1(pPager, pData, pPg->pgno, 3, rc=SQLITE_NOMEM);
1603 sqlite3PcacheRelease(pPg);
1604 }
1605 return rc;
1606 }
1607
1608 /*
1609 ** Parameter zMaster is the name of a master journal file. A single journal
1610 ** file that referred to the master journal file has just been rolled back.
1611 ** This routine checks if it is possible to delete the master journal file,
1612 ** and does so if it is.
1613 **
1614 ** Argument zMaster may point to Pager.pTmpSpace. So that buffer is not
1615 ** available for use within this function.
1616 **
1617 ** When a master journal file is created, it is populated with the names
1618 ** of all of its child journals, one after another, formatted as utf-8
1619 ** encoded text. The end of each child journal file is marked with a
1620 ** nul-terminator byte (0x00). i.e. the entire contents of a master journal
1621 ** file for a transaction involving two databases might be:
1622 **
1623 ** "/home/bill/a.db-journal\x00/home/bill/b.db-journal\x00"
1624 **
1625 ** A master journal file may only be deleted once all of its child
1626 ** journals have been rolled back.
1627 **
1628 ** This function reads the contents of the master-journal file into
1629 ** memory and loops through each of the child journal names. For
1630 ** each child journal, it checks if:
1631 **
1632 ** * if the child journal exists, and if so
1633 ** * if the child journal contains a reference to master journal
1634 ** file zMaster
1635 **
1636 ** If a child journal can be found that matches both of the criteria
1637 ** above, this function returns without doing anything. Otherwise, if
1638 ** no such child journal can be found, file zMaster is deleted from
1639 ** the file-system using sqlite3OsDelete().
1640 **
1641 ** If an IO error occurs within this function, an error code is returned. This
1642 ** function allocates memory by calling sqlite3Malloc(). If an allocation
1643 ** fails, SQLITE_NOMEM is returned. Otherwise, if no IO or malloc errors
1644 ** occur, SQLITE_OK is returned.
1645 **
1646 ** TODO: This function allocates a single block of memory to load
1647 ** the entire contents of the master journal file. This could be
1648 ** a couple of kilobytes or so - potentially larger than the page
1649 ** size.
1650 */
1651 static int pager_delmaster(Pager *pPager, const char *zMaster){
1652 sqlite3_vfs *pVfs = pPager->pVfs;
1653 int rc; /* Return code */
1654 sqlite3_file *pMaster; /* Malloc'd master-journal file descriptor */
1655 sqlite3_file *pJournal; /* Malloc'd child-journal file descriptor */
1656 char *zMasterJournal = 0; /* Contents of master journal file */
1657 i64 nMasterJournal; /* Size of master journal file */
1658
1659 /* Allocate space for both the pJournal and pMaster file descriptors.
1660 ** If successful, open the master journal file for reading.
1661 */
1662 pMaster = (sqlite3_file *)sqlite3MallocZero(pVfs->szOsFile * 2);
1663 pJournal = (sqlite3_file *)(((u8 *)pMaster) + pVfs->szOsFile);
1664 if( !pMaster ){
1665 rc = SQLITE_NOMEM;
1666 }else{
1667 const int flags = (SQLITE_OPEN_READONLY|SQLITE_OPEN_MASTER_JOURNAL);
1668 rc = sqlite3OsOpen(pVfs, zMaster, pMaster, flags, 0);
1669 }
1670 if( rc!=SQLITE_OK ) goto delmaster_out;
1671
1672 rc = sqlite3OsFileSize(pMaster, &nMasterJournal);
1673 if( rc!=SQLITE_OK ) goto delmaster_out;
1674
1675 if( nMasterJournal>0 ){
1676 char *zJournal;
1677 char *zMasterPtr = 0;
1678 int nMasterPtr = pVfs->mxPathname+1;
1679
1680 /* Load the entire master journal file into space obtained from
1681 ** sqlite3_malloc() and pointed to by zMasterJournal.
1682 */
1683 zMasterJournal = sqlite3Malloc((int)nMasterJournal + nMasterPtr + 1);
1684 if( !zMasterJournal ){
1685 rc = SQLITE_NOMEM;
1686 goto delmaster_out;
1687 }
1688 zMasterPtr = &zMasterJournal[nMasterJournal+1];
1689 rc = sqlite3OsRead(pMaster, zMasterJournal, (int)nMasterJournal, 0);
1690 if( rc!=SQLITE_OK ) goto delmaster_out;
1691 zMasterJournal[nMasterJournal] = 0;
1692
1693 zJournal = zMasterJournal;
1694 while( (zJournal-zMasterJournal)<nMasterJournal ){
1695 int exists;
1696 rc = sqlite3OsAccess(pVfs, zJournal, SQLITE_ACCESS_EXISTS, &exists);
1697 if( rc!=SQLITE_OK ){
1698 goto delmaster_out;
1699 }
1700 if( exists ){
1701 /* One of the journals pointed to by the master journal exists.
1702 ** Open it and check if it points at the master journal. If
1703 ** so, return without deleting the master journal file.
1704 */
1705 int c;
1706 int flags = (SQLITE_OPEN_READONLY|SQLITE_OPEN_MAIN_JOURNAL);
1707 rc = sqlite3OsOpen(pVfs, zJournal, pJournal, flags, 0);
1708 if( rc!=SQLITE_OK ){
1709 goto delmaster_out;
1710 }
1711
1712 rc = readMasterJournal(pJournal, zMasterPtr, nMasterPtr);
1713 sqlite3OsClose(pJournal);
1714 if( rc!=SQLITE_OK ){
1715 goto delmaster_out;
1716 }
1717
1718 c = zMasterPtr[0]!=0 && strcmp(zMasterPtr, zMaster)==0;
1719 if( c ){
1720 /* We have a match. Do not delete the master journal file. */
1721 goto delmaster_out;
1722 }
1723 }
1724 zJournal += (sqlite3Strlen30(zJournal)+1);
1725 }
1726 }
1727
1728 rc = sqlite3OsDelete(pVfs, zMaster, 0);
1729
1730 delmaster_out:
1731 if( zMasterJournal ){
1732 sqlite3_free(zMasterJournal);
1733 }
1734 if( pMaster ){
1735 sqlite3OsClose(pMaster);
1736 assert( !isOpen(pJournal) );
1737 }
1738 sqlite3_free(pMaster);
1739 return rc;
1740 }
1741
1742
1743 /*
1744 ** This function is used to change the actual size of the database
1745 ** file in the file-system. This only happens when committing a transaction,
1746 ** or rolling back a transaction (including rolling back a hot-journal).
1747 **
1748 ** If the main database file is not open, or an exclusive lock is not
1749 ** held, this function is a no-op. Otherwise, the size of the file is
1750 ** changed to nPage pages (nPage*pPager->pageSize bytes). If the file
1751 ** on disk is currently larger than nPage pages, then use the VFS
1752 ** xTruncate() method to truncate it.
1753 **
1754 ** Or, it might be the case that the file on disk is smaller than
1755 ** nPage pages. Some operating system implementations can get confused if
1756 ** you try to truncate a file to some size that is larger than it
1757 ** currently is, so detect this case and write a single zero byte to
1758 ** the end of the new file instead.
1759 **
1760 ** If successful, return SQLITE_OK. If an IO error occurs while modifying
1761 ** the database file, return the error code to the caller.
1762 */
1763 static int pager_truncate(Pager *pPager, Pgno nPage){
1764 int rc = SQLITE_OK;
1765 if( pPager->state>=PAGER_EXCLUSIVE && isOpen(pPager->fd) ){
1766 i64 currentSize, newSize;
1767 /* TODO: Is it safe to use Pager.dbFileSize here? */
1768 rc = sqlite3OsFileSize(pPager->fd, &currentSize);
1769 newSize = pPager->pageSize*(i64)nPage;
1770 if( rc==SQLITE_OK && currentSize!=newSize ){
1771 if( currentSize>newSize ){
1772 rc = sqlite3OsTruncate(pPager->fd, newSize);
1773 }else{
1774 rc = sqlite3OsWrite(pPager->fd, "", 1, newSize-1);
1775 }
1776 if( rc==SQLITE_OK ){
1777 pPager->dbFileSize = nPage;
1778 }
1779 }
1780 }
1781 return rc;
1782 }
1783
1784 /*
1785 ** Set the value of the Pager.sectorSize variable for the given
1786 ** pager based on the value returned by the xSectorSize method
1787 ** of the open database file. The sector size will be used
1788 ** to determine the size and alignment of journal header and
1789 ** master journal pointers within created journal files.
1790 **
1791 ** For temporary files the effective sector size is always 512 bytes.
1792 **
1793 ** Otherwise, for non-temporary files, the effective sector size is
1794 ** the value returned by the xSectorSize() method rounded up to 512 if
1795 ** it is less than 512, or rounded down to MAX_SECTOR_SIZE if it
1796 ** is greater than MAX_SECTOR_SIZE.
1797 */
1798 static void setSectorSize(Pager *pPager){
1799 assert( isOpen(pPager->fd) || pPager->tempFile );
1800
1801 if( !pPager->tempFile ){
1802 /* Sector size doesn't matter for temporary files. Also, the file
1803 ** may not have been opened yet, in which case the OsSectorSize()
1804 ** call will segfault.
1805 */
1806 pPager->sectorSize = sqlite3OsSectorSize(pPager->fd);
1807 }
1808 if( pPager->sectorSize<512 ){
1809 pPager->sectorSize = 512;
1810 }
1811 if( pPager->sectorSize>MAX_SECTOR_SIZE ){
1812 assert( MAX_SECTOR_SIZE>=512 );
1813 pPager->sectorSize = MAX_SECTOR_SIZE;
1814 }
1815 }
1816
1817 /*
1818 ** Playback the journal and thus restore the database file to
1819 ** the state it was in before we started making changes.
1820 **
1821 ** The journal file format is as follows:
1822 **
1823 ** (1) 8 byte prefix. A copy of aJournalMagic[].
1824 ** (2) 4 byte big-endian integer which is the number of valid page records
1825 ** in the journal. If this value is 0xffffffff, then compute the
1826 ** number of page records from the journal size.
1827 ** (3) 4 byte big-endian integer which is the initial value for the
1828 ** sanity checksum.
1829 ** (4) 4 byte integer which is the number of pages to truncate the
1830 ** database to during a rollback.
1831 ** (5) 4 byte big-endian integer which is the sector size. The header
1832 ** is this many bytes in size.
1833 ** (6) 4 byte big-endian integer which is the page size.
1834 ** (7) 4 byte integer which is the number of bytes in the master journal
1835 ** name. The value may be zero (indicate that there is no master
1836 ** journal.)
1837 ** (8) N bytes of the master journal name. The name will be nul-terminated
1838 ** and might be shorter than the value read from (7). If the first byte
1839 ** of the name is \000 then there is no master journal. The master
1840 ** journal name is stored in UTF-8.
1841 ** (9) Zero or more pages instances, each as follows:
1842 ** + 4 byte page number.
1843 ** + pPager->pageSize bytes of data.
1844 ** + 4 byte checksum
1845 **
1846 ** When we speak of the journal header, we mean the first 8 items above.
1847 ** Each entry in the journal is an instance of the 9th item.
1848 **
1849 ** Call the value from the second bullet "nRec". nRec is the number of
1850 ** valid page entries in the journal. In most cases, you can compute the
1851 ** value of nRec from the size of the journal file. But if a power
1852 ** failure occurred while the journal was being written, it could be the
1853 ** case that the size of the journal file had already been increased but
1854 ** the extra entries had not yet made it safely to disk. In such a case,
1855 ** the value of nRec computed from the file size would be too large. For
1856 ** that reason, we always use the nRec value in the header.
1857 **
1858 ** If the nRec value is 0xffffffff it means that nRec should be computed
1859 ** from the file size. This value is used when the user selects the
1860 ** no-sync option for the journal. A power failure could lead to corruption
1861 ** in this case. But for things like temporary table (which will be
1862 ** deleted when the power is restored) we don't care.
1863 **
1864 ** If the file opened as the journal file is not a well-formed
1865 ** journal file then all pages up to the first corrupted page are rolled
1866 ** back (or no pages if the journal header is corrupted). The journal file
1867 ** is then deleted and SQLITE_OK returned, just as if no corruption had
1868 ** been encountered.
1869 **
1870 ** If an I/O or malloc() error occurs, the journal-file is not deleted
1871 ** and an error code is returned.
1872 **
1873 ** The isHot parameter indicates that we are trying to rollback a journal
1874 ** that might be a hot journal. Or, it could be that the journal is
1875 ** preserved because of JOURNALMODE_PERSIST or JOURNALMODE_TRUNCATE.
1876 ** If the journal really is hot, reset the pager cache prior to rolling
1877 ** back any content. If the journal is merely persistent, no reset is
1878 ** needed.
1879 */
1880 static int pager_playback(Pager *pPager, int isHot){
1881 sqlite3_vfs *pVfs = pPager->pVfs;
1882 i64 szJ; /* Size of the journal file in bytes */
1883 u32 nRec; /* Number of Records in the journal */
1884 u32 u; /* Unsigned loop counter */
1885 Pgno mxPg = 0; /* Size of the original file in pages */
1886 int rc; /* Result code of a subroutine */
1887 int res = 1; /* Set by sqlite3OsAccess(); stays 1 if the journal names no master journal */
1888 char *zMaster = 0; /* Name of master journal file if any */
1889 int needPagerReset; /* True to call pager_reset() before the first page is rolled back */
1890
1891 /* Figure out how many records are in the journal. Abort early if
1892 ** the journal is empty.
1893 */
1894 assert( isOpen(pPager->jfd) );
1895 rc = sqlite3OsFileSize(pPager->jfd, &szJ);
1896 if( rc!=SQLITE_OK || szJ==0 ){
1897 goto end_playback;
1898 }
1899
1900 /* Read the master journal name from the journal, if it is present.
1901 ** If a master journal file name is specified, but the file is not
1902 ** present on disk, then the journal is not hot and does not need to be
1903 ** played back.
1904 **
1905 ** TODO: Technically the following is an error because it assumes that
1906 ** buffer Pager.pTmpSpace is (mxPathname+1) bytes or larger. i.e. that
1907 ** (pPager->pageSize >= pPager->pVfs->mxPathname+1). Using os_unix.c,
1908 ** mxPathname is 512, which is the same as the minimum allowable value
1909 ** for pageSize.
1910 */
1911 zMaster = pPager->pTmpSpace;
1912 rc = readMasterJournal(pPager->jfd, zMaster, pPager->pVfs->mxPathname+1);
1913 if( rc==SQLITE_OK && zMaster[0] ){
1914 rc = sqlite3OsAccess(pVfs, zMaster, SQLITE_ACCESS_EXISTS, &res);
1915 }
1916 zMaster = 0; /* The name is read again at end_playback if the playback succeeds */
1917 if( rc!=SQLITE_OK || !res ){
1918 goto end_playback;
1919 }
1920 pPager->journalOff = 0;
1921 needPagerReset = isHot; /* Only a hot-journal rollback resets the page cache */
1922
1923 /* This loop terminates either when a readJournalHdr() or
1924 ** pager_playback_one_page() call returns SQLITE_DONE or an IO error
1925 ** occurs.
1926 */
1927 while( 1 ){
1928 int isUnsync = 0;
1929
1930 /* Read the next journal header from the journal file. If there are
1931 ** not enough bytes left in the journal file for a complete header, or
1932 ** it is corrupted, then a process must have failed while writing it.
1933 ** This indicates nothing more needs to be rolled back.
1934 */
1935 rc = readJournalHdr(pPager, isHot, szJ, &nRec, &mxPg);
1936 if( rc!=SQLITE_OK ){
1937 if( rc==SQLITE_DONE ){
1938 rc = SQLITE_OK;
1939 }
1940 goto end_playback;
1941 }
1942
1943 /* If nRec is 0xffffffff, then this journal was created by a process
1944 ** working in no-sync mode. This means that the rest of the journal
1945 ** file consists of pages, there are no more journal headers. Compute
1946 ** the value of nRec based on this assumption.
1947 */
1948 if( nRec==0xffffffff ){
1949 assert( pPager->journalOff==JOURNAL_HDR_SZ(pPager) );
1950 nRec = (int)((szJ - JOURNAL_HDR_SZ(pPager))/JOURNAL_PG_SZ(pPager));
1951 }
1952
1953 /* If nRec is 0 and this rollback is of a transaction created by this
1954 ** process and if this is the final header in the journal, then it means
1955 ** that this part of the journal was being filled but has not yet been
1956 ** synced to disk. Compute the number of pages based on the remaining
1957 ** size of the file.
1958 **
1959 ** The third term of the test was added to fix ticket #2565.
1960 ** When rolling back a hot journal, nRec==0 always means that the next
1961 ** chunk of the journal contains zero pages to be rolled back. But
1962 ** when doing a ROLLBACK and the nRec==0 chunk is the last chunk in
1963 ** the journal, it means that the journal might contain additional
1964 ** pages that need to be rolled back and that the number of pages
1965 ** should be computed based on the journal file size.
1966 */
1967 if( nRec==0 && !isHot &&
1968 pPager->journalHdr+JOURNAL_HDR_SZ(pPager)==pPager->journalOff ){
1969 nRec = (int)((szJ - pPager->journalOff) / JOURNAL_PG_SZ(pPager));
1970 isUnsync = 1; /* Passed on to pager_playback_one_page() for every record of this chunk */
1971 }
1972
1973 /* If this is the first header read from the journal, truncate the
1974 ** database file back to its original size.
1975 */
1976 if( pPager->journalOff==JOURNAL_HDR_SZ(pPager) ){
1977 rc = pager_truncate(pPager, mxPg);
1978 if( rc!=SQLITE_OK ){
1979 goto end_playback;
1980 }
1981 pPager->dbSize = mxPg;
1982 }
1983
1984 /* Copy original pages out of the journal and back into the
1985 ** database file and/or page cache.
1986 */
1987 for(u=0; u<nRec; u++){
1988 if( needPagerReset ){
1989 pager_reset(pPager);
1990 needPagerReset = 0; /* The reset happens at most once per playback */
1991 }
1992 rc = pager_playback_one_page(pPager,1,isUnsync,&pPager->journalOff,0,0);
1993 if( rc!=SQLITE_OK ){
1994 if( rc==SQLITE_DONE ){
1995 rc = SQLITE_OK;
1996 pPager->journalOff = szJ; /* Jump to end of journal; presumably the next readJournalHdr() then reports SQLITE_DONE - confirm */
1997 break;
1998 }else{
1999 /* If we are unable to rollback, quit and return the error
2000 ** code. This will cause the pager to enter the error state
2001 ** so that no further harm will be done. Perhaps the next
2002 ** process to come along will be able to rollback the database.
2003 */
2004 goto end_playback;
2005 }
2006 }
2007 }
2008 }
2009 /*NOTREACHED*/
2010 assert( 0 );
2011
2012 end_playback:
2013 /* Following a rollback, the database file should be back in its original
2014 ** state prior to the start of the transaction, so invoke the
2015 ** SQLITE_FCNTL_DB_UNCHANGED file-control method to disable the
2016 ** assertion that the transaction counter was modified.
2017 **
2018 ** Note that the file-control call is made inside assert(), so it is
2019 ** compiled out entirely when assert() is disabled.
2020 */
2018 assert(
2019 pPager->fd->pMethods==0 ||
2020 sqlite3OsFileControl(pPager->fd,SQLITE_FCNTL_DB_UNCHANGED,0)>=SQLITE_OK
2021 );
2022
2023 /* If this playback is happening automatically as a result of an IO or
2024 ** malloc error that occurred after the change-counter was updated but
2025 ** before the transaction was committed, then the change-counter
2026 ** modification may just have been reverted. If this happens in exclusive
2027 ** mode, then subsequent transactions performed by the connection will not
2028 ** update the change-counter at all. This may lead to cache inconsistency
2029 ** problems for other processes at some point in the future. So, just
2030 ** in case this has happened, clear the changeCountDone flag now.
2031 */
2032 pPager->changeCountDone = pPager->tempFile;
2033
2034 if( rc==SQLITE_OK ){
2035 zMaster = pPager->pTmpSpace;
2036 rc = readMasterJournal(pPager->jfd, zMaster, pPager->pVfs->mxPathname+1);
2037 testcase( rc!=SQLITE_OK );
2038 }
2039 if( rc==SQLITE_OK ){
2040 rc = pager_end_transaction(pPager, zMaster[0]!='\0');
2041 testcase( rc!=SQLITE_OK );
2042 }
2043 if( rc==SQLITE_OK && zMaster[0] && res ){ /* zMaster is non-NULL here: it was set above whenever rc was SQLITE_OK */
2044 /* If there was a master journal and this routine will return success,
2045 ** see if it is possible to delete the master journal.
2046 */
2047 rc = pager_delmaster(pPager, zMaster);
2048 testcase( rc!=SQLITE_OK );
2049 }
2050
2051 /* The Pager.sectorSize variable may have been updated while rolling
2052 ** back a journal created by a process with a different sector size
2053 ** value. Reset it to the correct value for this process.
2054 */
2055 setSectorSize(pPager);
2056 return rc;
2057 }
2058
2059 /*
2060 ** Playback savepoint pSavepoint. Or, if pSavepoint==NULL, then playback
2061 ** the entire main journal file. The case pSavepoint==NULL occurs when
2062 ** a ROLLBACK TO command is invoked on a SAVEPOINT that is a transaction
2063 ** savepoint.
2064 **
2065 ** When pSavepoint is not NULL (meaning a non-transaction savepoint is
2066 ** being rolled back), then the rollback consists of up to three stages,
2067 ** performed in the order specified:
2068 **
2069 ** * Pages are played back from the main journal starting at byte
2070 ** offset PagerSavepoint.iOffset and continuing to
2071 ** PagerSavepoint.iHdrOffset, or to the end of the main journal
2072 ** file if PagerSavepoint.iHdrOffset is zero.
2073 **
2074 ** * If PagerSavepoint.iHdrOffset is not zero, then pages are played
2075 ** back starting from the journal header immediately following
2076 ** PagerSavepoint.iHdrOffset to the end of the main journal file.
2077 **
2078 ** * Pages are then played back from the sub-journal file, starting
2079 ** with the PagerSavepoint.iSubRec and continuing to the end of
2080 ** the journal file.
2081 **
2082 ** Throughout the rollback process, each time a page is rolled back, the
2083 ** corresponding bit is set in a bitvec structure (variable pDone in the
2084 ** implementation below). This is used to ensure that a page is only
2085 ** rolled back the first time it is encountered in either journal.
2086 **
2087 ** If pSavepoint is NULL, then pages are only played back from the main
2088 ** journal file. There is no need for a bitvec in this case.
2089 **
2090 ** In either case, before playback commences the Pager.dbSize variable
2091 ** is reset to the value that it held at the start of the savepoint
2092 ** (or transaction). No page with a page-number greater than this value
2093 ** is played back. If one is encountered it is simply skipped.
2094 */
2095 static int pagerPlaybackSavepoint(Pager *pPager, PagerSavepoint *pSavepoint){
2096 i64 szJ; /* Effective size of the main journal */
2097 i64 iHdrOff; /* End of first segment of main-journal records */
2098 int rc = SQLITE_OK; /* Return code */
2099 Bitvec *pDone = 0; /* Bitvec to ensure pages played back only once */
2100
2101 assert( pPager->state>=PAGER_SHARED );
2102
2103 /* Allocate a bitvec to use to store the set of pages rolled back */
2104 if( pSavepoint ){
2105 pDone = sqlite3BitvecCreate(pSavepoint->nOrig);
2106 if( !pDone ){
2107 return SQLITE_NOMEM;
2108 }
2109 }
2110
2111 /* Set the database size back to the value it was before the savepoint
2112 ** being reverted was opened.
2113 */
2114 pPager->dbSize = pSavepoint ? pSavepoint->nOrig : pPager->dbOrigSize;
2115
2116 /* Use pPager->journalOff as the effective size of the main rollback
2117 ** journal. The actual file might be larger than this in
2118 ** PAGER_JOURNALMODE_TRUNCATE or PAGER_JOURNALMODE_PERSIST. But anything
2119 ** past pPager->journalOff is off-limits to us.
2120 */
2121 szJ = pPager->journalOff;
2122
2123 /* Begin by rolling back records from the main journal starting at
2124 ** PagerSavepoint.iOffset and continuing to the next journal header.
2125 ** There might be records in the main journal that have a page number
2126 ** greater than the current database size (pPager->dbSize) but those
2127 ** will be skipped automatically. Pages are added to pDone as they
2128 ** are played back.
2129 */
2130 if( pSavepoint ){
2131 iHdrOff = pSavepoint->iHdrOffset ? pSavepoint->iHdrOffset : szJ;
2132 pPager->journalOff = pSavepoint->iOffset;
2133 while( rc==SQLITE_OK && pPager->journalOff<iHdrOff ){
2134 rc = pager_playback_one_page(pPager, 1, 0, &pPager->journalOff, 1, pDone);
2135 }
2136 assert( rc!=SQLITE_DONE );
2137 }else{
2138 pPager->journalOff = 0;
2139 }
2140
2141 /* Continue rolling back records out of the main journal starting at
2142 ** the first journal header seen and continuing until the effective end
2143 ** of the main journal file. Continue to skip out-of-range pages and
2144 ** continue adding pages rolled back to pDone.
2145 */
2146 while( rc==SQLITE_OK && pPager->journalOff<szJ ){
2147 u32 ii; /* Loop counter */
2148 u32 nJRec = 0; /* Number of Journal Records */
2149 u32 dummy;
2150 rc = readJournalHdr(pPager, 0, szJ, &nJRec, &dummy);
2151 assert( rc!=SQLITE_DONE );
2152
2153 /*
2154 ** The "pPager->journalHdr+JOURNAL_HDR_SZ(pPager)==pPager->journalOff"
2155 ** test is related to ticket #2565. See the discussion in the
2156 ** pager_playback() function for additional information.
2157 */
2158 if( nJRec==0
2159 && pPager->journalHdr+JOURNAL_HDR_SZ(pPager)==pPager->journalOff
2160 ){
2161 nJRec = (u32)((szJ - pPager->journalOff)/JOURNAL_PG_SZ(pPager));
2162 }
2163 for(ii=0; rc==SQLITE_OK && ii<nJRec && pPager->journalOff<szJ; ii++){
2164 rc = pager_playback_one_page(pPager, 1, 0, &pPager->journalOff, 1, pDone);
2165 }
2166 assert( rc!=SQLITE_DONE );
2167 }
2168 assert( rc!=SQLITE_OK || pPager->journalOff==szJ );
2169
2170 /* Finally, rollback pages from the sub-journal. Page that were
2171 ** previously rolled back out of the main journal (and are hence in pDone)
2172 ** will be skipped. Out-of-range pages are also skipped.
2173 */
2174 if( pSavepoint ){
2175 u32 ii; /* Loop counter */
2176 i64 offset = pSavepoint->iSubRec*(4+pPager->pageSize);
2177 for(ii=pSavepoint->iSubRec; rc==SQLITE_OK && ii<pPager->nSubRec; ii++){
2178 assert( offset==ii*(4+pPager->pageSize) );
2179 rc = pager_playback_one_page(pPager, 0, 0, &offset, 1, pDone);
2180 }
2181 assert( rc!=SQLITE_DONE );
2182 }
2183
2184 sqlite3BitvecDestroy(pDone);
2185 if( rc==SQLITE_OK ){
2186 pPager->journalOff = szJ;
2187 }
2188 return rc;
2189 }
2190
2191 /*
2192 ** Change the maximum number of in-memory pages that are allowed.
2193 */
2194 void sqlite3PagerSetCachesize(Pager *pPager, int mxPage){
2195 sqlite3PcacheSetCachesize(pPager->pPCache, mxPage); /* Forward mxPage unchanged to the page cache object pPager->pPCache */
2196 }
2197
2198 /*
2199 ** Adjust the robustness of the database to damage due to OS crashes
2200 ** or power failures by changing the number of syncs()s when writing
2201 ** the rollback journal. There are three levels:
2202 **
2203 ** OFF sqlite3OsSync() is never called. This is the default
2204 ** for temporary and transient files.
2205 **
2206 ** NORMAL The journal is synced once before writes begin on the
2207 ** database. This is normally adequate protection, but
2208 ** it is theoretically possible, though very unlikely,
2209 ** that an inopportune power failure could leave the journal
2210 ** in a state which would cause damage to the database
2211 ** when it is rolled back.
2212 **
2213 ** FULL The journal is synced twice before writes begin on the
2214 ** database (with some additional information - the nRec field
2215 ** of the journal header - being written in between the two
2216 ** syncs). If we assume that writing a
2217 ** single disk sector is atomic, then this mode provides
2218 ** assurance that the journal will not be corrupted to the
2219 ** point of causing damage to the database during rollback.
2220 **
2221 ** Numeric values associated with these states are OFF==1, NORMAL=2,
2222 ** and FULL=3.
2223 */
2224 #ifndef SQLITE_OMIT_PAGER_PRAGMAS
2225 void sqlite3PagerSetSafetyLevel(Pager *pPager, int level, int bFullFsync){
2226 pPager->noSync = (level==1 || pPager->tempFile) ?1:0;
2227 pPager->fullSync = (level==3 && !pPager->tempFile) ?1:0;
2228 pPager->sync_flags = (bFullFsync?SQLITE_SYNC_FULL:SQLITE_SYNC_NORMAL);
2229 if( pPager->noSync ) pPager->needSync = 0;
2230 }
2231 #endif
2232
2233 /*
2234 ** The following global variable is incremented whenever the library
2235 ** attempts to open a temporary file. This information is used for
2236 ** testing and analysis only.
2237 */
2238 #ifdef SQLITE_TEST
2239 int sqlite3_opentemp_count = 0; /* Incremented by pagerOpentemp() on every call; only defined in SQLITE_TEST builds */
2240 #endif
2241
2242 /*
2243 ** Open a temporary file.
2244 **
2245 ** Write the file descriptor into *pFile. Return SQLITE_OK on success
2246 ** or some other error code if we fail. The OS will automatically
2247 ** delete the temporary file when it is closed.
2248 **
2249 ** The flags passed to the VFS layer xOpen() call are those specified
2250 ** by parameter vfsFlags ORed with the following:
2251 **
2252 ** SQLITE_OPEN_READWRITE
2253 ** SQLITE_OPEN_CREATE
2254 ** SQLITE_OPEN_EXCLUSIVE
2255 ** SQLITE_OPEN_DELETEONCLOSE
2256 */
2257 static int pagerOpentemp(
2258 Pager *pPager, /* The pager object */
2259 sqlite3_file *pFile, /* Write the file descriptor here */
2260 int vfsFlags /* Flags passed through to the VFS */
2261 ){
2262 int rc; /* Return code */
2263
2264 #ifdef SQLITE_TEST
2265 sqlite3_opentemp_count++; /* Used for testing and analysis only */
2266 #endif
2267
2268 vfsFlags |= SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE |
2269 SQLITE_OPEN_EXCLUSIVE | SQLITE_OPEN_DELETEONCLOSE;
2270 rc = sqlite3OsOpen(pPager->pVfs, 0, pFile, vfsFlags, 0);
2271 assert( rc!=SQLITE_OK || isOpen(pFile) );
2272 return rc;
2273 }
2274
2275 /*
2276 ** Set the busy handler function.
2277 **
2278 ** The pager invokes the busy-handler if sqlite3OsLock() returns
2279 ** SQLITE_BUSY when trying to upgrade from no-lock to a SHARED lock,
2280 ** or when trying to upgrade from a RESERVED lock to an EXCLUSIVE
2281 ** lock. It does *not* invoke the busy handler when upgrading from
2282 ** SHARED to RESERVED, or when upgrading from SHARED to EXCLUSIVE
2283 ** (which occurs during hot-journal rollback). Summary:
2284 **
2285 ** Transition | Invokes xBusyHandler
2286 ** --------------------------------------------------------
2287 ** NO_LOCK -> SHARED_LOCK | Yes
2288 ** SHARED_LOCK -> RESERVED_LOCK | No
2289 ** SHARED_LOCK -> EXCLUSIVE_LOCK | No
2290 ** RESERVED_LOCK -> EXCLUSIVE_LOCK | Yes
2291 **
2292 ** If the busy-handler callback returns non-zero, the lock is
2293 ** retried. If it returns zero, then the SQLITE_BUSY error is
2294 ** returned to the caller of the pager API function.
2295 */
2296 void sqlite3PagerSetBusyhandler(
2297 Pager *pPager, /* Pager object */
2298 int (*xBusyHandler)(void *), /* Pointer to busy-handler function */
2299 void *pBusyHandlerArg /* Argument to pass to xBusyHandler */
2300 ){
2301 pPager->xBusyHandler = xBusyHandler;
2302 pPager->pBusyHandlerArg = pBusyHandlerArg;
2303 }
2304
/*
** Tell the codec (if one is attached) the page size and the number of
** reserved bytes currently configured on the pager.
**
** When SQLite is built without SQLITE_HAS_CODEC this expands to nothing.
*/
#ifdef SQLITE_HAS_CODEC
static void pagerReportSize(Pager *pPager){
  /* No size-change callback registered: nothing to report. */
  if( pPager->xCodecSizeChng==0 ) return;
  pPager->xCodecSizeChng(
      pPager->pCodec, pPager->pageSize, (int)pPager->nReserve
  );
}
#else
# define pagerReportSize(X)     /* No-op if we do not support a codec */
#endif
2319
2320 /*
2321 ** Change the page size used by the Pager object. The new page size
2322 ** is passed in *pPageSize.
2323 **
2324 ** If the pager is in the error state when this function is called, it
2325 ** is a no-op. The value returned is the error state error code (i.e.
2326 ** one of SQLITE_IOERR, SQLITE_CORRUPT or SQLITE_FULL).
2327 **
2328 ** Otherwise, if all of the following are true:
2329 **
2330 ** * the new page size (value of *pPageSize) is valid (a power
2331 ** of two between 512 and SQLITE_MAX_PAGE_SIZE, inclusive), and
2332 **
2333 ** * there are no outstanding page references, and
2334 **
2335 ** * the database is either not an in-memory database or it is
2336 ** an in-memory database that currently consists of zero pages.
2337 **
2338 ** then the pager object page size is set to *pPageSize.
2339 **
2340 ** If the page size is changed, then this function uses sqlite3PagerMalloc()
2341 ** to obtain a new Pager.pTmpSpace buffer. If this allocation attempt
2342 ** fails, SQLITE_NOMEM is returned and the page size remains unchanged.
2343 ** In all other cases, SQLITE_OK is returned.
2344 **
2345 ** If the page size is not changed, either because one of the enumerated
2346 ** conditions above is not true, the pager was in error state when this
2347 ** function was called, or because the memory allocation attempt failed,
2348 ** then *pPageSize is set to the old, retained page size before returning.
2349 */
2350 int sqlite3PagerSetPagesize(Pager *pPager, u16 *pPageSize, int nReserve){
2351 int rc = pPager->errCode;
2352
2353 if( rc==SQLITE_OK ){
2354 u16 pageSize = *pPageSize;
2355 assert( pageSize==0 || (pageSize>=512 && pageSize<=SQLITE_MAX_PAGE_SIZE) );
2356 if( (pPager->memDb==0 || pPager->dbSize==0)
2357 && sqlite3PcacheRefCount(pPager->pPCache)==0
2358 && pageSize && pageSize!=pPager->pageSize
2359 ){
2360 char *pNew = (char *)sqlite3PageMalloc(pageSize);
2361 if( !pNew ){
2362 rc = SQLITE_NOMEM;
2363 }else{
2364 pager_reset(pPager);
2365 pPager->pageSize = pageSize;
2366 sqlite3PageFree(pPager->pTmpSpace);
2367 pPager->pTmpSpace = pNew;
2368 sqlite3PcacheSetPageSize(pPager->pPCache, pageSize);
2369 }
2370 }
2371 *pPageSize = (u16)pPager->pageSize;
2372 if( nReserve<0 ) nReserve = pPager->nReserve;
2373 assert( nReserve>=0 && nReserve<1000 );
2374 pPager->nReserve = (i16)nReserve;
2375 pagerReportSize(pPager);
2376 }
2377 return rc;
2378 }
2379
2380 /*
2381 ** Return a pointer to the "temporary page" buffer held internally
2382 ** by the pager. This is a buffer that is big enough to hold the
2383 ** entire content of a database page. This buffer is used internally
2384 ** during rollback and will be overwritten whenever a rollback
2385 ** occurs. But other modules are free to use it too, as long as
2386 ** no rollbacks are happening.
2387 */
2388 void *sqlite3PagerTempSpace(Pager *pPager){
2389 return pPager->pTmpSpace;
2390 }
2391
2392 /*
2393 ** Attempt to set the maximum database page count if mxPage is positive.
2394 ** Make no changes if mxPage is zero or negative. And never reduce the
2395 ** maximum page count below the current size of the database.
2396 **
2397 ** Regardless of mxPage, return the current maximum page count.
2398 */
2399 int sqlite3PagerMaxPageCount(Pager *pPager, int mxPage){
2400 if( mxPage>0 ){
2401 pPager->mxPgno = mxPage;
2402 }
2403 sqlite3PagerPagecount(pPager, 0);
2404 return pPager->mxPgno;
2405 }
2406
/*
** The routines below switch the simulated I/O error mechanism off and
** back on. They bracket code in which errors are of no interest, so
** that simulated errors are not triggered there.
**
** These are real functions only when compiled with -DSQLITE_TEST=1.
** Otherwise they are empty macros and generate no code.
*/
#ifdef SQLITE_TEST
extern int sqlite3_io_error_pending;
extern int sqlite3_io_error_hit;
static int saved_cnt;          /* Pending counter stashed by disable_...() */

/* Remember the current pending counter, then overwrite it with -1. */
void disable_simulated_io_errors(void){
  saved_cnt = sqlite3_io_error_pending;
  sqlite3_io_error_pending = -1;
}

/* Put back the pending counter saved by disable_simulated_io_errors(). */
void enable_simulated_io_errors(void){
  sqlite3_io_error_pending = saved_cnt;
}
#else
# define disable_simulated_io_errors()
# define enable_simulated_io_errors()
#endif
2430
2431 /*
2432 ** Read the first N bytes from the beginning of the file into memory
2433 ** that pDest points to.
2434 **
2435 ** If the pager was opened on a transient file (zFilename==""), or
2436 ** opened on a file less than N bytes in size, the output buffer is
2437 ** zeroed and SQLITE_OK returned. The rationale for this is that this
2438 ** function is used to read database headers, and a new transient or
2439 ** zero sized database has a header than consists entirely of zeroes.
2440 **
2441 ** If any IO error apart from SQLITE_IOERR_SHORT_READ is encountered,
2442 ** the error code is returned to the caller and the contents of the
2443 ** output buffer undefined.
2444 */
2445 int sqlite3PagerReadFileheader(Pager *pPager, int N, unsigned char *pDest){
2446 int rc = SQLITE_OK;
2447 memset(pDest, 0, N);
2448 assert( isOpen(pPager->fd) || pPager->tempFile );
2449 if( isOpen(pPager->fd) ){
2450 IOTRACE(("DBHDR %p 0 %d\n", pPager, N))
2451 rc = sqlite3OsRead(pPager->fd, pDest, N, 0);
2452 if( rc==SQLITE_IOERR_SHORT_READ ){
2453 rc = SQLITE_OK;
2454 }
2455 }
2456 return rc;
2457 }
2458
2459 /*
2460 ** Return the total number of pages in the database file associated
2461 ** with pPager. Normally, this is calculated as (<db file size>/<page-size>).
2462 ** However, if the file is between 1 and <page-size> bytes in size, then
2463 ** this is considered a 1 page file.
2464 **
2465 ** If the pager is in error state when this function is called, then the
2466 ** error state error code is returned and *pnPage left unchanged. Or,
2467 ** if the file system has to be queried for the size of the file and
2468 ** the query attempt returns an IO error, the IO error code is returned
2469 ** and *pnPage is left unchanged.
2470 **
2471 ** Otherwise, if everything is successful, then SQLITE_OK is returned
2472 ** and *pnPage is set to the number of pages in the database.
2473 */
2474 int sqlite3PagerPagecount(Pager *pPager, int *pnPage){
2475 Pgno nPage; /* Value to return via *pnPage */
2476
2477 /* If the pager is already in the error state, return the error code. */
2478 if( pPager->errCode ){
2479 return pPager->errCode;
2480 }
2481
2482 /* Determine the number of pages in the file. Store this in nPage. */
2483 if( pPager->dbSizeValid ){
2484 nPage = pPager->dbSize;
2485 }else{
2486 int rc; /* Error returned by OsFileSize() */
2487 i64 n = 0; /* File size in bytes returned by OsFileSize() */
2488
2489 assert( isOpen(pPager->fd) || pPager->tempFile );
2490 if( isOpen(pPager->fd) && (0 != (rc = sqlite3OsFileSize(pPager->fd, &n))) ){
2491 pager_error(pPager, rc);
2492 return rc;
2493 }
2494 if( n>0 && n<pPager->pageSize ){
2495 nPage = 1;
2496 }else{
2497 nPage = (Pgno)(n / pPager->pageSize);
2498 }
2499 if( pPager->state!=PAGER_UNLOCK ){
2500 pPager->dbSize = nPage;
2501 pPager->dbFileSize = nPage;
2502 pPager->dbSizeValid = 1;
2503 }
2504 }
2505
2506 /* If the current number of pages in the file is greater than the
2507 ** configured maximum pager number, increase the allowed limit so
2508 ** that the file can be read.
2509 */
2510 if( nPage>pPager->mxPgno ){
2511 pPager->mxPgno = (Pgno)nPage;
2512 }
2513
2514 /* Set the output variable and return SQLITE_OK */
2515 if( pnPage ){
2516 *pnPage = nPage;
2517 }
2518 return SQLITE_OK;
2519 }
2520
2521
2522 /*
2523 ** Try to obtain a lock of type locktype on the database file. If
2524 ** a similar or greater lock is already held, this function is a no-op
2525 ** (returning SQLITE_OK immediately).
2526 **
2527 ** Otherwise, attempt to obtain the lock using sqlite3OsLock(). Invoke
2528 ** the busy callback if the lock is currently not available. Repeat
2529 ** until the busy callback returns false or until the attempt to
2530 ** obtain the lock succeeds.
2531 **
2532 ** Return SQLITE_OK on success and an error code if we cannot obtain
2533 ** the lock. If the lock is obtained successfully, set the Pager.state
2534 ** variable to locktype before returning.
2535 */
2536 static int pager_wait_on_lock(Pager *pPager, int locktype){
2537 int rc; /* Return code */
2538
2539 /* The OS lock values must be the same as the Pager lock values */
2540 assert( PAGER_SHARED==SHARED_LOCK );
2541 assert( PAGER_RESERVED==RESERVED_LOCK );
2542 assert( PAGER_EXCLUSIVE==EXCLUSIVE_LOCK );
2543
2544 /* If the file is currently unlocked then the size must be unknown */
2545 assert( pPager->state>=PAGER_SHARED || pPager->dbSizeValid==0 );
2546
2547 /* Check that this is either a no-op (because the requested lock is
2548 ** already held, or one of the transistions that the busy-handler
2549 ** may be invoked during, according to the comment above
2550 ** sqlite3PagerSetBusyhandler().
2551 */
2552 assert( (pPager->state>=locktype)
2553 || (pPager->state==PAGER_UNLOCK && locktype==PAGER_SHARED)
2554 || (pPager->state==PAGER_RESERVED && locktype==PAGER_EXCLUSIVE)
2555 );
2556
2557 if( pPager->state>=locktype ){
2558 rc = SQLITE_OK;
2559 }else{
2560 do {
2561 rc = sqlite3OsLock(pPager->fd, locktype);
2562 }while( rc==SQLITE_BUSY && pPager->xBusyHandler(pPager->pBusyHandlerArg) );
2563 if( rc==SQLITE_OK ){
2564 pPager->state = (u8)locktype;
2565 IOTRACE(("LOCK %p %d\n", pPager, locktype))
2566 }
2567 }
2568 return rc;
2569 }
2570
/*
** assertTruncateConstraint(pPager) checks that, for every dirty page
** currently in the page-cache, at least one of the following holds:
**
**   a) The page number is less than or equal to the size of the
**      current database image, in pages, OR
**
**   b) if the page content were written at this time, it would not
**      be necessary to write the current content out to the sub-journal
**      (as determined by function subjRequiresPage()).
**
** Were this not true, and such a dirty page were discarded from the
** cache via pagerStress(), pagerStress() would not write the page
** content to the database file. A later savepoint rollback ought to
** restore that content, but it would exist neither in the database
** file nor in the rolled-back portions of the rollback journal and
** sub-journal, so the database image would become corrupt. It is
** therefore fortunate that this circumstance cannot arise.
**
** The check only exists in SQLITE_DEBUG builds; otherwise the macro
** expands to nothing.
*/
#if defined(SQLITE_DEBUG)
/* Per-page callback: asserts the constraint for one dirty page. */
static void assertTruncateConstraintCb(PgHdr *pPg){
  assert( pPg->flags&PGHDR_DIRTY );
  assert( !subjRequiresPage(pPg) || pPg->pgno<=pPg->pPager->dbSize );
}

/* Apply the per-page check to every dirty page in the cache. */
static void assertTruncateConstraint(Pager *pPager){
  sqlite3PcacheIterateDirty(pPager->pPCache, assertTruncateConstraintCb);
}
#else
# define assertTruncateConstraint(pPager)
#endif
2604
2605 /*
2606 ** Truncate the in-memory database file image to nPage pages. This
2607 ** function does not actually modify the database file on disk. It
2608 ** just sets the internal state of the pager object so that the
2609 ** truncation will be done when the current transaction is committed.
2610 */
2611 void sqlite3PagerTruncateImage(Pager *pPager, Pgno nPage){
2612 assert( pPager->dbSizeValid );
2613 assert( pPager->dbSize>=nPage );
2614 assert( pPager->state>=PAGER_RESERVED );
2615 pPager->dbSize = nPage;
2616 assertTruncateConstraint(pPager);
2617 }
2618
2619 /*
2620 ** Shutdown the page cache. Free all memory and close all files.
2621 **
2622 ** If a transaction was in progress when this routine is called, that
2623 ** transaction is rolled back. All outstanding pages are invalidated
2624 ** and their memory is freed. Any attempt to use a page associated
2625 ** with this page cache after this function returns will likely
2626 ** result in a coredump.
2627 **
2628 ** This function always succeeds. If a transaction is active an attempt
2629 ** is made to roll it back. If an error occurs during the rollback
2630 ** a hot journal may be left in the filesystem but no error is returned
2631 ** to the caller.
2632 */
2633 int sqlite3PagerClose(Pager *pPager){
2634 disable_simulated_io_errors();
2635 sqlite3BeginBenignMalloc();
2636 pPager->errCode = 0;
2637 pPager->exclusiveMode = 0;
2638 pager_reset(pPager);
2639 if( MEMDB ){
2640 pager_unlock(pPager);
2641 }else{
2642 /* Set Pager.journalHdr to -1 for the benefit of the pager_playback()
2643 ** call which may be made from within pagerUnlockAndRollback(). If it
2644 ** is not -1, then the unsynced portion of an open journal file may
2645 ** be played back into the database. If a power failure occurs while
2646 ** this is happening, the database may become corrupt.
2647 */
2648 pPager->journalHdr = -1;
2649 pagerUnlockAndRollback(pPager);
2650 }
2651 sqlite3EndBenignMalloc();
2652 enable_simulated_io_errors();
2653 PAGERTRACE(("CLOSE %d\n", PAGERID(pPager)));
2654 IOTRACE(("CLOSE %p\n", pPager))
2655 sqlite3OsClose(pPager->fd);
2656 sqlite3PageFree(pPager->pTmpSpace);
2657 sqlite3PcacheClose(pPager->pPCache);
2658
2659 #ifdef SQLITE_HAS_CODEC
2660 if( pPager->xCodecFree ) pPager->xCodecFree(pPager->pCodec);
2661 #endif
2662
2663 assert( !pPager->aSavepoint && !pPager->pInJournal );
2664 assert( !isOpen(pPager->jfd) && !isOpen(pPager->sjfd) );
2665
2666 sqlite3_free(pPager);
2667 return SQLITE_OK;
2668 }
2669
2670 #if !defined(NDEBUG) || defined(SQLITE_TEST)
2671 /*
2672 ** Return the page number for page pPg.
2673 */
2674 Pgno sqlite3PagerPagenumber(DbPage *pPg){
2675 return pPg->pgno;
2676 }
2677 #endif
2678
2679 /*
2680 ** Increment the reference count for page pPg.
2681 */
2682 void sqlite3PagerRef(DbPage *pPg){
2683 sqlite3PcacheRef(pPg);
2684 }
2685
2686 /*
2687 ** Sync the journal. In other words, make sure all the pages that have
2688 ** been written to the journal have actually reached the surface of the
2689 ** disk and can be restored in the event of a hot-journal rollback.
2690 **
2691 ** If the Pager.needSync flag is not set, then this function is a
2692 ** no-op. Otherwise, the actions required depend on the journal-mode
2693 ** and the device characteristics of the the file-system, as follows:
2694 **
2695 ** * If the journal file is an in-memory journal file, no action need
2696 ** be taken.
2697 **
2698 ** * Otherwise, if the device does not support the SAFE_APPEND property,
2699 ** then the nRec field of the most recently written journal header
2700 ** is updated to contain the number of journal records that have
2701 ** been written following it. If the pager is operating in full-sync
2702 ** mode, then the journal file is synced before this field is updated.
2703 **
2704 ** * If the device does not support the SEQUENTIAL property, then
2705 ** journal file is synced.
2706 **
2707 ** Or, in pseudo-code:
2708 **
2709 ** if( NOT <in-memory journal> ){
2710 ** if( NOT SAFE_APPEND ){
2711 ** if( <full-sync mode> ) xSync(<journal file>);
2712 ** <update nRec field>
2713 ** }
2714 ** if( NOT SEQUENTIAL ) xSync(<journal file>);
2715 ** }
2716 **
2717 ** The Pager.needSync flag is never be set for temporary files, or any
2718 ** file operating in no-sync mode (Pager.noSync set to non-zero).
2719 **
2720 ** If successful, this routine clears the PGHDR_NEED_SYNC flag of every
2721 ** page currently held in memory before returning SQLITE_OK. If an IO
2722 ** error is encountered, then the IO error code is returned to the caller.
2723 */
2724 static int syncJournal(Pager *pPager){
2725 if( pPager->needSync ){
2726 assert( !pPager->tempFile );
2727 if( pPager->journalMode!=PAGER_JOURNALMODE_MEMORY ){
2728 int rc; /* Return code */
2729 const int iDc = sqlite3OsDeviceCharacteristics(pPager->fd);
2730 assert( isOpen(pPager->jfd) );
2731
2732 if( 0==(iDc&SQLITE_IOCAP_SAFE_APPEND) ){
2733 /* This block deals with an obscure problem. If the last connection
2734 ** that wrote to this database was operating in persistent-journal
2735 ** mode, then the journal file may at this point actually be larger
2736 ** than Pager.journalOff bytes. If the next thing in the journal
2737 ** file happens to be a journal-header (written as part of the
2738 ** previous connections transaction), and a crash or power-failure
2739 ** occurs after nRec is updated but before this connection writes
2740 ** anything else to the journal file (or commits/rolls back its
2741 ** transaction), then SQLite may become confused when doing the
2742 ** hot-journal rollback following recovery. It may roll back all
2743 ** of this connections data, then proceed to rolling back the old,
2744 ** out-of-date data that follows it. Database corruption.
2745 **
2746 ** To work around this, if the journal file does appear to contain
2747 ** a valid header following Pager.journalOff, then write a 0x00
2748 ** byte to the start of it to prevent it from being recognized.
2749 **
2750 ** Variable iNextHdrOffset is set to the offset at which this
2751 ** problematic header will occur, if it exists. aMagic is used
2752 ** as a temporary buffer to inspect the first couple of bytes of
2753 ** the potential journal header.
2754 */
2755 i64 iNextHdrOffset;
2756 u8 aMagic[8];
2757 u8 zHeader[sizeof(aJournalMagic)+4];
2758
2759 memcpy(zHeader, aJournalMagic, sizeof(aJournalMagic));
2760 put32bits(&zHeader[sizeof(aJournalMagic)], pPager->nRec);
2761
2762 iNextHdrOffset = journalHdrOffset(pPager);
2763 rc = sqlite3OsRead(pPager->jfd, aMagic, 8, iNextHdrOffset);
2764 if( rc==SQLITE_OK && 0==memcmp(aMagic, aJournalMagic, 8) ){
2765 static const u8 zerobyte = 0;
2766 rc = sqlite3OsWrite(pPager->jfd, &zerobyte, 1, iNextHdrOffset);
2767 }
2768 if( rc!=SQLITE_OK && rc!=SQLITE_IOERR_SHORT_READ ){
2769 return rc;
2770 }
2771
2772 /* Write the nRec value into the journal file header. If in
2773 ** full-synchronous mode, sync the journal first. This ensures that
2774 ** all data has really hit the disk before nRec is updated to mark
2775 ** it as a candidate for rollback.
2776 **
2777 ** This is not required if the persistent media supports the
2778 ** SAFE_APPEND property. Because in this case it is not possible
2779 ** for garbage data to be appended to the file, the nRec field
2780 ** is populated with 0xFFFFFFFF when the journal header is written
2781 ** and never needs to be updated.
2782 */
2783 if( pPager->fullSync && 0==(iDc&SQLITE_IOCAP_SEQUENTIAL) ){
2784 PAGERTRACE(("SYNC journal of %d\n", PAGERID(pPager)));
2785 IOTRACE(("JSYNC %p\n", pPager))
2786 rc = sqlite3OsSync(pPager->jfd, pPager->sync_flags);
2787 if( rc!=SQLITE_OK ) return rc;
2788 }
2789 IOTRACE(("JHDR %p %lld\n", pPager, pPager->journalHdr));
2790 rc = sqlite3OsWrite(
2791 pPager->jfd, zHeader, sizeof(zHeader), pPager->journalHdr
2792 );
2793 if( rc!=SQLITE_OK ) return rc;
2794 }
2795 if( 0==(iDc&SQLITE_IOCAP_SEQUENTIAL) ){
2796 PAGERTRACE(("SYNC journal of %d\n", PAGERID(pPager)));
2797 IOTRACE(("JSYNC %p\n", pPager))
2798 rc = sqlite3OsSync(pPager->jfd, pPager->sync_flags|
2799 (pPager->sync_flags==SQLITE_SYNC_FULL?SQLITE_SYNC_DATAONLY:0)
2800 );
2801 if( rc!=SQLITE_OK ) return rc;
2802 }
2803 }
2804
2805 /* The journal file was just successfully synced. Set Pager.needSync
2806 ** to zero and clear the PGHDR_NEED_SYNC flag on all pagess.
2807 */
2808 pPager->needSync = 0;
2809 pPager->journalStarted = 1;
2810 sqlite3PcacheClearSyncFlags(pPager->pPCache);
2811 }
2812
2813 return SQLITE_OK;
2814 }
2815
2816 /*
2817 ** The argument is the first in a linked list of dirty pages connected
2818 ** by the PgHdr.pDirty pointer. This function writes each one of the
2819 ** in-memory pages in the list to the database file. The argument may
2820 ** be NULL, representing an empty list. In this case this function is
2821 ** a no-op.
2822 **
2823 ** The pager must hold at least a RESERVED lock when this function
2824 ** is called. Before writing anything to the database file, this lock
2825 ** is upgraded to an EXCLUSIVE lock. If the lock cannot be obtained,
2826 ** SQLITE_BUSY is returned and no data is written to the database file.
2827 **
2828 ** If the pager is a temp-file pager and the actual file-system file
2829 ** is not yet open, it is created and opened before any data is
2830 ** written out.
2831 **
2832 ** Once the lock has been upgraded and, if necessary, the file opened,
2833 ** the pages are written out to the database file in list order. Writing
2834 ** a page is skipped if it meets either of the following criteria:
2835 **
2836 ** * The page number is greater than Pager.dbSize, or
2837 ** * The PGHDR_DONT_WRITE flag is set on the page.
2838 **
2839 ** If writing out a page causes the database file to grow, Pager.dbFileSize
2840 ** is updated accordingly. If page 1 is written out, then the value cached
2841 ** in Pager.dbFileVers[] is updated to match the new value stored in
2842 ** the database file.
2843 **
2844 ** If everything is successful, SQLITE_OK is returned. If an IO error
2845 ** occurs, an IO error code is returned. Or, if the EXCLUSIVE lock cannot
2846 ** be obtained, SQLITE_BUSY is returned.
2847 */
2848 static int pager_write_pagelist(PgHdr *pList){
2849 Pager *pPager; /* Pager object */
2850 int rc; /* Return code */
2851
2852 if( NEVER(pList==0) ) return SQLITE_OK;
2853 pPager = pList->pPager;
2854
2855 /* At this point there may be either a RESERVED or EXCLUSIVE lock on the
2856 ** database file. If there is already an EXCLUSIVE lock, the following
2857 ** call is a no-op.
2858 **
2859 ** Moving the lock from RESERVED to EXCLUSIVE actually involves going
2860 ** through an intermediate state PENDING. A PENDING lock prevents new
2861 ** readers from attaching to the database but is unsufficient for us to
2862 ** write. The idea of a PENDING lock is to prevent new readers from
2863 ** coming in while we wait for existing readers to clear.
2864 **
2865 ** While the pager is in the RESERVED state, the original database file
2866 ** is unchanged and we can rollback without having to playback the
2867 ** journal into the original database file. Once we transition to
2868 ** EXCLUSIVE, it means the database file has been changed and any rollback
2869 ** will require a journal playback.
2870 */
2871 assert( pPager->state>=PAGER_RESERVED );
2872 rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);
2873
2874 /* If the file is a temp-file has not yet been opened, open it now. It
2875 ** is not possible for rc to be other than SQLITE_OK if this branch
2876 ** is taken, as pager_wait_on_lock() is a no-op for temp-files.
2877 */
2878 if( !isOpen(pPager->fd) ){
2879 assert( pPager->tempFile && rc==SQLITE_OK );
2880 rc = pagerOpentemp(pPager, pPager->fd, pPager->vfsFlags);
2881 }
2882
2883 while( rc==SQLITE_OK && pList ){
2884 Pgno pgno = pList->pgno;
2885
2886 /* If there are dirty pages in the page cache with page numbers greater
2887 ** than Pager.dbSize, this means sqlite3PagerTruncateImage() was called to
2888 ** make the file smaller (presumably by auto-vacuum code). Do not write
2889 ** any such pages to the file.
2890 **
2891 ** Also, do not write out any page that has the PGHDR_DONT_WRITE flag
2892 ** set (set by sqlite3PagerDontWrite()).
2893 */
2894 if( pgno<=pPager->dbSize && 0==(pList->flags&PGHDR_DONT_WRITE) ){
2895 i64 offset = (pgno-1)*(i64)pPager->pageSize; /* Offset to write */
2896 char *pData; /* Data to write */
2897
2898 /* Encode the database */
2899 CODEC2(pPager, pList->pData, pgno, 6, return SQLITE_NOMEM, pData);
2900
2901 /* Write out the page data. */
2902 rc = sqlite3OsWrite(pPager->fd, pData, pPager->pageSize, offset);
2903
2904 /* If page 1 was just written, update Pager.dbFileVers to match
2905 ** the value now stored in the database file. If writing this
2906 ** page caused the database file to grow, update dbFileSize.
2907 */
2908 if( pgno==1 ){
2909 memcpy(&pPager->dbFileVers, &pData[24], sizeof(pPager->dbFileVers));
2910 }
2911 if( pgno>pPager->dbFileSize ){
2912 pPager->dbFileSize = pgno;
2913 }
2914
2915 /* Update any backup objects copying the contents of this pager. */
2916 sqlite3BackupUpdate(pPager->pBackup, pgno, (u8*)pList->pData);
2917
2918 PAGERTRACE(("STORE %d page %d hash(%08x)\n",
2919 PAGERID(pPager), pgno, pager_pagehash(pList)));
2920 IOTRACE(("PGOUT %p %d\n", pPager, pgno));
2921 PAGER_INCR(sqlite3_pager_writedb_count);
2922 PAGER_INCR(pPager->nWrite);
2923 }else{
2924 PAGERTRACE(("NOSTORE %d page %d\n", PAGERID(pPager), pgno));
2925 }
2926 #ifdef SQLITE_CHECK_PAGES
2927 pList->pageHash = pager_pagehash(pList);
2928 #endif
2929 pList = pList->pDirty;
2930 }
2931
2932 return rc;
2933 }
2934
2935 /*
2936 ** Append a record of the current state of page pPg to the sub-journal.
2937 ** It is the callers responsibility to use subjRequiresPage() to check
2938 ** that it is really required before calling this function.
2939 **
2940 ** If successful, set the bit corresponding to pPg->pgno in the bitvecs
2941 ** for all open savepoints before returning.
2942 **
2943 ** This function returns SQLITE_OK if everything is successful, an IO
2944 ** error code if the attempt to write to the sub-journal fails, or
2945 ** SQLITE_NOMEM if a malloc fails while setting a bit in a savepoint
2946 ** bitvec.
2947 */
2948 static int subjournalPage(PgHdr *pPg){
2949 int rc = SQLITE_OK;
2950 Pager *pPager = pPg->pPager;
2951 if( isOpen(pPager->sjfd) ){
2952 void *pData = pPg->pData;
2953 i64 offset = pPager->nSubRec*(4+pPager->pageSize);
2954 char *pData2;
2955
2956 CODEC2(pPager, pData, pPg->pgno, 7, return SQLITE_NOMEM, pData2);
2957 PAGERTRACE(("STMT-JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno));
2958
2959 assert( pageInJournal(pPg) || pPg->pgno>pPager->dbOrigSize );
2960 rc = write32bits(pPager->sjfd, offset, pPg->pgno);
2961 if( rc==SQLITE_OK ){
2962 rc = sqlite3OsWrite(pPager->sjfd, pData2, pPager->pageSize, offset+4);
2963 }
2964 }
2965 if( rc==SQLITE_OK ){
2966 pPager->nSubRec++;
2967 assert( pPager->nSavepoint>0 );
2968 rc = addToSavepointBitvecs(pPager, pPg->pgno);
2969 }
2970 return rc;
2971 }
2972
2973
2974 /*
2975 ** This function is called by the pcache layer when it has reached some
2976 ** soft memory limit. The first argument is a pointer to a Pager object
2977 ** (cast as a void*). The pager is always 'purgeable' (not an in-memory
2978 ** database). The second argument is a reference to a page that is
2979 ** currently dirty but has no outstanding references. The page
2980 ** is always associated with the Pager object passed as the first
2981 ** argument.
2982 **
2983 ** The job of this function is to make pPg clean by writing its contents
2984 ** out to the database file, if possible. This may involve syncing the
2985 ** journal file.
2986 **
2987 ** If successful, sqlite3PcacheMakeClean() is called on the page and
2988 ** SQLITE_OK returned. If an IO error occurs while trying to make the
2989 ** page clean, the IO error code is returned. If the page cannot be
2990 ** made clean for some other reason, but no error occurs, then SQLITE_OK
2991 ** is returned by sqlite3PcacheMakeClean() is not called.
2992 */
2993 static int pagerStress(void *p, PgHdr *pPg){
2994 Pager *pPager = (Pager *)p;
2995 int rc = SQLITE_OK;
2996
2997 assert( pPg->pPager==pPager );
2998 assert( pPg->flags&PGHDR_DIRTY );
2999
3000 /* The doNotSync flag is set by the sqlite3PagerWrite() function while it
3001 ** is journalling a set of two or more database pages that are stored
3002 ** on the same disk sector. Syncing the journal is not allowed while
3003 ** this is happening as it is important that all members of such a
3004 ** set of pages are synced to disk together. So, if the page this function
3005 ** is trying to make clean will require a journal sync and the doNotSync
3006 ** flag is set, return without doing anything. The pcache layer will
3007 ** just have to go ahead and allocate a new page buffer instead of
3008 ** reusing pPg.
3009 **
3010 ** Similarly, if the pager has already entered the error state, do not
3011 ** try to write the contents of pPg to disk.
3012 */
3013 if( NEVER(pPager->errCode)
3014 || (pPager->doNotSync && pPg->flags&PGHDR_NEED_SYNC)
3015 ){
3016 return SQLITE_OK;
3017 }
3018
3019 /* Sync the journal file if required. */
3020 if( pPg->flags&PGHDR_NEED_SYNC ){
3021 rc = syncJournal(pPager);
3022 if( rc==SQLITE_OK && pPager->fullSync &&
3023 !(pPager->journalMode==PAGER_JOURNALMODE_MEMORY) &&
3024 !(sqlite3OsDeviceCharacteristics(pPager->fd)&SQLITE_IOCAP_SAFE_APPEND)
3025 ){
3026 pPager->nRec = 0;
3027 rc = writeJournalHdr(pPager);
3028 }
3029 }
3030
3031 /* If the page number of this page is larger than the current size of
3032 ** the database image, it may need to be written to the sub-journal.
3033 ** This is because the call to pager_write_pagelist() below will not
3034 ** actually write data to the file in this case.
3035 **
3036 ** Consider the following sequence of events:
3037 **
3038 ** BEGIN;
3039 ** <journal page X>
3040 ** <modify page X>
3041 ** SAVEPOINT sp;
3042 ** <shrink database file to Y pages>
3043 ** pagerStress(page X)
3044 ** ROLLBACK TO sp;
3045 **
3046 ** If (X>Y), then when pagerStress is called page X will not be written
3047 ** out to the database file, but will be dropped from the cache. Then,
3048 ** following the "ROLLBACK TO sp" statement, reading page X will read
3049 ** data from the database file. This will be the copy of page X as it
3050 ** was when the transaction started, not as it was when "SAVEPOINT sp"
3051 ** was executed.
3052 **
3053 ** The solution is to write the current data for page X into the
3054 ** sub-journal file now (if it is not already there), so that it will
3055 ** be restored to its current value when the "ROLLBACK TO sp" is
3056 ** executed.
3057 */
3058 if( NEVER(
3059 rc==SQLITE_OK && pPg->pgno>pPager->dbSize && subjRequiresPage(pPg)
3060 ) ){
3061 rc = subjournalPage(pPg);
3062 }
3063
3064 /* Write the contents of the page out to the database file. */
3065 if( rc==SQLITE_OK ){
3066 pPg->pDirty = 0;
3067 rc = pager_write_pagelist(pPg);
3068 }
3069
3070 /* Mark the page as clean. */
3071 if( rc==SQLITE_OK ){
3072 PAGERTRACE(("STRESS %d page %d\n", PAGERID(pPager), pPg->pgno));
3073 sqlite3PcacheMakeClean(pPg);
3074 }
3075
3076 return pager_error(pPager, rc);
3077 }
3078
3079
3080 /*
3081 ** Allocate and initialize a new Pager object and put a pointer to it
3082 ** in *ppPager. The pager should eventually be freed by passing it
3083 ** to sqlite3PagerClose().
3084 **
3085 ** The zFilename argument is the path to the database file to open.
3086 ** If zFilename is NULL then a randomly-named temporary file is created
3087 ** and used as the file to be cached. Temporary files are deleted
3088 ** automatically when they are closed. If zFilename is ":memory:" then
3089 ** all information is held in cache. It is never written to disk.
3090 ** This can be used to implement an in-memory database.
3091 **
3092 ** The nExtra parameter specifies the number of bytes of space allocated
3093 ** along with each page reference. This space is available to the user
3094 ** via the sqlite3PagerGetExtra() API.
3095 **
3096 ** The flags argument is used to specify properties that affect the
3097 ** operation of the pager. It should be passed some bitwise combination
3098 ** of the PAGER_OMIT_JOURNAL and PAGER_NO_READLOCK flags.
3099 **
3100 ** The vfsFlags parameter is a bitmask to pass to the flags parameter
3101 ** of the xOpen() method of the supplied VFS when opening files.
3102 **
3103 ** If the pager object is allocated and the specified file opened
3104 ** successfully, SQLITE_OK is returned and *ppPager set to point to
3105 ** the new pager object. If an error occurs, *ppPager is set to NULL
3106 ** and error code returned. This function may return SQLITE_NOMEM
3107 ** (sqlite3Malloc() is used to allocate memory), SQLITE_CANTOPEN or
3108 ** various SQLITE_IO_XXX errors.
3109 */
3110 int sqlite3PagerOpen(
3111 sqlite3_vfs *pVfs, /* The virtual file system to use */
3112 Pager **ppPager, /* OUT: Return the Pager structure here */
3113 const char *zFilename, /* Name of the database file to open */
3114 int nExtra, /* Extra bytes append to each in-memory page */
3115 int flags, /* flags controlling this file */
3116 int vfsFlags, /* flags passed through to sqlite3_vfs.xOpen() */
3117 void (*xReinit)(DbPage*) /* Function to reinitialize pages */
3118 ){
3119 u8 *pPtr;
3120 Pager *pPager = 0; /* Pager object to allocate and return */
3121 int rc = SQLITE_OK; /* Return code */
3122 int tempFile = 0; /* True for temp files (incl. in-memory files) */
3123 int memDb = 0; /* True if this is an in-memory file */
3124 int readOnly = 0; /* True if this is a read-only file */
3125 int journalFileSize; /* Bytes to allocate for each journal fd */
3126 char *zPathname = 0; /* Full path to database file */
3127 int nPathname = 0; /* Number of bytes in zPathname */
3128 int useJournal = (flags & PAGER_OMIT_JOURNAL)==0; /* False to omit journal */
3129 int noReadlock = (flags & PAGER_NO_READLOCK)!=0; /* True to omit read-lock */
3130 int pcacheSize = sqlite3PcacheSize(); /* Bytes to allocate for PCache */
3131 u16 szPageDflt = SQLITE_DEFAULT_PAGE_SIZE; /* Default page size */
3132
3133 /* Figure out how much space is required for each journal file-handle
3134 ** (there are two of them, the main journal and the sub-journal). This
3135 ** is the maximum space required for an in-memory journal file handle
3136 ** and a regular journal file-handle. Note that a "regular journal-handle"
3137 ** may be a wrapper capable of caching the first portion of the journal
3138 ** file in memory to implement the atomic-write optimization (see
3139 ** source file journal.c).
3140 */
3141 if( sqlite3JournalSize(pVfs)>sqlite3MemJournalSize() ){
3142 journalFileSize = ROUND8(sqlite3JournalSize(pVfs));
3143 }else{
3144 journalFileSize = ROUND8(sqlite3MemJournalSize());
3145 }
3146
3147 /* Set the output variable to NULL in case an error occurs. */
3148 *ppPager = 0;
3149
3150 /* Compute and store the full pathname in an allocated buffer pointed
3151 ** to by zPathname, length nPathname. Or, if this is a temporary file,
3152 ** leave both nPathname and zPathname set to 0.
3153 */
3154 if( zFilename && zFilename[0] ){
3155 nPathname = pVfs->mxPathname+1;
3156 zPathname = sqlite3Malloc(nPathname*2);
3157 if( zPathname==0 ){
3158 return SQLITE_NOMEM;
3159 }
3160 #ifndef SQLITE_OMIT_MEMORYDB
3161 if( strcmp(zFilename,":memory:")==0 ){
3162 memDb = 1;
3163 zPathname[0] = 0;
3164 }else
3165 #endif
3166 {
3167 zPathname[0] = 0; /* Make sure initialized even if FullPathname() fails */
3168 rc = sqlite3OsFullPathname(pVfs, zFilename, nPathname, zPathname);
3169 }
3170
3171 nPathname = sqlite3Strlen30(zPathname);
3172 if( rc==SQLITE_OK && nPathname+8>pVfs->mxPathname ){
3173 /* This branch is taken when the journal path required by
3174 ** the database being opened will be more than pVfs->mxPathname
3175 ** bytes in length. This means the database cannot be opened,
3176 ** as it will not be possible to open the journal file or even
3177 ** check for a hot-journal before reading.
3178 */
3179 rc = SQLITE_CANTOPEN;
3180 }
3181 if( rc!=SQLITE_OK ){
3182 sqlite3_free(zPathname);
3183 return rc;
3184 }
3185 }
3186
3187 /* Allocate memory for the Pager structure, PCache object, the
3188 ** three file descriptors, the database file name and the journal
3189 ** file name. The layout in memory is as follows:
3190 **
3191 ** Pager object (sizeof(Pager) bytes)
3192 ** PCache object (sqlite3PcacheSize() bytes)
3193 ** Database file handle (pVfs->szOsFile bytes)
3194 ** Sub-journal file handle (journalFileSize bytes)
3195 ** Main journal file handle (journalFileSize bytes)
3196 ** Database file name (nPathname+1 bytes)
3197 ** Journal file name (nPathname+8+1 bytes)
3198 */
3199 pPtr = (u8 *)sqlite3MallocZero(
3200 ROUND8(sizeof(*pPager)) + /* Pager structure */
3201 ROUND8(pcacheSize) + /* PCache object */
3202 ROUND8(pVfs->szOsFile) + /* The main db file */
3203 journalFileSize * 2 + /* The two journal files */
3204 nPathname + 1 + /* zFilename */
3205 nPathname + 8 + 1 /* zJournal */
3206 );
3207 assert( EIGHT_BYTE_ALIGNMENT(SQLITE_INT_TO_PTR(journalFileSize)) );
3208 if( !pPtr ){
3209 sqlite3_free(zPathname);
3210 return SQLITE_NOMEM;
3211 }
3212 pPager = (Pager*)(pPtr);
3213 pPager->pPCache = (PCache*)(pPtr += ROUND8(sizeof(*pPager)));
3214 pPager->fd = (sqlite3_file*)(pPtr += ROUND8(pcacheSize));
3215 pPager->sjfd = (sqlite3_file*)(pPtr += ROUND8(pVfs->szOsFile));
3216 pPager->jfd = (sqlite3_file*)(pPtr += journalFileSize);
3217 pPager->zFilename = (char*)(pPtr += journalFileSize);
3218 assert( EIGHT_BYTE_ALIGNMENT(pPager->jfd) );
3219
3220 /* Fill in the Pager.zFilename and Pager.zJournal buffers, if required. */
3221 if( zPathname ){
3222 pPager->zJournal = (char*)(pPtr += nPathname + 1);
3223 memcpy(pPager->zFilename, zPathname, nPathname);
3224 memcpy(pPager->zJournal, zPathname, nPathname);
3225 memcpy(&pPager->zJournal[nPathname], "-journal", 8);
3226 if( pPager->zFilename[0]==0 ) pPager->zJournal[0] = 0;
3227 sqlite3_free(zPathname);
3228 }
3229 pPager->pVfs = pVfs;
3230 pPager->vfsFlags = vfsFlags;
3231
3232 /* Open the pager file.
3233 */
3234 if( zFilename && zFilename[0] && !memDb ){
3235 int fout = 0; /* VFS flags returned by xOpen() */
3236 rc = sqlite3OsOpen(pVfs, pPager->zFilename, pPager->fd, vfsFlags, &fout);
3237 readOnly = (fout&SQLITE_OPEN_READONLY);
3238
3239 /* If the file was successfully opened for read/write access,
3240 ** choose a default page size in case we have to create the
3241 ** database file. The default page size is the maximum of:
3242 **
3243 ** + SQLITE_DEFAULT_PAGE_SIZE,
3244 ** + The value returned by sqlite3OsSectorSize()
3245 ** + The largest page size that can be written atomically.
3246 */
3247 if( rc==SQLITE_OK && !readOnly ){
3248 setSectorSize(pPager);
3249 assert(SQLITE_DEFAULT_PAGE_SIZE<=SQLITE_MAX_DEFAULT_PAGE_SIZE);
3250 if( szPageDflt<pPager->sectorSize ){
3251 if( pPager->sectorSize>SQLITE_MAX_DEFAULT_PAGE_SIZE ){
3252 szPageDflt = SQLITE_MAX_DEFAULT_PAGE_SIZE;
3253 }else{
3254 szPageDflt = (u16)pPager->sectorSize;
3255 }
3256 }
3257 #ifdef SQLITE_ENABLE_ATOMIC_WRITE
3258 {
3259 int iDc = sqlite3OsDeviceCharacteristics(pPager->fd);
3260 int ii;
3261 assert(SQLITE_IOCAP_ATOMIC512==(512>>8));
3262 assert(SQLITE_IOCAP_ATOMIC64K==(65536>>8));
3263 assert(SQLITE_MAX_DEFAULT_PAGE_SIZE<=65536);
3264 for(ii=szPageDflt; ii<=SQLITE_MAX_DEFAULT_PAGE_SIZE; ii=ii*2){
3265 if( iDc&(SQLITE_IOCAP_ATOMIC|(ii>>8)) ){
3266 szPageDflt = ii;
3267 }
3268 }
3269 }
3270 #endif
3271 }
3272 }else{
3273 /* If a temporary file is requested, it is not opened immediately.
3274 ** In this case we accept the default page size and delay actually
3275 ** opening the file until the first call to OsWrite().
3276 **
3277 ** This branch is also run for an in-memory database. An in-memory
3278 ** database is the same as a temp-file that is never written out to
3279 ** disk and uses an in-memory rollback journal.
3280 */
3281 tempFile = 1;
3282 pPager->state = PAGER_EXCLUSIVE;
3283 readOnly = (vfsFlags&SQLITE_OPEN_READONLY);
3284 }
3285
3286 /* The following call to PagerSetPagesize() serves to set the value of
3287 ** Pager.pageSize and to allocate the Pager.pTmpSpace buffer.
3288 */
3289 if( rc==SQLITE_OK ){
3290 assert( pPager->memDb==0 );
3291 rc = sqlite3PagerSetPagesize(pPager, &szPageDflt, -1);
3292 testcase( rc!=SQLITE_OK );
3293 }
3294
3295 /* If an error occurred in either of the blocks above, free the
3296 ** Pager structure and close the file.
3297 */
3298 if( rc!=SQLITE_OK ){
3299 assert( !pPager->pTmpSpace );
3300 sqlite3OsClose(pPager->fd);
3301 sqlite3_free(pPager);
3302 return rc;
3303 }
3304
3305 /* Initialize the PCache object. */
3306 assert( nExtra<1000 );
3307 nExtra = ROUND8(nExtra);
3308 sqlite3PcacheOpen(szPageDflt, nExtra, !memDb,
3309 !memDb?pagerStress:0, (void *)pPager, pPager->pPCache);
3310
3311 PAGERTRACE(("OPEN %d %s\n", FILEHANDLEID(pPager->fd), pPager->zFilename));
3312 IOTRACE(("OPEN %p %s\n", pPager, pPager->zFilename))
3313
3314 pPager->useJournal = (u8)useJournal;
3315 pPager->noReadlock = (noReadlock && readOnly) ?1:0;
3316 /* pPager->stmtOpen = 0; */
3317 /* pPager->stmtInUse = 0; */
3318 /* pPager->nRef = 0; */
3319 pPager->dbSizeValid = (u8)memDb;
3320 /* pPager->stmtSize = 0; */
3321 /* pPager->stmtJSize = 0; */
3322 /* pPager->nPage = 0; */
3323 pPager->mxPgno = SQLITE_MAX_PAGE_COUNT;
3324 /* pPager->state = PAGER_UNLOCK; */
3325 assert( pPager->state == (tempFile ? PAGER_EXCLUSIVE : PAGER_UNLOCK) );
3326 /* pPager->errMask = 0; */
3327 pPager->tempFile = (u8)tempFile;
3328 assert( tempFile==PAGER_LOCKINGMODE_NORMAL
3329 || tempFile==PAGER_LOCKINGMODE_EXCLUSIVE );
3330 assert( PAGER_LOCKINGMODE_EXCLUSIVE==1 );
3331 pPager->exclusiveMode = (u8)tempFile;
3332 pPager->changeCountDone = pPager->tempFile;
3333 pPager->memDb = (u8)memDb;
3334 pPager->readOnly = (u8)readOnly;
3335 /* pPager->needSync = 0; */
3336 assert( useJournal || pPager->tempFile );
3337 pPager->noSync = pPager->tempFile;
3338 pPager->fullSync = pPager->noSync ?0:1;
3339 pPager->sync_flags = SQLITE_SYNC_NORMAL;
3340 /* pPager->pFirst = 0; */
3341 /* pPager->pFirstSynced = 0; */
3342 /* pPager->pLast = 0; */
3343 pPager->nExtra = (u16)nExtra;
3344 pPager->journalSizeLimit = SQLITE_DEFAULT_JOURNAL_SIZE_LIMIT;
3345 assert( isOpen(pPager->fd) || tempFile );
3346 setSectorSize(pPager);
3347 if( !useJournal ){
3348 pPager->journalMode = PAGER_JOURNALMODE_OFF;
3349 }else if( memDb ){
3350 pPager->journalMode = PAGER_JOURNALMODE_MEMORY;
3351 }
3352 /* pPager->xBusyHandler = 0; */
3353 /* pPager->pBusyHandlerArg = 0; */
3354 pPager->xReiniter = xReinit;
3355 /* memset(pPager->aHash, 0, sizeof(pPager->aHash)); */
3356 *ppPager = pPager;
3357 return SQLITE_OK;
3358 }
3359
3360
3361
3362 /*
3363 ** This function is called after transitioning from PAGER_UNLOCK to
3364 ** PAGER_SHARED state. It tests if there is a hot journal present in
3365 ** the file-system for the given pager. A hot journal is one that
3366 ** needs to be played back. According to this function, a hot-journal
3367 ** file exists if the following criteria are met:
3368 **
3369 ** * The journal file exists in the file system, and
3370 ** * No process holds a RESERVED or greater lock on the database file, and
3371 ** * The database file itself is greater than 0 bytes in size, and
3372 ** * The first byte of the journal file exists and is not 0x00.
3373 **
3374 ** If the current size of the database file is 0 but a journal file
3375 ** exists, that is probably an old journal left over from a prior
3376 ** database with the same name. In this case the journal file is
3377 ** just deleted using OsDelete, *pExists is set to 0 and SQLITE_OK
3378 ** is returned.
3379 **
3380 ** This routine does not check if there is a master journal filename
3381 ** at the end of the file. If there is, and that master journal file
3382 ** does not exist, then the journal file is not really hot. In this
3383 ** case this routine will return a false-positive. The pager_playback()
3384 ** routine will discover that the journal file is not really hot and
3385 ** will not roll it back.
3386 **
3387 ** If a hot-journal file is found to exist, *pExists is set to 1 and
3388 ** SQLITE_OK returned. If no hot-journal file is present, *pExists is
3389 ** set to 0 and SQLITE_OK returned. If an IO error occurs while trying
3390 ** to determine whether or not a hot-journal file exists, the IO error
3391 ** code is returned and the value of *pExists is undefined.
3392 */
3393 static int hasHotJournal(Pager *pPager, int *pExists){
3394 sqlite3_vfs * const pVfs = pPager->pVfs;
3395 int rc; /* Return code */
3396 int exists; /* True if a journal file is present */
3397
3398 assert( pPager!=0 );
3399 assert( pPager->useJournal );
3400 assert( isOpen(pPager->fd) );
3401 assert( !isOpen(pPager->jfd) );
3402 assert( pPager->state <= PAGER_SHARED );
3403
3404 *pExists = 0;
3405 rc = sqlite3OsAccess(pVfs, pPager->zJournal, SQLITE_ACCESS_EXISTS, &exists);
3406 if( rc==SQLITE_OK && exists ){
3407 int locked; /* True if some process holds a RESERVED lock */
3408
3409 /* Race condition here: Another process might have been holding the
3410 ** the RESERVED lock and have a journal open at the sqlite3OsAccess()
3411 ** call above, but then delete the journal and drop the lock before
3412 ** we get to the following sqlite3OsCheckReservedLock() call. If that
3413 ** is the case, this routine might think there is a hot journal when
3414 ** in fact there is none. This results in a false-positive which will
3415 ** be dealt with by the playback routine. Ticket #3883.
3416 */
3417 rc = sqlite3OsCheckReservedLock(pPager->fd, &locked);
3418 if( rc==SQLITE_OK && !locked ){
3419 int nPage;
3420
3421 /* Check the size of the database file. If it consists of 0 pages,
3422 ** then delete the journal file. See the header comment above for
3423 ** the reasoning here. Delete the obsolete journal file under
3424 ** a RESERVED lock to avoid race conditions and to avoid violating
3425 ** [H33020].
3426 */
3427 rc = sqlite3PagerPagecount(pPager, &nPage);
3428 if( rc==SQLITE_OK ){
3429 if( nPage==0 ){
3430 sqlite3BeginBenignMalloc();
3431 if( sqlite3OsLock(pPager->fd, RESERVED_LOCK)==SQLITE_OK ){
3432 sqlite3OsDelete(pVfs, pPager->zJournal, 0);
3433 sqlite3OsUnlock(pPager->fd, SHARED_LOCK);
3434 }
3435 sqlite3EndBenignMalloc();
3436 }else{
3437 /* The journal file exists and no other connection has a reserved
3438 ** or greater lock on the database file. Now check that there is
3439 ** at least one non-zero bytes at the start of the journal file.
3440 ** If there is, then we consider this journal to be hot. If not,
3441 ** it can be ignored.
3442 */
3443 int f = SQLITE_OPEN_READONLY|SQLITE_OPEN_MAIN_JOURNAL;
3444 rc = sqlite3OsOpen(pVfs, pPager->zJournal, pPager->jfd, f, &f);
3445 if( rc==SQLITE_OK ){
3446 u8 first = 0;
3447 rc = sqlite3OsRead(pPager->jfd, (void *)&first, 1, 0);
3448 if( rc==SQLITE_IOERR_SHORT_READ ){
3449 rc = SQLITE_OK;
3450 }
3451 sqlite3OsClose(pPager->jfd);
3452 *pExists = (first!=0);
3453 }else if( rc==SQLITE_CANTOPEN ){
3454 /* If we cannot open the rollback journal file in order to see if
3455 ** its has a zero header, that might be due to an I/O error, or
3456 ** it might be due to the race condition described above and in
3457 ** ticket #3883. Either way, assume that the journal is hot.
3458 ** This might be a false positive. But if it is, then the
3459 ** automatic journal playback and recovery mechanism will deal
3460 ** with it under an EXCLUSIVE lock where we do not need to
3461 ** worry so much with race conditions.
3462 */
3463 *pExists = 1;
3464 rc = SQLITE_OK;
3465 }
3466 }
3467 }
3468 }
3469 }
3470
3471 return rc;
3472 }
3473
3474 /*
3475 ** Read the content for page pPg out of the database file and into
3476 ** pPg->pData. A shared lock or greater must be held on the database
3477 ** file before this function is called.
3478 **
3479 ** If page 1 is read, then the value of Pager.dbFileVers[] is set to
3480 ** the value read from the database file.
3481 **
3482 ** If an IO error occurs, then the IO error is returned to the caller.
3483 ** Otherwise, SQLITE_OK is returned.
3484 */
3485 static int readDbPage(PgHdr *pPg){
3486 Pager *pPager = pPg->pPager; /* Pager object associated with page pPg */
3487 Pgno pgno = pPg->pgno; /* Page number to read */
3488 int rc; /* Return code */
3489 i64 iOffset; /* Byte offset of file to read from */
3490
3491 assert( pPager->state>=PAGER_SHARED && !MEMDB );
3492 assert( isOpen(pPager->fd) );
3493
3494 if( NEVER(!isOpen(pPager->fd)) ){
3495 assert( pPager->tempFile );
3496 memset(pPg->pData, 0, pPager->pageSize);
3497 return SQLITE_OK;
3498 }
3499 iOffset = (pgno-1)*(i64)pPager->pageSize;
3500 rc = sqlite3OsRead(pPager->fd, pPg->pData, pPager->pageSize, iOffset);
3501 if( rc==SQLITE_IOERR_SHORT_READ ){
3502 rc = SQLITE_OK;
3503 }
3504 if( pgno==1 ){
3505 u8 *dbFileVers = &((u8*)pPg->pData)[24];
3506 memcpy(&pPager->dbFileVers, dbFileVers, sizeof(pPager->dbFileVers));
3507 }
3508 CODEC1(pPager, pPg->pData, pgno, 3, rc = SQLITE_NOMEM);
3509
3510 PAGER_INCR(sqlite3_pager_readdb_count);
3511 PAGER_INCR(pPager->nRead);
3512 IOTRACE(("PGIN %p %d\n", pPager, pgno));
3513 PAGERTRACE(("FETCH %d page %d hash(%08x)\n",
3514 PAGERID(pPager), pgno, pager_pagehash(pPg)));
3515
3516 return rc;
3517 }
3518
3519 /*
3520 ** This function is called to obtain a shared lock on the database file.
3521 ** It is illegal to call sqlite3PagerAcquire() until after this function
3522 ** has been successfully called. If a shared-lock is already held when
3523 ** this function is called, it is a no-op.
3524 **
3525 ** The following operations are also performed by this function.
3526 **
3527 ** 1) If the pager is currently in PAGER_UNLOCK state (no lock held
3528 ** on the database file), then an attempt is made to obtain a
3529 ** SHARED lock on the database file. Immediately after obtaining
3530 ** the SHARED lock, the file-system is checked for a hot-journal,
3531 ** which is played back if present. Following any hot-journal
3532 ** rollback, the contents of the cache are validated by checking
3533 ** the 'change-counter' field of the database file header and
3534 ** discarded if they are found to be invalid.
3535 **
3536 ** 2) If the pager is running in exclusive-mode, and there are currently
3537 ** no outstanding references to any pages, and is in the error state,
3538 ** then an attempt is made to clear the error state by discarding
3539 ** the contents of the page cache and rolling back any open journal
3540 ** file.
3541 **
3542 ** If the operation described by (2) above is not attempted, and if the
3543 ** pager is in an error state other than SQLITE_FULL when this is called,
3544 ** the error state error code is returned. It is permitted to read the
3545 ** database when in SQLITE_FULL error state.
3546 **
3547 ** Otherwise, if everything is successful, SQLITE_OK is returned. If an
3548 ** IO error occurs while locking the database, checking for a hot-journal
3549 ** file or rolling back a journal file, the IO error code is returned.
3550 */
3551 int sqlite3PagerSharedLock(Pager *pPager){
3552 int rc = SQLITE_OK; /* Return code */
3553 int isErrorReset = 0; /* True if recovering from error state */
3554
3555 /* This routine is only called from b-tree and only when there are no
3556 ** outstanding pages */
3557 assert( sqlite3PcacheRefCount(pPager->pPCache)==0 );
3558 if( NEVER(MEMDB && pPager->errCode) ){ return pPager->errCode; }
3559
3560 /* If this database is in an error-state, now is a chance to clear
3561 ** the error. Discard the contents of the pager-cache and rollback
3562 ** any hot journal in the file-system.
3563 */
3564 if( pPager->errCode ){
3565 if( isOpen(pPager->jfd) || pPager->zJournal ){
3566 isErrorReset = 1;
3567 }
3568 pPager->errCode = SQLITE_OK;
3569 pager_reset(pPager);
3570 }
3571
3572 if( pPager->state==PAGER_UNLOCK || isErrorReset ){
3573 sqlite3_vfs * const pVfs = pPager->pVfs;
3574 int isHotJournal = 0;
3575 assert( !MEMDB );
3576 assert( sqlite3PcacheRefCount(pPager->pPCache)==0 );
3577 if( pPager->noReadlock ){
3578 assert( pPager->readOnly );
3579 pPager->state = PAGER_SHARED;
3580 }else{
3581 rc = pager_wait_on_lock(pPager, SHARED_LOCK);
3582 if( rc!=SQLITE_OK ){
3583 assert( pPager->state==PAGER_UNLOCK );
3584 return pager_error(pPager, rc);
3585 }
3586 }
3587 assert( pPager->state>=SHARED_LOCK );
3588
3589 /* If a journal file exists, and there is no RESERVED lock on the
3590 ** database file, then it either needs to be played back or deleted.
3591 */
3592 if( !isErrorReset ){
3593 assert( pPager->state <= PAGER_SHARED );
3594 rc = hasHotJournal(pPager, &isHotJournal);
3595 if( rc!=SQLITE_OK ){
3596 goto failed;
3597 }
3598 }
3599 if( isErrorReset || isHotJournal ){
3600 /* Get an EXCLUSIVE lock on the database file. At this point it is
3601 ** important that a RESERVED lock is not obtained on the way to the
3602 ** EXCLUSIVE lock. If it were, another process might open the
3603 ** database file, detect the RESERVED lock, and conclude that the
3604 ** database is safe to read while this process is still rolling the
3605 ** hot-journal back.
3606 **
3607 ** Because the intermediate RESERVED lock is not requested, any
3608 ** other process attempting to access the database file will get to
3609 ** this point in the code and fail to obtain its own EXCLUSIVE lock
3610 ** on the database file.
3611 */
3612 if( pPager->state<EXCLUSIVE_LOCK ){
3613 rc = sqlite3OsLock(pPager->fd, EXCLUSIVE_LOCK);
3614 if( rc!=SQLITE_OK ){
3615 rc = pager_error(pPager, rc);
3616 goto failed;
3617 }
3618 pPager->state = PAGER_EXCLUSIVE;
3619 }
3620
3621 /* Open the journal for read/write access. This is because in
3622 ** exclusive-access mode the file descriptor will be kept open and
3623 ** possibly used for a transaction later on. On some systems, the
3624 ** OsTruncate() call used in exclusive-access mode also requires
3625 ** a read/write file handle.
3626 */
3627 if( !isOpen(pPager->jfd) ){
3628 int res;
3629 rc = sqlite3OsAccess(pVfs,pPager->zJournal,SQLITE_ACCESS_EXISTS,&res);
3630 if( rc==SQLITE_OK ){
3631 if( res ){
3632 int fout = 0;
3633 int f = SQLITE_OPEN_READWRITE|SQLITE_OPEN_MAIN_JOURNAL;
3634 assert( !pPager->tempFile );
3635 rc = sqlite3OsOpen(pVfs, pPager->zJournal, pPager->jfd, f, &fout);
3636 assert( rc!=SQLITE_OK || isOpen(pPager->jfd) );
3637 if( rc==SQLITE_OK && fout&SQLITE_OPEN_READONLY ){
3638 rc = SQLITE_CANTOPEN;
3639 sqlite3OsClose(pPager->jfd);
3640 }
3641 }else{
3642 /* If the journal does not exist, it usually means that some
3643 ** other connection managed to get in and roll it back before
3644 ** this connection obtained the exclusive lock above. Or, it
3645 ** may mean that the pager was in the error-state when this
3646 ** function was called and the journal file does not exist. */
3647 rc = pager_end_transaction(pPager, 0);
3648 }
3649 }
3650 }
3651 if( rc!=SQLITE_OK ){
3652 goto failed;
3653 }
3654
3655 /* TODO: Why are these cleared here? Is it necessary? */
3656 pPager->journalStarted = 0;
3657 pPager->journalOff = 0;
3658 pPager->setMaster = 0;
3659 pPager->journalHdr = 0;
3660
3661 /* Playback and delete the journal. Drop the database write
3662 ** lock and reacquire the read lock. Purge the cache before
3663 ** playing back the hot-journal so that we don't end up with
3664 ** an inconsistent cache.
3665 */
3666 if( isOpen(pPager->jfd) ){
3667 rc = pager_playback(pPager, 1);
3668 if( rc!=SQLITE_OK ){
3669 rc = pager_error(pPager, rc);
3670 goto failed;
3671 }
3672 }
3673 assert( (pPager->state==PAGER_SHARED)
3674 || (pPager->exclusiveMode && pPager->state>PAGER_SHARED)
3675 );
3676 }
3677
3678 if( pPager->pBackup || sqlite3PcachePagecount(pPager->pPCache)>0 ){
3679 /* The shared-lock has just been acquired on the database file
3680 ** and there are already pages in the cache (from a previous
3681 ** read or write transaction). Check to see if the database
3682 ** has been modified. If the database has changed, flush the
3683 ** cache.
3684 **
3685 ** Database changes is detected by looking at 15 bytes beginning
3686 ** at offset 24 into the file. The first 4 of these 16 bytes are
3687 ** a 32-bit counter that is incremented with each change. The
3688 ** other bytes change randomly with each file change when
3689 ** a codec is in use.
3690 **
3691 ** There is a vanishingly small chance that a change will not be
3692 ** detected. The chance of an undetected change is so small that
3693 ** it can be neglected.
3694 */
3695 char dbFileVers[sizeof(pPager->dbFileVers)];
3696 sqlite3PagerPagecount(pPager, 0);
3697
3698 if( pPager->errCode ){
3699 rc = pPager->errCode;
3700 goto failed;
3701 }
3702
3703 assert( pPager->dbSizeValid );
3704 if( pPager->dbSize>0 ){
3705 IOTRACE(("CKVERS %p %d\n", pPager, sizeof(dbFileVers)));
3706 rc = sqlite3OsRead(pPager->fd, &dbFileVers, sizeof(dbFileVers), 24);
3707 if( rc!=SQLITE_OK ){
3708 goto failed;
3709 }
3710 }else{
3711 memset(dbFileVers, 0, sizeof(dbFileVers));
3712 }
3713
3714 if( memcmp(pPager->dbFileVers, dbFileVers, sizeof(dbFileVers))!=0 ){
3715 pager_reset(pPager);
3716 }
3717 }
3718 assert( pPager->exclusiveMode || pPager->state==PAGER_SHARED );
3719 }
3720
3721 failed:
3722 if( rc!=SQLITE_OK ){
3723 /* pager_unlock() is a no-op for exclusive mode and in-memory databases. */
3724 pager_unlock(pPager);
3725 }
3726 return rc;
3727 }
3728
3729 /*
3730 ** If the reference count has reached zero, rollback any active
3731 ** transaction and unlock the pager.
3732 **
3733 ** Except, in locking_mode=EXCLUSIVE when there is nothing to in
3734 ** the rollback journal, the unlock is not performed and there is
3735 ** nothing to rollback, so this routine is a no-op.
3736 */
3737 static void pagerUnlockIfUnused(Pager *pPager){
3738 if( (sqlite3PcacheRefCount(pPager->pPCache)==0)
3739 && (!pPager->exclusiveMode || pPager->journalOff>0)
3740 ){
3741 pagerUnlockAndRollback(pPager);
3742 }
3743 }
3744
3745 /*
3746 ** Acquire a reference to page number pgno in pager pPager (a page
3747 ** reference has type DbPage*). If the requested reference is
3748 ** successfully obtained, it is copied to *ppPage and SQLITE_OK returned.
3749 **
3750 ** If the requested page is already in the cache, it is returned.
3751 ** Otherwise, a new page object is allocated and populated with data
3752 ** read from the database file. In some cases, the pcache module may
3753 ** choose not to allocate a new page object and may reuse an existing
3754 ** object with no outstanding references.
3755 **
3756 ** The extra data appended to a page is always initialized to zeros the
3757 ** first time a page is loaded into memory. If the page requested is
3758 ** already in the cache when this function is called, then the extra
3759 ** data is left as it was when the page object was last used.
3760 **
3761 ** If the database image is smaller than the requested page or if a
3762 ** non-zero value is passed as the noContent parameter and the
3763 ** requested page is not already stored in the cache, then no
3764 ** actual disk read occurs. In this case the memory image of the
3765 ** page is initialized to all zeros.
3766 **
3767 ** If noContent is true, it means that we do not care about the contents
3768 ** of the page. This occurs in two seperate scenarios:
3769 **
3770 ** a) When reading a free-list leaf page from the database, and
3771 **
3772 ** b) When a savepoint is being rolled back and we need to load
3773 ** a new page into the cache to populate with the data read
3774 ** from the savepoint journal.
3775 **
3776 ** If noContent is true, then the data returned is zeroed instead of
3777 ** being read from the database. Additionally, the bits corresponding
3778 ** to pgno in Pager.pInJournal (bitvec of pages already written to the
3779 ** journal file) and the PagerSavepoint.pInSavepoint bitvecs of any open
3780 ** savepoints are set. This means if the page is made writable at any
3781 ** point in the future, using a call to sqlite3PagerWrite(), its contents
3782 ** will not be journaled. This saves IO.
3783 **
3784 ** The acquisition might fail for several reasons. In all cases,
3785 ** an appropriate error code is returned and *ppPage is set to NULL.
3786 **
3787 ** See also sqlite3PagerLookup(). Both this routine and Lookup() attempt
3788 ** to find a page in the in-memory cache first. If the page is not already
3789 ** in memory, this routine goes to disk to read it in whereas Lookup()
3790 ** just returns 0. This routine acquires a read-lock the first time it
3791 ** has to go to disk, and could also playback an old journal if necessary.
3792 ** Since Lookup() never goes to disk, it never has to deal with locks
3793 ** or journal files.
3794 */
3795 int sqlite3PagerAcquire(
3796 Pager *pPager, /* The pager open on the database file */
3797 Pgno pgno, /* Page number to fetch */
3798 DbPage **ppPage, /* Write a pointer to the page here */
3799 int noContent /* Do not bother reading content from disk if true */
3800 ){
3801 /* This just passes through to our modified version with NULL data. */
3802 return sqlite3PagerAcquire2(pPager, pgno, ppPage, noContent, 0);
3803 }
3804
3805 /*
3806 ** This is an internal version of sqlite3PagerAcquire that takes an extra
3807 ** parameter of data to use to fill the page with. This allows more efficient
3808 ** filling for preloaded data. If this extra parameter is NULL, we'll go to
3809 ** the file.
3810 **
3811 ** See sqlite3PagerLoadall which uses this function.
3812 */
3813 int sqlite3PagerAcquire2(
3814 Pager *pPager, /* The pager open on the database file */
3815 Pgno pgno, /* Page number to fetch */
3816 DbPage **ppPage, /* Write a pointer to the page here */
3817 int noContent, /* Do not bother reading content from disk if true */
3818 unsigned char* pDataToFill
3819 ){
3820 int rc;
3821 PgHdr *pPg;
3822
3823 assert( assert_pager_state(pPager) );
3824 assert( pPager->state>PAGER_UNLOCK );
3825
3826 if( pgno==0 ){
3827 return SQLITE_CORRUPT_BKPT;
3828 }
3829
3830 /* If the pager is in the error state, return an error immediately.
3831 ** Otherwise, request the page from the PCache layer. */
3832 if( pPager->errCode!=SQLITE_OK && pPager->errCode!=SQLITE_FULL ){
3833 rc = pPager->errCode;
3834 }else{
3835 rc = sqlite3PcacheFetch(pPager->pPCache, pgno, 1, ppPage);
3836 }
3837
3838 if( rc!=SQLITE_OK ){
3839 /* Either the call to sqlite3PcacheFetch() returned an error or the
3840 ** pager was already in the error-state when this function was called.
3841 ** Set pPg to 0 and jump to the exception handler. */
3842 pPg = 0;
3843 goto pager_acquire_err;
3844 }
3845 assert( (*ppPage)->pgno==pgno );
3846 assert( (*ppPage)->pPager==pPager || (*ppPage)->pPager==0 );
3847
3848 if( (*ppPage)->pPager ){
3849 /* In this case the pcache already contains an initialized copy of
3850 ** the page. Return without further ado. */
3851 assert( pgno<=PAGER_MAX_PGNO && pgno!=PAGER_MJ_PGNO(pPager) );
3852 PAGER_INCR(pPager->nHit);
3853 return SQLITE_OK;
3854
3855 }else{
3856 /* The pager cache has created a new page. Its content needs to
3857 ** be initialized. */
3858 int nMax;
3859
3860 PAGER_INCR(pPager->nMiss);
3861 pPg = *ppPage;
3862 pPg->pPager = pPager;
3863
3864 /* The maximum page number is 2^31. Return SQLITE_CORRUPT if a page
3865 ** number greater than this, or the unused locking-page, is requested. */
3866 if( pgno>PAGER_MAX_PGNO || pgno==PAGER_MJ_PGNO(pPager) ){
3867 rc = SQLITE_CORRUPT_BKPT;
3868 goto pager_acquire_err;
3869 }
3870
3871 rc = sqlite3PagerPagecount(pPager, &nMax);
3872 if( rc!=SQLITE_OK ){
3873 goto pager_acquire_err;
3874 }
3875
3876 if( nMax<(int)pgno || MEMDB || noContent ){
3877 if( pgno>pPager->mxPgno ){
3878 rc = SQLITE_FULL;
3879 goto pager_acquire_err;
3880 }
3881 if( noContent ){
3882 /* Failure to set the bits in the InJournal bit-vectors is benign.
3883 ** It merely means that we might do some extra work to journal a
3884 ** page that does not need to be journaled. Nevertheless, be sure
3885 ** to test the case where a malloc error occurs while trying to set
3886 ** a bit in a bit vector.
3887 */
3888 sqlite3BeginBenignMalloc();
3889 if( pgno<=pPager->dbOrigSize ){
3890 TESTONLY( rc = ) sqlite3BitvecSet(pPager->pInJournal, pgno);
3891 testcase( rc==SQLITE_NOMEM );
3892 }
3893 TESTONLY( rc = ) addToSavepointBitvecs(pPager, pgno);
3894 testcase( rc==SQLITE_NOMEM );
3895 sqlite3EndBenignMalloc();
3896 }else{
3897 memset(pPg->pData, 0, pPager->pageSize);
3898 }
3899 IOTRACE(("ZERO %p %d\n", pPager, pgno));
3900 }else{
3901 assert( pPg->pPager==pPager );
3902 if( pDataToFill ){
3903 /* Just copy from the given memory */
3904 memcpy(pPg->pData, pDataToFill, pPager->pageSize);
3905 CODEC1(pPager, pPg->pData, pPg->pgno, 3, rc = SQLITE_NOMEM;
3906 goto pager_acquire_err);
3907 }else{
3908 /* Load from disk (old regular sqlite code path) */
3909 rc = readDbPage(pPg);
3910 if( rc!=SQLITE_OK ){
3911 goto pager_acquire_err;
3912 }
3913 }
3914 }
3915 #ifdef SQLITE_CHECK_PAGES
3916 pPg->pageHash = pager_pagehash(pPg);
3917 #endif
3918 }
3919
3920 return SQLITE_OK;
3921
3922 pager_acquire_err:
3923 assert( rc!=SQLITE_OK );
3924 if( pPg ){
3925 sqlite3PcacheDrop(pPg);
3926 }
3927 pagerUnlockIfUnused(pPager);
3928
3929 *ppPage = 0;
3930 return rc;
3931 }
3932
3933 /*
3934 ** Acquire a page if it is already in the in-memory cache. Do
3935 ** not read the page from disk. Return a pointer to the page,
3936 ** or 0 if the page is not in cache. Also, return 0 if the
3937 ** pager is in PAGER_UNLOCK state when this function is called,
3938 ** or if the pager is in an error state other than SQLITE_FULL.
3939 **
3940 ** See also sqlite3PagerGet(). The difference between this routine
3941 ** and sqlite3PagerGet() is that _get() will go to the disk and read
3942 ** in the page if the page is not already in cache. This routine
3943 ** returns NULL if the page is not in cache or if a disk I/O error
3944 ** has ever happened.
3945 */
3946 DbPage *sqlite3PagerLookup(Pager *pPager, Pgno pgno){
3947 PgHdr *pPg = 0;
3948 assert( pPager!=0 );
3949 assert( pgno!=0 );
3950 assert( pPager->pPCache!=0 );
3951 assert( pPager->state > PAGER_UNLOCK );
3952 sqlite3PcacheFetch(pPager->pPCache, pgno, 0, &pPg);
3953 return pPg;
3954 }
3955
3956 /*
3957 ** Release a page reference.
3958 **
3959 ** If the number of references to the page drop to zero, then the
3960 ** page is added to the LRU list. When all references to all pages
3961 ** are released, a rollback occurs and the lock on the database is
3962 ** removed.
3963 */
3964 void sqlite3PagerUnref(DbPage *pPg){
3965 if( pPg ){
3966 Pager *pPager = pPg->pPager;
3967 sqlite3PcacheRelease(pPg);
3968 pagerUnlockIfUnused(pPager);
3969 }
3970 }
3971
3972 /*
3973 ** If the main journal file has already been opened, ensure that the
3974 ** sub-journal file is open too. If the main journal is not open,
3975 ** this function is a no-op.
3976 **
3977 ** SQLITE_OK is returned if everything goes according to plan.
3978 ** An SQLITE_IOERR_XXX error code is returned if a call to
3979 ** sqlite3OsOpen() fails.
3980 */
3981 static int openSubJournal(Pager *pPager){
3982 int rc = SQLITE_OK;
3983 if( isOpen(pPager->jfd) && !isOpen(pPager->sjfd) ){
3984 if( pPager->journalMode==PAGER_JOURNALMODE_MEMORY || pPager->subjInMemory ){
3985 sqlite3MemJournalOpen(pPager->sjfd);
3986 }else{
3987 rc = pagerOpentemp(pPager, pPager->sjfd, SQLITE_OPEN_SUBJOURNAL);
3988 }
3989 }
3990 return rc;
3991 }
3992
3993 /*
3994 ** This function is called at the start of every write transaction.
3995 ** There must already be a RESERVED or EXCLUSIVE lock on the database
3996 ** file when this routine is called.
3997 **
3998 ** Open the journal file for pager pPager and write a journal header
3999 ** to the start of it. If there are active savepoints, open the sub-journal
4000 ** as well. This function is only used when the journal file is being
4001 ** opened to write a rollback log for a transaction. It is not used
4002 ** when opening a hot journal file to roll it back.
4003 **
4004 ** If the journal file is already open (as it may be in exclusive mode),
4005 ** then this function just writes a journal header to the start of the
4006 ** already open file.
4007 **
4008 ** Whether or not the journal file is opened by this function, the
4009 ** Pager.pInJournal bitvec structure is allocated.
4010 **
4011 ** Return SQLITE_OK if everything is successful. Otherwise, return
4012 ** SQLITE_NOMEM if the attempt to allocate Pager.pInJournal fails, or
4013 ** an IO error code if opening or writing the journal file fails.
4014 */
4015 static int pager_open_journal(Pager *pPager){
4016 int rc = SQLITE_OK; /* Return code */
4017 sqlite3_vfs * const pVfs = pPager->pVfs; /* Local cache of vfs pointer */
4018
4019 assert( pPager->state>=PAGER_RESERVED );
4020 assert( pPager->useJournal );
4021 assert( pPager->journalMode!=PAGER_JOURNALMODE_OFF );
4022 assert( pPager->pInJournal==0 );
4023
4024 /* If already in the error state, this function is a no-op. But on
4025 ** the other hand, this routine is never called if we are already in
4026 ** an error state. */
4027 if( NEVER(pPager->errCode) ) return pPager->errCode;
4028
4029 /* TODO: Is it really possible to get here with dbSizeValid==0? If not,
4030 ** the call to PagerPagecount() can be removed.
4031 */
4032 testcase( pPager->dbSizeValid==0 );
4033 sqlite3PagerPagecount(pPager, 0);
4034
4035 pPager->pInJournal = sqlite3BitvecCreate(pPager->dbSize);
4036 if( pPager->pInJournal==0 ){
4037 return SQLITE_NOMEM;
4038 }
4039
4040 /* Open the journal file if it is not already open. */
4041 if( !isOpen(pPager->jfd) ){
4042 if( pPager->journalMode==PAGER_JOURNALMODE_MEMORY ){
4043 sqlite3MemJournalOpen(pPager->jfd);
4044 }else{
4045 const int flags = /* VFS flags to open journal file */
4046 SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE|
4047 (pPager->tempFile ?
4048 (SQLITE_OPEN_DELETEONCLOSE|SQLITE_OPEN_TEMP_JOURNAL):
4049 (SQLITE_OPEN_MAIN_JOURNAL)
4050 );
4051 #ifdef SQLITE_ENABLE_ATOMIC_WRITE
4052 rc = sqlite3JournalOpen(
4053 pVfs, pPager->zJournal, pPager->jfd, flags, jrnlBufferSize(pPager)
4054 );
4055 #else
4056 rc = sqlite3OsOpen(pVfs, pPager->zJournal, pPager->jfd, flags, 0);
4057 #endif
4058 }
4059 assert( rc!=SQLITE_OK || isOpen(pPager->jfd) );
4060 }
4061
4062
4063 /* Write the first journal header to the journal file and open
4064 ** the sub-journal if necessary.
4065 */
4066 if( rc==SQLITE_OK ){
4067 /* TODO: Check if all of these are really required. */
4068 pPager->dbOrigSize = pPager->dbSize;
4069 pPager->journalStarted = 0;
4070 pPager->needSync = 0;
4071 pPager->nRec = 0;
4072 pPager->journalOff = 0;
4073 pPager->setMaster = 0;
4074 pPager->journalHdr = 0;
4075 rc = writeJournalHdr(pPager);
4076 }
4077 if( rc==SQLITE_OK && pPager->nSavepoint ){
4078 rc = openSubJournal(pPager);
4079 }
4080
4081 if( rc!=SQLITE_OK ){
4082 sqlite3BitvecDestroy(pPager->pInJournal);
4083 pPager->pInJournal = 0;
4084 }
4085 return rc;
4086 }
4087
4088 /*
4089 ** Begin a write-transaction on the specified pager object. If a
4090 ** write-transaction has already been opened, this function is a no-op.
4091 **
4092 ** If the exFlag argument is false, then acquire at least a RESERVED
4093 ** lock on the database file. If exFlag is true, then acquire at least
4094 ** an EXCLUSIVE lock. If such a lock is already held, no locking
4095 ** functions need be called.
4096 **
4097 ** If this is not a temporary or in-memory file and, the journal file is
4098 ** opened if it has not been already. For a temporary file, the opening
4099 ** of the journal file is deferred until there is an actual need to
4100 ** write to the journal. TODO: Why handle temporary files differently?
4101 **
4102 ** If the journal file is opened (or if it is already open), then a
4103 ** journal-header is written to the start of it.
4104 **
4105 ** If the subjInMemory argument is non-zero, then any sub-journal opened
4106 ** within this transaction will be opened as an in-memory file. This
4107 ** has no effect if the sub-journal is already opened (as it may be when
4108 ** running in exclusive mode) or if the transaction does not require a
4109 ** sub-journal. If the subjInMemory argument is zero, then any required
4110 ** sub-journal is implemented in-memory if pPager is an in-memory database,
4111 ** or using a temporary file otherwise.
4112 */
4113 int sqlite3PagerBegin(Pager *pPager, int exFlag, int subjInMemory){
4114 int rc = SQLITE_OK;
4115 assert( pPager->state!=PAGER_UNLOCK );
4116 pPager->subjInMemory = (u8)subjInMemory;
4117 if( pPager->state==PAGER_SHARED ){
4118 assert( pPager->pInJournal==0 );
4119 assert( !MEMDB && !pPager->tempFile );
4120
4121 /* Obtain a RESERVED lock on the database file. If the exFlag parameter
4122 ** is true, then immediately upgrade this to an EXCLUSIVE lock. The
4123 ** busy-handler callback can be used when upgrading to the EXCLUSIVE
4124 ** lock, but not when obtaining the RESERVED lock.
4125 */
4126 rc = sqlite3OsLock(pPager->fd, RESERVED_LOCK);
4127 if( rc==SQLITE_OK ){
4128 pPager->state = PAGER_RESERVED;
4129 if( exFlag ){
4130 rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);
4131 }
4132 }
4133
4134 /* If the required locks were successfully obtained, open the journal
4135 ** file and write the first journal-header to it.
4136 */
4137 if( rc==SQLITE_OK && pPager->journalMode!=PAGER_JOURNALMODE_OFF ){
4138 rc = pager_open_journal(pPager);
4139 }
4140 }else if( isOpen(pPager->jfd) && pPager->journalOff==0 ){
4141 /* This happens when the pager was in exclusive-access mode the last
4142 ** time a (read or write) transaction was successfully concluded
4143 ** by this connection. Instead of deleting the journal file it was
4144 ** kept open and either was truncated to 0 bytes or its header was
4145 ** overwritten with zeros.
4146 */
4147 assert( pPager->nRec==0 );
4148 assert( pPager->dbOrigSize==0 );
4149 assert( pPager->pInJournal==0 );
4150 rc = pager_open_journal(pPager);
4151 }
4152
4153 PAGERTRACE(("TRANSACTION %d\n", PAGERID(pPager)));
4154 assert( !isOpen(pPager->jfd) || pPager->journalOff>0 || rc!=SQLITE_OK );
4155 if( rc!=SQLITE_OK ){
4156 assert( !pPager->dbModified );
4157 /* Ignore any IO error that occurs within pager_end_transaction(). The
4158 ** purpose of this call is to reset the internal state of the pager
4159 ** sub-system. It doesn't matter if the journal-file is not properly
4160 ** finalized at this point (since it is not a valid journal file anyway).
4161 */
4162 pager_end_transaction(pPager, 0);
4163 }
4164 return rc;
4165 }
4166
4167 /*
4168 ** Mark a single data page as writeable. The page is written into the
4169 ** main journal or sub-journal as required. If the page is written into
4170 ** one of the journals, the corresponding bit is set in the
4171 ** Pager.pInJournal bitvec and the PagerSavepoint.pInSavepoint bitvecs
4172 ** of any open savepoints as appropriate.
**
** Returns SQLITE_OK on success. Otherwise returns one of:
**
**   * the pager's stored errCode, if the pager is already in an error state,
**   * SQLITE_PERM if the pager is read-only,
**   * SQLITE_NOMEM if the codec or a bit-vector update fails, or
**   * the error code produced while starting the transaction, opening the
**     journal, or writing to the journal or sub-journal.
**
** As a side effect Pager.dbModified is set and Pager.dbSize is raised to
** the page number of pPg if it was previously smaller.
4173 */
4174 static int pager_write(PgHdr *pPg){
4175 void *pData = pPg->pData;
4176 Pager *pPager = pPg->pPager;
4177 int rc = SQLITE_OK;
4178
4179 /* This routine is not called unless a transaction has already been
4180 ** started.
4181 */
4182 assert( pPager->state>=PAGER_RESERVED );
4183
4184 /* If an error has been previously detected, we should not be
4185 ** calling this routine. Repeat the error for robustness.
4186 */
4187 if( NEVER(pPager->errCode) ) return pPager->errCode;
4188
4189 /* Higher-level routines never call this function if database is not
4190 ** writable. But check anyway, just for robustness. */
4191 if( NEVER(pPager->readOnly) ) return SQLITE_PERM;
4192
4193 assert( !pPager->setMaster );
4194
4195 CHECK_PAGE(pPg);
4196
4197 /* Mark the page as dirty. If the page has already been written
4198 ** to the journal then we can return right away.
4199 */
4200 sqlite3PcacheMakeDirty(pPg);
4201 if( pageInJournal(pPg) && !subjRequiresPage(pPg) ){
/* Fast path: neither journal needs a copy of this page. Only record
** that the database image has been modified. */
4202 pPager->dbModified = 1;
4203 }else{
4204
4205 /* If we get this far, it means that the page needs to be
4206 ** written to the transaction journal or the checkpoint journal
4207 ** or both.
4208 **
4209 ** Higher level routines should have already started a transaction,
4210 ** which means they have acquired the necessary locks and opened
4211 ** a rollback journal. Double-check to make sure this is the case.
4212 */
4213 rc = sqlite3PagerBegin(pPager, 0, pPager->subjInMemory);
4214 if( NEVER(rc!=SQLITE_OK) ){
4215 return rc;
4216 }
/* The journal file may still be closed after sqlite3PagerBegin() has
** returned. Open it now unless journaling is switched off. */
4217 if( !isOpen(pPager->jfd) && pPager->journalMode!=PAGER_JOURNALMODE_OFF ){
4218 assert( pPager->useJournal );
4219 rc = pager_open_journal(pPager);
4220 if( rc!=SQLITE_OK ) return rc;
4221 }
4222 pPager->dbModified = 1;
4223
4224 /* The transaction journal now exists and we have a RESERVED or an
4225 ** EXCLUSIVE lock on the main database file. Write the current page to
4226 ** the transaction journal if it is not there already.
4227 */
4228 if( !pageInJournal(pPg) && isOpen(pPager->jfd) ){
/* Only pages numbered no higher than dbOrigSize are copied into the
** journal. Pages beyond that are handled by the else branch below. */
4229 if( pPg->pgno<=pPager->dbOrigSize ){
4230 u32 cksum;
4231 char *pData2;
4232
4233 /* We should never write to the journal file the page that
4234 ** contains the database locks. The following assert verifies
4235 ** that we do not. */
4236 assert( pPg->pgno!=PAGER_MJ_PGNO(pPager) );
4237 CODEC2(pPager, pData, pPg->pgno, 7, return SQLITE_NOMEM, pData2);
/* The journal record is written as three pieces: the 4-byte page
** number, the page content, and a 4-byte checksum of the content. */
4238 cksum = pager_cksum(pPager, (u8*)pData2);
4239 rc = write32bits(pPager->jfd, pPager->journalOff, pPg->pgno);
4240 if( rc==SQLITE_OK ){
4241 rc = sqlite3OsWrite(pPager->jfd, pData2, pPager->pageSize,
4242 pPager->journalOff + 4);
/* journalOff is advanced past the page number and content whether or
** not the sqlite3OsWrite() call above succeeded. */
4243 pPager->journalOff += pPager->pageSize+4;
4244 }
4245 if( rc==SQLITE_OK ){
4246 rc = write32bits(pPager->jfd, pPager->journalOff, cksum);
4247 pPager->journalOff += 4;
4248 }
4249 IOTRACE(("JOUT %p %d %lld %d\n", pPager, pPg->pgno,
4250 pPager->journalOff, pPager->pageSize));
4251 PAGER_INCR(sqlite3_pager_writej_count);
4252 PAGERTRACE(("JOURNAL %d page %d needSync=%d hash(%08x)\n",
4253 PAGERID(pPager), pPg->pgno,
4254 ((pPg->flags&PGHDR_NEED_SYNC)?1:0), pager_pagehash(pPg)));
4255
4256 /* Even if an IO or diskfull error occurred while journalling the
4257 ** page in the block above, set the need-sync flag for the page.
4258 ** Otherwise, when the transaction is rolled back, the logic in
4259 ** playback_one_page() will think that the page needs to be restored
4260 ** in the database file. And if an IO error occurs while doing so,
4261 ** then corruption may follow.
4262 */
4263 if( !pPager->noSync ){
4264 pPg->flags |= PGHDR_NEED_SYNC;
4265 pPager->needSync = 1;
4266 }
4267
4268 /* An error has occurred writing to the journal file. The
4269 ** transaction will be rolled back by the layer above.
4270 */
4271 if( rc!=SQLITE_OK ){
4272 return rc;
4273 }
4274
/* The record was written successfully: count it and remember that
** this page is now in the journal (and in any open savepoints). */
4275 pPager->nRec++;
4276 assert( pPager->pInJournal!=0 );
4277 rc = sqlite3BitvecSet(pPager->pInJournal, pPg->pgno);
4278 testcase( rc==SQLITE_NOMEM );
4279 assert( rc==SQLITE_OK || rc==SQLITE_NOMEM );
/* The two results are OR-ed together. The assert() in the error branch
** below expects the combined value to be SQLITE_NOMEM when non-zero. */
4280 rc |= addToSavepointBitvecs(pPager, pPg->pgno);
4281 if( rc!=SQLITE_OK ){
4282 assert( rc==SQLITE_NOMEM );
4283 return rc;
4284 }
4285 }else{
/* The page number is greater than dbOrigSize, so nothing is written
** to the journal for it. The need-sync flag is still set if the
** journal has not been started and syncing is enabled. */
4286 if( !pPager->journalStarted && !pPager->noSync ){
4287 pPg->flags |= PGHDR_NEED_SYNC;
4288 pPager->needSync = 1;
4289 }
4290 PAGERTRACE(("APPEND %d page %d needSync=%d\n",
4291 PAGERID(pPager), pPg->pgno,
4292 ((pPg->flags&PGHDR_NEED_SYNC)?1:0)));
4293 }
4294 }
4295
4296 /* If the statement journal is open and the page is not in it,
4297 ** then write the current page to the statement journal. Note that
4298 ** the statement journal format differs from the standard journal format
4299 ** in that it omits the checksums and the header.
4300 */
4301 if( subjRequiresPage(pPg) ){
4302 rc = subjournalPage(pPg);
4303 }
4304 }
4305
4306 /* Update the database size and return.
4307 */
4308 assert( pPager->state>=PAGER_SHARED );
4309 if( pPager->dbSize<pPg->pgno ){
4310 pPager->dbSize = pPg->pgno;
4311 }
4312 return rc;
4313 }
4314
4315 /*
4316 ** Mark a data page as writeable. This routine must be called before
4317 ** making changes to a page. The caller must check the return value
4318 ** of this function and be careful not to change any page data unless
4319 ** this routine returns SQLITE_OK.
4320 **
4321 ** The difference between this function and pager_write() is that this
4322 ** function also deals with the special case where 2 or more pages
4323 ** fit on a single disk sector. In this case all co-resident pages
4324 ** must have been written to the journal file before returning.
4325 **
4326 ** If an error occurs, SQLITE_NOMEM or an IO error code is returned
4327 ** as appropriate. Otherwise, SQLITE_OK.
4328 */
4329 int sqlite3PagerWrite(DbPage *pDbPage){
4330 int rc = SQLITE_OK;
4331
4332 PgHdr *pPg = pDbPage;
4333 Pager *pPager = pPg->pPager;
4334 Pgno nPagePerSector = (pPager->sectorSize/pPager->pageSize);
4335
4336 if( nPagePerSector>1 ){
4337 Pgno nPageCount; /* Total number of pages in database file */
4338 Pgno pg1; /* First page of the sector pPg is located on. */
4339 int nPage; /* Number of pages starting at pg1 to journal */
4340 int ii; /* Loop counter */
4341 int needSync = 0; /* True if any page has PGHDR_NEED_SYNC */
4342
4343 /* Set the doNotSync flag to 1. This is because we cannot allow a journal
4344 ** header to be written between the pages journaled by this function.
4345 */
4346 assert( !MEMDB );
4347 assert( pPager->doNotSync==0 );
4348 pPager->doNotSync = 1;
4349
4350 /* This trick assumes that both the page-size and sector-size are
4351 ** an integer power of 2. It sets variable pg1 to the identifier
4352 ** of the first page of the sector pPg is located on.
4353 */
4354 pg1 = ((pPg->pgno-1) & ~(nPagePerSector-1)) + 1;
4355
4356 sqlite3PagerPagecount(pPager, (int *)&nPageCount);
4357 if( pPg->pgno>nPageCount ){
4358 nPage = (pPg->pgno - pg1)+1;
4359 }else if( (pg1+nPagePerSector-1)>nPageCount ){
4360 nPage = nPageCount+1-pg1;
4361 }else{
4362 nPage = nPagePerSector;
4363 }
4364 assert(nPage>0);
4365 assert(pg1<=pPg->pgno);
4366 assert((pg1+nPage)>pPg->pgno);
4367
4368 for(ii=0; ii<nPage && rc==SQLITE_OK; ii++){
4369 Pgno pg = pg1+ii;
4370 PgHdr *pPage;
4371 if( pg==pPg->pgno || !sqlite3BitvecTest(pPager->pInJournal, pg) ){
4372 if( pg!=PAGER_MJ_PGNO(pPager) ){
4373 rc = sqlite3PagerGet(pPager, pg, &pPage);
4374 if( rc==SQLITE_OK ){
4375 rc = pager_write(pPage);
4376 if( pPage->flags&PGHDR_NEED_SYNC ){
4377 needSync = 1;
4378 assert(pPager->needSync);
4379 }
4380 sqlite3PagerUnref(pPage);
4381 }
4382 }
4383 }else if( (pPage = pager_lookup(pPager, pg))!=0 ){
4384 if( pPage->flags&PGHDR_NEED_SYNC ){
4385 needSync = 1;
4386 }
4387 sqlite3PagerUnref(pPage);
4388 }
4389 }
4390
4391 /* If the PGHDR_NEED_SYNC flag is set for any of the nPage pages
4392 ** starting at pg1, then it needs to be set for all of them. Because
4393 ** writing to any of these nPage pages may damage the others, the
4394 ** journal file must contain sync()ed copies of all of them
4395 ** before any of them can be written out to the database file.
4396 */
4397 if( rc==SQLITE_OK && needSync ){
4398 assert( !MEMDB && pPager->noSync==0 );
4399 for(ii=0; ii<nPage; ii++){
4400 PgHdr *pPage = pager_lookup(pPager, pg1+ii);
4401 if( pPage ){
4402 pPage->flags |= PGHDR_NEED_SYNC;
4403 sqlite3PagerUnref(pPage);
4404 }
4405 }
4406 assert(pPager->needSync);
4407 }
4408
4409 assert( pPager->doNotSync==1 );
4410 pPager->doNotSync = 0;
4411 }else{
4412 rc = pager_write(pDbPage);
4413 }
4414 return rc;
4415 }
4416
4417 /*
4418 ** Return TRUE if the page given in the argument was previously passed
4419 ** to sqlite3PagerWrite(). In other words, return TRUE if it is ok
4420 ** to change the content of the page.
4421 */
4422 #ifndef NDEBUG
4423 int sqlite3PagerIswriteable(DbPage *pPg){
4424 return pPg->flags&PGHDR_DIRTY;
4425 }
4426 #endif
4427
4428 /*
4429 ** A call to this routine tells the pager that it is not necessary to
4430 ** write the information on page pPg back to the disk, even though
4431 ** that page might be marked as dirty. This happens, for example, when
4432 ** the page has been added as a leaf of the freelist and so its
4433 ** content no longer matters.
4434 **
4435 ** The overlying software layer calls this routine when all of the data
4436 ** on the given page is unused. The pager marks the page as clean so
4437 ** that it does not get written to disk.
4438 **
4439 ** Tests show that this optimization can quadruple the speed of large
4440 ** DELETE operations.
4441 */
4442 void sqlite3PagerDontWrite(PgHdr *pPg){
4443 Pager *pPager = pPg->pPager;
4444 if( (pPg->flags&PGHDR_DIRTY) && pPager->nSavepoint==0 ){
4445 PAGERTRACE(("DONT_WRITE page %d of %d\n", pPg->pgno, PAGERID(pPager)));
4446 IOTRACE(("CLEAN %p %d\n", pPager, pPg->pgno))
4447 pPg->flags |= PGHDR_DONT_WRITE;
4448 #ifdef SQLITE_CHECK_PAGES
4449 pPg->pageHash = pager_pagehash(pPg);
4450 #endif
4451 }
4452 }
4453
4454 /*
4455 ** This routine is called to increment the value of the database file
4456 ** change-counter, stored as a 4-byte big-endian integer starting at
4457 ** byte offset 24 of the pager file.
4458 **
4459 ** If the isDirectMode flag is zero, then this is done by calling
4460 ** sqlite3PagerWrite() on page 1, then modifying the contents of the
4461 ** page data. In this case the file will be updated when the current
4462 ** transaction is committed.
4463 **
4464 ** The isDirectMode flag may only be non-zero if the library was compiled
4465 ** with the SQLITE_ENABLE_ATOMIC_WRITE macro defined. In this case,
4466 ** if isDirect is non-zero, then the database file is updated directly
4467 ** by writing an updated version of page 1 using a call to the
4468 ** sqlite3OsWrite() function.
4469 */
4470 static int pager_incr_changecounter(Pager *pPager, int isDirectMode){
4471 int rc = SQLITE_OK;
4472
4473 /* Declare and initialize constant integer 'isDirect'. If the
4474 ** atomic-write optimization is enabled in this build, then isDirect
4475 ** is initialized to the value passed as the isDirectMode parameter
4476 ** to this function. Otherwise, it is always set to zero.
4477 **
4478 ** The idea is that if the atomic-write optimization is not
4479 ** enabled at compile time, the compiler can omit the tests of
4480 ** 'isDirect' below, as well as the block enclosed in the
4481 ** "if( isDirect )" condition.
4482 */
4483 #ifndef SQLITE_ENABLE_ATOMIC_WRITE
4484 # define DIRECT_MODE 0
4485 assert( isDirectMode==0 );
4486 UNUSED_PARAMETER(isDirectMode);
4487 #else
4488 # define DIRECT_MODE isDirectMode
4489 #endif
4490
4491 assert( pPager->state>=PAGER_RESERVED );
4492 if( !pPager->changeCountDone && ALWAYS(pPager->dbSize>0) ){
4493 PgHdr *pPgHdr; /* Reference to page 1 */
4494 u32 change_counter; /* Initial value of change-counter field */
4495
4496 assert( !pPager->tempFile && isOpen(pPager->fd) );
4497
4498 /* Open page 1 of the file for writing. */
4499 rc = sqlite3PagerGet(pPager, 1, &pPgHdr);
4500 assert( pPgHdr==0 || rc==SQLITE_OK );
4501
4502 /* If page one was fetched successfully, and this function is not
4503 ** operating in direct-mode, make page 1 writable. When not in
4504 ** direct mode, page 1 is always held in cache and hence the PagerGet()
4505 ** above is always successful - hence the ALWAYS on rc==SQLITE_OK.
4506 */
4507 if( !DIRECT_MODE && ALWAYS(rc==SQLITE_OK) ){
4508 rc = sqlite3PagerWrite(pPgHdr);
4509 }
4510
4511 if( rc==SQLITE_OK ){
4512 /* Increment the value just read and write it back to byte 24. */
4513 change_counter = sqlite3Get4byte((u8*)pPager->dbFileVers);
4514 change_counter++;
4515 put32bits(((char*)pPgHdr->pData)+24, change_counter);
4516
4517 /* If running in direct mode, write the contents of page 1 to the file. */
4518 if( DIRECT_MODE ){
4519 const void *zBuf = pPgHdr->pData;
4520 assert( pPager->dbFileSize>0 );
4521 rc = sqlite3OsWrite(pPager->fd, zBuf, pPager->pageSize, 0);
4522 if( rc==SQLITE_OK ){
4523 pPager->changeCountDone = 1;
4524 }
4525 }else{
4526 pPager->changeCountDone = 1;
4527 }
4528 }
4529
4530 /* Release the page reference. */
4531 sqlite3PagerUnref(pPgHdr);
4532 }
4533 return rc;
4534 }
4535
4536 /*
4537 ** Sync the pager file to disk. This is a no-op for in-memory files
4538 ** or pages with the Pager.noSync flag set.
4539 **
4540 ** If successful, or called on a pager for which it is a no-op, this
4541 ** function returns SQLITE_OK. Otherwise, an IO error code is returned.
4542 */
4543 int sqlite3PagerSync(Pager *pPager){
4544 int rc; /* Return code */
4545 assert( !MEMDB );
4546 if( pPager->noSync ){
4547 rc = SQLITE_OK;
4548 }else{
4549 rc = sqlite3OsSync(pPager->fd, pPager->sync_flags);
4550 }
4551 return rc;
4552 }
4553
4554 /*
4555 ** Sync the database file for the pager pPager. zMaster points to the name
4556 ** of a master journal file that should be written into the individual
4557 ** journal file. zMaster may be NULL, which is interpreted as no master
4558 ** journal (a single database transaction).
4559 **
4560 ** This routine ensures that:
4561 **
4562 ** * The database file change-counter is updated,
4563 ** * the journal is synced (unless the atomic-write optimization is used),
4564 ** * all dirty pages are written to the database file,
4565 ** * the database file is truncated (if required), and
4566 ** * the database file is synced.
4567 **
4568 ** The only thing that remains to commit the transaction is to finalize
4569 ** (delete, truncate or zero the first part of) the journal file (or
4570 ** delete the master journal file if specified).
4571 **
4572 ** Note that if zMaster==NULL, this does not overwrite a previous value
4573 ** passed to an sqlite3PagerCommitPhaseOne() call.
4574 **
4575 ** If the final parameter - noSync - is true, then the database file itself
4576 ** is not synced. The caller must call sqlite3PagerSync() directly to
4577 ** sync the database file before calling CommitPhaseTwo() to delete the
4578 ** journal file in this case.
**
** Returns SQLITE_OK on success, or when there is nothing to do (the
** database was not modified, or the pager is already in PAGER_SYNCED
** state). Otherwise returns the pager's stored errCode or the error code
** of the first step that failed. After the first failing step, control
** jumps straight to the exit label and the remaining steps are skipped.
4579 */
4580 int sqlite3PagerCommitPhaseOne(
4581 Pager *pPager, /* Pager object */
4582 const char *zMaster, /* If not NULL, the master journal name */
4583 int noSync /* True to omit the xSync on the db file */
4584 ){
4585 int rc = SQLITE_OK; /* Return code */
4586
4587 /* The dbOrigSize is never set if journal_mode=OFF */
4588 assert( pPager->journalMode!=PAGER_JOURNALMODE_OFF || pPager->dbOrigSize==0 );
4589
4590 /* If a prior error occurred, this routine should not be called. ROLLBACK
4591 ** is the appropriate response to an error, not COMMIT. Guard against
4592 ** coding errors by repeating the prior error. */
4593 if( NEVER(pPager->errCode) ) return pPager->errCode;
4594
4595 PAGERTRACE(("DATABASE SYNC: File=%s zMaster=%s nSize=%d\n",
4596 pPager->zFilename, zMaster, pPager->dbSize));
4597
4598 if( MEMDB && pPager->dbModified ){
4599 /* This branch is taken only for an in-memory database that has been
4600 ** modified. Nothing is written to disk here. However, any
4601 ** backup in progress needs to be restarted.
4602 */
4603 sqlite3BackupRestart(pPager->pBackup);
4604 }else if( pPager->state!=PAGER_SYNCED && pPager->dbModified ){
/* If neither branch of this if/else-if is taken (no pages have been
** modified, or this function has already moved the pager to
** PAGER_SYNCED), the function returns SQLITE_OK without doing anything. */
4605
4606 /* The following block updates the change-counter. Exactly how it
4607 ** does this depends on whether or not the atomic-update optimization
4608 ** was enabled at compile time, and if this transaction meets the
4609 ** runtime criteria to use the operation:
4610 **
4611 ** * The file-system supports the atomic-write property for
4612 ** blocks of size page-size, and
4613 ** * This commit is not part of a multi-file transaction, and
4614 ** * Exactly one page has been modified and stored in the journal file.
4615 **
4616 ** If the optimization was not enabled at compile time, then the
4617 ** pager_incr_changecounter() function is called to update the change
4618 ** counter in 'indirect-mode'. If the optimization is compiled in but
4619 ** is not applicable to this transaction, call sqlite3JournalCreate()
4620 ** to make sure the journal file has actually been created, then call
4621 ** pager_incr_changecounter() to update the change-counter in indirect
4622 ** mode.
4623 **
4624 ** Otherwise, if the optimization is both enabled and applicable,
4625 ** then call pager_incr_changecounter() to update the change-counter
4626 ** in 'direct' mode. In this case the journal file will never be
4627 ** created for this transaction.
4628 */
4629 #ifdef SQLITE_ENABLE_ATOMIC_WRITE
4630 PgHdr *pPg;
4631 assert( isOpen(pPager->jfd) || pPager->journalMode==PAGER_JOURNALMODE_OFF );
/* Direct mode requires: no master journal, an open journal whose write
** offset equals jrnlBufferSize(), a database image at least as large
** as the database file, and at most one page on the dirty list. */
4632 if( !zMaster && isOpen(pPager->jfd)
4633 && pPager->journalOff==jrnlBufferSize(pPager)
4634 && pPager->dbSize>=pPager->dbFileSize
4635 && (0==(pPg = sqlite3PcacheDirtyList(pPager->pPCache)) || 0==pPg->pDirty)
4636 ){
4637 /* Update the db file change counter via the direct-write method. The
4638 ** following call will modify the in-memory representation of page 1
4639 ** to include the updated change counter and then write page 1
4640 ** directly to the database file. Because of the atomic-write
4641 ** property of the host file-system, this is safe.
4642 */
4643 rc = pager_incr_changecounter(pPager, 1);
4644 }else{
4645 rc = sqlite3JournalCreate(pPager->jfd);
4646 if( rc==SQLITE_OK ){
4647 rc = pager_incr_changecounter(pPager, 0);
4648 }
4649 }
4650 #else
4651 rc = pager_incr_changecounter(pPager, 0);
4652 #endif
4653 if( rc!=SQLITE_OK ) goto commit_phase_one_exit;
4654
4655 /* If this transaction has made the database smaller, then all pages
4656 ** being discarded by the truncation must be written to the journal
4657 ** file. This can only happen in auto-vacuum mode.
4658 **
4659 ** Before reading the pages with page numbers larger than the
4660 ** current value of Pager.dbSize, set dbSize back to the value
4661 ** that it took at the start of the transaction. Otherwise, the
4662 ** calls to sqlite3PagerGet() return zeroed pages instead of
4663 ** reading data from the database file.
4664 **
4665 ** When journal_mode==OFF the dbOrigSize is always zero, so this
4666 ** block never runs if journal_mode=OFF.
4667 */
4668 #ifndef SQLITE_OMIT_AUTOVACUUM
4669 if( pPager->dbSize<pPager->dbOrigSize
4670 && ALWAYS(pPager->journalMode!=PAGER_JOURNALMODE_OFF)
4671 ){
4672 Pgno i; /* Iterator variable */
4673 const Pgno iSkip = PAGER_MJ_PGNO(pPager); /* Pending lock page */
4674 const Pgno dbSize = pPager->dbSize; /* Database image size */
4675 pPager->dbSize = pPager->dbOrigSize;
/* NOTE(review): on either error exit inside this loop, the restore of
** pPager->dbSize after the loop is skipped, so dbSize is left equal to
** dbOrigSize. Confirm that the caller's rollback path copes with this. */
4676 for( i=dbSize+1; i<=pPager->dbOrigSize; i++ ){
4677 if( !sqlite3BitvecTest(pPager->pInJournal, i) && i!=iSkip ){
4678 PgHdr *pPage; /* Page to journal */
4679 rc = sqlite3PagerGet(pPager, i, &pPage);
4680 if( rc!=SQLITE_OK ) goto commit_phase_one_exit;
4681 rc = sqlite3PagerWrite(pPage);
4682 sqlite3PagerUnref(pPage);
4683 if( rc!=SQLITE_OK ) goto commit_phase_one_exit;
4684 }
4685 }
4686 pPager->dbSize = dbSize;
4687 }
4688 #endif
4689
4690 /* Write the master journal name into the journal file. If a master
4691 ** journal file name has already been written to the journal file,
4692 ** or if zMaster is NULL (no master journal), then this call is a no-op.
4693 */
4694 rc = writeMasterJournal(pPager, zMaster);
4695 if( rc!=SQLITE_OK ) goto commit_phase_one_exit;
4696
4697 /* Sync the journal file. If the atomic-update optimization is being
4698 ** used, this call will not create the journal file or perform any
4699 ** real IO.
4700 */
4701 rc = syncJournal(pPager);
4702 if( rc!=SQLITE_OK ) goto commit_phase_one_exit;
4703
4704 /* Write all dirty pages to the database file. */
4705 rc = pager_write_pagelist(sqlite3PcacheDirtyList(pPager->pPCache));
4706 if( rc!=SQLITE_OK ){
4707 assert( rc!=SQLITE_IOERR_BLOCKED );
4708 goto commit_phase_one_exit;
4709 }
/* Every dirty page has been written out, so mark the whole cache clean. */
4710 sqlite3PcacheCleanAll(pPager->pPCache);
4711
4712 /* If the file on disk is not the same size as the database image,
4713 ** then use pager_truncate to grow or shrink the file here.
4714 */
4715 if( pPager->dbSize!=pPager->dbFileSize ){
/* The new size is one page smaller than dbSize when the last page of
** the image is the pending-lock page, PAGER_MJ_PGNO. */
4716 Pgno nNew = pPager->dbSize - (pPager->dbSize==PAGER_MJ_PGNO(pPager));
4717 assert( pPager->state>=PAGER_EXCLUSIVE );
4718 rc = pager_truncate(pPager, nNew);
4719 if( rc!=SQLITE_OK ) goto commit_phase_one_exit;
4720 }
4721
4722 /* Finally, sync the database file. */
4723 if( !pPager->noSync && !noSync ){
4724 rc = sqlite3OsSync(pPager->fd, pPager->sync_flags);
4725 }
4726 IOTRACE(("DBSYNC %p\n", pPager))
4727
/* NOTE(review): the state moves to PAGER_SYNCED even when the
** sqlite3OsSync() call above failed. The error is still returned in rc. */
4728 pPager->state = PAGER_SYNCED;
4729 }
4730
4731 commit_phase_one_exit:
4732 return rc;
4733 }
4734
4735
4736 /*
4737 ** When this function is called, the database file has been completely
4738 ** updated to reflect the changes made by the current transaction and
4739 ** synced to disk. The journal file still exists in the file-system
4740 ** though, and if a failure occurs at this point it will eventually
4741 ** be used as a hot-journal and the current transaction rolled back.
4742 **
4743 ** This function finalizes the journal file, either by deleting,
4744 ** truncating or partially zeroing it, so that it cannot be used
4745 ** for hot-journal rollback. Once this is done the transaction is
4746 ** irrevocably committed.
4747 **
4748 ** If an error occurs, an IO error code is returned and the pager
4749 ** moves into the error state. Otherwise, SQLITE_OK is returned.
4750 */
4751 int sqlite3PagerCommitPhaseTwo(Pager *pPager){
4752 int rc = SQLITE_OK; /* Return code */
4753
4754 /* This routine should not be called if a prior error has occurred.
4755 ** But if (due to a coding error elsewhere in the system) it does get
4756 ** called, just return the same error code without doing anything. */
4757 if( NEVER(pPager->errCode) ) return pPager->errCode;
4758
4759 /* This function should not be called if the pager is not in at least
4760 ** PAGER_RESERVED state. And indeed SQLite never does this. But it is
4761 ** nice to have this defensive test here anyway.
4762 */
4763 if( NEVER(pPager->state<PAGER_RESERVED) ) return SQLITE_ERROR;
4764
4765 /* An optimization. If the database was not actually modified during
4766 ** this transaction, the pager is running in exclusive-mode and is
4767 ** using persistent journals, then this function is a no-op.
4768 **
4769 ** The start of the journal file currently contains a single journal
4770 ** header with the nRec field set to 0. If such a journal is used as
4771 ** a hot-journal during hot-journal rollback, 0 changes will be made
4772 ** to the database file. So there is no need to zero the journal
4773 ** header. Since the pager is in exclusive mode, there is no need
4774 ** to drop any locks either.
4775 */
4776 if( pPager->dbModified==0 && pPager->exclusiveMode
4777 && pPager->journalMode==PAGER_JOURNALMODE_PERSIST
4778 ){
4779 assert( pPager->journalOff==JOURNAL_HDR_SZ(pPager) );
4780 return SQLITE_OK;
4781 }
4782
4783 PAGERTRACE(("COMMIT %d\n", PAGERID(pPager)));
4784 assert( pPager->state==PAGER_SYNCED || MEMDB || !pPager->dbModified );
4785 rc = pager_end_transaction(pPager, pPager->setMaster);
4786 return pager_error(pPager, rc);
4787 }
4788
4789 /*
4790 ** Rollback all changes. The database falls back to PAGER_SHARED mode.
4791 **
4792 ** This function performs two tasks:
4793 **
4794 ** 1) It rolls back the journal file, restoring all database file and
4795 ** in-memory cache pages to the state they were in when the transaction
4796 ** was opened, and
4797 ** 2) It finalizes the journal file, so that it is not used for hot
4798 ** rollback at any point in the future.
4799 **
4800 ** subject to the following qualifications:
4801 **
4802 ** * If the journal file is not yet open when this function is called,
4803 ** then only (2) is performed. In this case there is no journal file
4804 ** to roll back.
4805 **
4806 ** * If in an error state other than SQLITE_FULL, then task (1) is
4807 ** performed. If successful, task (2). Regardless of the outcome
4808 ** of either, the error state error code is returned to the caller
4809 ** (i.e. either SQLITE_IOERR or SQLITE_CORRUPT).
4810 **
4811 ** * If the pager is in PAGER_RESERVED state, then attempt (1). Whether
4812 ** or not (1) is succussful, also attempt (2). If successful, return
4813 ** SQLITE_OK. Otherwise, enter the error state and return the first
4814 ** error code encountered.
4815 **
4816 ** In this case there is no chance that the database was written to.
4817 ** So is safe to finalize the journal file even if the playback
4818 ** (operation 1) failed. However the pager must enter the error state
4819 ** as the contents of the in-memory cache are now suspect.
4820 **
4821 ** * Finally, if in PAGER_EXCLUSIVE state, then attempt (1). Only
4822 ** attempt (2) if (1) is successful. Return SQLITE_OK if successful,
4823 ** otherwise enter the error state and return the error code from the
4824 ** failing operation.
4825 **
4826 ** In this case the database file may have been written to. So if the
4827 ** playback operation did not succeed it would not be safe to finalize
4828 ** the journal file. It needs to be left in the file-system so that
4829 ** some other process can use it to restore the database state (by
4830 ** hot-journal rollback).
4831 */
4832 int sqlite3PagerRollback(Pager *pPager){
4833 int rc = SQLITE_OK; /* Return code */
4834 PAGERTRACE(("ROLLBACK %d\n", PAGERID(pPager)));
4835 if( !pPager->dbModified || !isOpen(pPager->jfd) ){
4836 rc = pager_end_transaction(pPager, pPager->setMaster);
4837 }else if( pPager->errCode && pPager->errCode!=SQLITE_FULL ){
4838 if( pPager->state>=PAGER_EXCLUSIVE ){
4839 pager_playback(pPager, 0);
4840 }
4841 rc = pPager->errCode;
4842 }else{
4843 if( pPager->state==PAGER_RESERVED ){
4844 int rc2;
4845 rc = pager_playback(pPager, 0);
4846 rc2 = pager_end_transaction(pPager, pPager->setMaster);
4847 if( rc==SQLITE_OK ){
4848 rc = rc2;
4849 }
4850 }else{
4851 rc = pager_playback(pPager, 0);
4852 }
4853
4854 if( !MEMDB ){
4855 pPager->dbSizeValid = 0;
4856 }
4857
4858 /* If an error occurs during a ROLLBACK, we can no longer trust the pager
4859 ** cache. So call pager_error() on the way out to make any error
4860 ** persistent.
4861 */
4862 rc = pager_error(pPager, rc);
4863 }
4864 return rc;
4865 }
4866
4867 /*
4868 ** Return TRUE if the database file is opened read-only. Return FALSE
4869 ** if the database is (in theory) writable.
4870 */
4871 u8 sqlite3PagerIsreadonly(Pager *pPager){
4872 return pPager->readOnly;
4873 }
4874
4875 /*
4876 ** Return the number of references to the pager.
4877 */
4878 int sqlite3PagerRefcount(Pager *pPager){
4879 return sqlite3PcacheRefCount(pPager->pPCache);
4880 }
4881
4882 /*
4883 ** Return the number of references to the specified page.
4884 */
4885 int sqlite3PagerPageRefcount(DbPage *pPage){
4886 return sqlite3PcachePageRefcount(pPage);
4887 }
4888
4889 #ifdef SQLITE_TEST
4890 /*
4891 ** This routine is used for testing and analysis only.
4892 */
4893 int *sqlite3PagerStats(Pager *pPager){
4894 static int a[11];
4895 a[0] = sqlite3PcacheRefCount(pPager->pPCache);
4896 a[1] = sqlite3PcachePagecount(pPager->pPCache);
4897 a[2] = sqlite3PcacheGetCachesize(pPager->pPCache);
4898 a[3] = pPager->dbSizeValid ? (int) pPager->dbSize : -1;
4899 a[4] = pPager->state;
4900 a[5] = pPager->errCode;
4901 a[6] = pPager->nHit;
4902 a[7] = pPager->nMiss;
4903 a[8] = 0; /* Used to be pPager->nOvfl */
4904 a[9] = pPager->nRead;
4905 a[10] = pPager->nWrite;
4906 return a;
4907 }
4908 #endif
4909
4910 /*
4911 ** Return true if this is an in-memory pager.
4912 */
4913 int sqlite3PagerIsMemdb(Pager *pPager){
4914 return MEMDB;
4915 }
4916
4917 /*
4918 ** Check that there are at least nSavepoint savepoints open. If there are
4919 ** currently less than nSavepoints open, then open one or more savepoints
4920 ** to make up the difference. If the number of savepoints is already
4921 ** equal to nSavepoint, then this function is a no-op.
4922 **
4923 ** If a memory allocation fails, SQLITE_NOMEM is returned. If an error
4924 ** occurs while opening the sub-journal file, then an IO error code is
4925 ** returned. Otherwise, SQLITE_OK.
4926 */
4927 int sqlite3PagerOpenSavepoint(Pager *pPager, int nSavepoint){
4928 int rc = SQLITE_OK; /* Return code */
4929 int nCurrent = pPager->nSavepoint; /* Current number of savepoints */
4930
4931 if( nSavepoint>nCurrent && pPager->useJournal ){
4932 int ii; /* Iterator variable */
4933 PagerSavepoint *aNew; /* New Pager.aSavepoint array */
4934
4935 /* Either there is no active journal or the sub-journal is open or
4936 ** the journal is always stored in memory */
4937 assert( pPager->nSavepoint==0 || isOpen(pPager->sjfd) ||
4938 pPager->journalMode==PAGER_JOURNALMODE_MEMORY );
4939
4940 /* Grow the Pager.aSavepoint array using realloc(). Return SQLITE_NOMEM
4941 ** if the allocation fails. Otherwise, zero the new portion in case a
4942 ** malloc failure occurs while populating it in the for(...) loop below.
4943 */
4944 aNew = (PagerSavepoint *)sqlite3Realloc(
4945 pPager->aSavepoint, sizeof(PagerSavepoint)*nSavepoint
4946 );
4947 if( !aNew ){
4948 return SQLITE_NOMEM;
4949 }
4950 memset(&aNew[nCurrent], 0, (nSavepoint-nCurrent) * sizeof(PagerSavepoint));
4951 pPager->aSavepoint = aNew;
4952 pPager->nSavepoint = nSavepoint;
4953
4954 /* Populate the PagerSavepoint structures just allocated. */
4955 for(ii=nCurrent; ii<nSavepoint; ii++){
4956 assert( pPager->dbSizeValid );
4957 aNew[ii].nOrig = pPager->dbSize;
4958 if( isOpen(pPager->jfd) && ALWAYS(pPager->journalOff>0) ){
4959 aNew[ii].iOffset = pPager->journalOff;
4960 }else{
4961 aNew[ii].iOffset = JOURNAL_HDR_SZ(pPager);
4962 }
4963 aNew[ii].iSubRec = pPager->nSubRec;
4964 aNew[ii].pInSavepoint = sqlite3BitvecCreate(pPager->dbSize);
4965 if( !aNew[ii].pInSavepoint ){
4966 return SQLITE_NOMEM;
4967 }
4968 }
4969
4970 /* Open the sub-journal, if it is not already opened. */
4971 rc = openSubJournal(pPager);
4972 assertTruncateConstraint(pPager);
4973 }
4974
4975 return rc;
4976 }
4977
4978 /*
4979 ** This function is called to rollback or release (commit) a savepoint.
4980 ** The savepoint to release or rollback need not be the most recently
4981 ** created savepoint.
4982 **
4983 ** Parameter op is always either SAVEPOINT_ROLLBACK or SAVEPOINT_RELEASE.
4984 ** If it is SAVEPOINT_RELEASE, then release and destroy the savepoint with
4985 ** index iSavepoint. If it is SAVEPOINT_ROLLBACK, then rollback all changes
4986 ** that have occurred since the specified savepoint was created.
4987 **
4988 ** The savepoint to rollback or release is identified by parameter
4989 ** iSavepoint. A value of 0 means to operate on the outermost savepoint
4990 ** (the first created). A value of (Pager.nSavepoint-1) means operate
4991 ** on the most recently created savepoint. If iSavepoint is greater than
4992 ** (Pager.nSavepoint-1), then this function is a no-op.
4993 **
4994 ** If a negative value is passed to this function, then the current
4995 ** transaction is rolled back. This is different to calling
4996 ** sqlite3PagerRollback() because this function does not terminate
4997 ** the transaction or unlock the database, it just restores the
4998 ** contents of the database to its original state.
4999 **
5000 ** In any case, all savepoints with an index greater than iSavepoint
5001 ** are destroyed. If this is a release operation (op==SAVEPOINT_RELEASE),
5002 ** then savepoint iSavepoint is also destroyed.
5003 **
5004 ** This function may return SQLITE_NOMEM if a memory allocation fails,
5005 ** or an IO error code if an IO error occurs while rolling back a
5006 ** savepoint. If no errors occur, SQLITE_OK is returned.
5007 */
5008 int sqlite3PagerSavepoint(Pager *pPager, int op, int iSavepoint){
5009 int rc = SQLITE_OK;
5010
5011 assert( op==SAVEPOINT_RELEASE || op==SAVEPOINT_ROLLBACK );
5012 assert( iSavepoint>=0 || op==SAVEPOINT_ROLLBACK );
5013
5014 if( iSavepoint<pPager->nSavepoint ){
5015 int ii; /* Iterator variable */
5016 int nNew; /* Number of remaining savepoints after this op. */
5017
5018 /* Figure out how many savepoints will still be active after this
5019 ** operation. Store this value in nNew. Then free resources associated
5020 ** with any savepoints that are destroyed by this operation.
5021 */
5022 nNew = iSavepoint + (op==SAVEPOINT_ROLLBACK);
5023 for(ii=nNew; ii<pPager->nSavepoint; ii++){
5024 sqlite3BitvecDestroy(pPager->aSavepoint[ii].pInSavepoint);
5025 }
5026 pPager->nSavepoint = nNew;
5027
5028 /* If this is a rollback operation, playback the specified savepoint.
5029 ** If this is a temp-file, it is possible that the journal file has
5030 ** not yet been opened. In this case there have been no changes to
5031 ** the database file, so the playback operation can be skipped.
5032 */
5033 if( op==SAVEPOINT_ROLLBACK && isOpen(pPager->jfd) ){
5034 PagerSavepoint *pSavepoint = (nNew==0)?0:&pPager->aSavepoint[nNew-1];
5035 rc = pagerPlaybackSavepoint(pPager, pSavepoint);
5036 assert(rc!=SQLITE_DONE);
5037 }
5038
5039 /* If this is a release of the outermost savepoint, truncate
5040 ** the sub-journal to zero bytes in size. */
5041 if( nNew==0 && op==SAVEPOINT_RELEASE && isOpen(pPager->sjfd) ){
5042 assert( rc==SQLITE_OK );
5043 rc = sqlite3OsTruncate(pPager->sjfd, 0);
5044 pPager->nSubRec = 0;
5045 }
5046 }
5047 return rc;
5048 }
5049
5050 /*
5051 ** Return the full pathname of the database file.
5052 */
5053 const char *sqlite3PagerFilename(Pager *pPager){
5054 return pPager->zFilename;
5055 }
5056
5057 /*
5058 ** Return the VFS structure for the pager.
5059 */
5060 const sqlite3_vfs *sqlite3PagerVfs(Pager *pPager){
5061 return pPager->pVfs;
5062 }
5063
5064 /*
5065 ** Return the file handle for the database file associated
5066 ** with the pager. This might return NULL if the file has
5067 ** not yet been opened.
5068 */
5069 sqlite3_file *sqlite3PagerFile(Pager *pPager){
5070 return pPager->fd;
5071 }
5072
5073 /*
5074 ** Return the full pathname of the journal file.
5075 */
5076 const char *sqlite3PagerJournalname(Pager *pPager){
5077 return pPager->zJournal;
5078 }
5079
5080 /*
5081 ** Return true if fsync() calls are disabled for this pager. Return FALSE
5082 ** if fsync()s are executed normally.
5083 */
5084 int sqlite3PagerNosync(Pager *pPager){
5085 return pPager->noSync;
5086 }
5087
5088 #ifdef SQLITE_HAS_CODEC
5089 /*
5090 ** Set or retrieve the codec for this pager
5091 */
5092 static void sqlite3PagerSetCodec(
5093 Pager *pPager,
5094 void *(*xCodec)(void*,void*,Pgno,int),
5095 void (*xCodecSizeChng)(void*,int,int),
5096 void (*xCodecFree)(void*),
5097 void *pCodec
5098 ){
5099 if( pPager->xCodecFree ) pPager->xCodecFree(pPager->pCodec);
5100 pPager->xCodec = xCodec;
5101 pPager->xCodecSizeChng = xCodecSizeChng;
5102 pPager->xCodecFree = xCodecFree;
5103 pPager->pCodec = pCodec;
5104 pagerReportSize(pPager);
5105 }
5106 static void *sqlite3PagerGetCodec(Pager *pPager){
5107 return pPager->pCodec;
5108 }
5109 #endif
5110
5111 #ifndef SQLITE_OMIT_AUTOVACUUM
5112 /*
5113 ** Move the page pPg to location pgno in the file.
5114 **
5115 ** There must be no references to the page previously located at
5116 ** pgno (which we call pPgOld) though that page is allowed to be
5117 ** in cache. If the page previously located at pgno is not already
5118 ** in the rollback journal, it is not put there by by this routine.
5119 **
5120 ** References to the page pPg remain valid. Updating any
5121 ** meta-data associated with pPg (i.e. data stored in the nExtra bytes
5122 ** allocated along with the page) is the responsibility of the caller.
5123 **
5124 ** A transaction must be active when this routine is called. It used to be
5125 ** required that a statement transaction was not active, but this restriction
5126 ** has been removed (CREATE INDEX needs to move a page when a statement
5127 ** transaction is active).
5128 **
5129 ** If the fourth argument, isCommit, is non-zero, then this page is being
5130 ** moved as part of a database reorganization just before the transaction
5131 ** is being committed. In this case, it is guaranteed that the database page
5132 ** pPg refers to will not be written to again within this transaction.
5133 **
5134 ** This function may return SQLITE_NOMEM or an IO error code if an error
5135 ** occurs. Otherwise, it returns SQLITE_OK.
5136 */
5137 int sqlite3PagerMovepage(Pager *pPager, DbPage *pPg, Pgno pgno, int isCommit){
5138 PgHdr *pPgOld; /* The page being overwritten. */
5139 Pgno needSyncPgno = 0; /* Old value of pPg->pgno, if sync is required */
5140 int rc; /* Return code */
5141 Pgno origPgno; /* The original page number */
5142
5143 assert( pPg->nRef>0 );
5144
5145 /* If the page being moved is dirty and has not been saved by the latest
5146 ** savepoint, then save the current contents of the page into the
5147 ** sub-journal now. This is required to handle the following scenario:
5148 **
5149 ** BEGIN;
5150 ** <journal page X, then modify it in memory>
5151 ** SAVEPOINT one;
5152 ** <Move page X to location Y>
5153 ** ROLLBACK TO one;
5154 **
5155 ** If page X were not written to the sub-journal here, it would not
5156 ** be possible to restore its contents when the "ROLLBACK TO one"
5157 ** statement were is processed.
5158 **
5159 ** subjournalPage() may need to allocate space to store pPg->pgno into
5160 ** one or more savepoint bitvecs. This is the reason this function
5161 ** may return SQLITE_NOMEM.
5162 */
5163 if( pPg->flags&PGHDR_DIRTY
5164 && subjRequiresPage(pPg)
5165 && SQLITE_OK!=(rc = subjournalPage(pPg))
5166 ){
5167 return rc;
5168 }
5169
5170 PAGERTRACE(("MOVE %d page %d (needSync=%d) moves to %d\n",
5171 PAGERID(pPager), pPg->pgno, (pPg->flags&PGHDR_NEED_SYNC)?1:0, pgno));
5172 IOTRACE(("MOVE %p %d %d\n", pPager, pPg->pgno, pgno))
5173
5174 /* If the journal needs to be sync()ed before page pPg->pgno can
5175 ** be written to, store pPg->pgno in local variable needSyncPgno.
5176 **
5177 ** If the isCommit flag is set, there is no need to remember that
5178 ** the journal needs to be sync()ed before database page pPg->pgno
5179 ** can be written to. The caller has already promised not to write to it.
5180 */
5181 if( (pPg->flags&PGHDR_NEED_SYNC) && !isCommit ){
5182 needSyncPgno = pPg->pgno;
5183 assert( pageInJournal(pPg) || pPg->pgno>pPager->dbOrigSize );
5184 assert( pPg->flags&PGHDR_DIRTY );
5185 assert( pPager->needSync );
5186 }
5187
5188 /* If the cache contains a page with page-number pgno, remove it
5189 ** from its hash chain. Also, if the PgHdr.needSync was set for
5190 ** page pgno before the 'move' operation, it needs to be retained
5191 ** for the page moved there.
5192 */
5193 pPg->flags &= ~PGHDR_NEED_SYNC;
5194 pPgOld = pager_lookup(pPager, pgno);
5195 assert( !pPgOld || pPgOld->nRef==1 );
5196 if( pPgOld ){
5197 pPg->flags |= (pPgOld->flags&PGHDR_NEED_SYNC);
5198 sqlite3PcacheDrop(pPgOld);
5199 }
5200
5201 origPgno = pPg->pgno;
5202 sqlite3PcacheMove(pPg, pgno);
5203 sqlite3PcacheMakeDirty(pPg);
5204 pPager->dbModified = 1;
5205
5206 if( needSyncPgno ){
5207 /* If needSyncPgno is non-zero, then the journal file needs to be
5208 ** sync()ed before any data is written to database file page needSyncPgno.
5209 ** Currently, no such page exists in the page-cache and the
5210 ** "is journaled" bitvec flag has been set. This needs to be remedied by
5211 ** loading the page into the pager-cache and setting the PgHdr.needSync
5212 ** flag.
5213 **
5214 ** If the attempt to load the page into the page-cache fails, (due
5215 ** to a malloc() or IO failure), clear the bit in the pInJournal[]
5216 ** array. Otherwise, if the page is loaded and written again in
5217 ** this transaction, it may be written to the database file before
5218 ** it is synced into the journal file. This way, it may end up in
5219 ** the journal file twice, but that is not a problem.
5220 **
5221 ** The sqlite3PagerGet() call may cause the journal to sync. So make
5222 ** sure the Pager.needSync flag is set too.
5223 */
5224 PgHdr *pPgHdr;
5225 assert( pPager->needSync );
5226 rc = sqlite3PagerGet(pPager, needSyncPgno, &pPgHdr);
5227 if( rc!=SQLITE_OK ){
5228 if( needSyncPgno<=pPager->dbOrigSize ){
5229 assert( pPager->pTmpSpace!=0 );
5230 sqlite3BitvecClear(pPager->pInJournal, needSyncPgno, pPager->pTmpSpace);
5231 }
5232 return rc;
5233 }
5234 pPager->needSync = 1;
5235 assert( pPager->noSync==0 && !MEMDB );
5236 pPgHdr->flags |= PGHDR_NEED_SYNC;
5237 sqlite3PcacheMakeDirty(pPgHdr);
5238 sqlite3PagerUnref(pPgHdr);
5239 }
5240
5241 /*
5242 ** For an in-memory database, make sure the original page continues
5243 ** to exist, in case the transaction needs to roll back. We allocate
5244 ** the page now, instead of at rollback, because we can better deal
5245 ** with an out-of-memory error now. Ticket #3761.
5246 */
5247 if( MEMDB ){
5248 DbPage *pNew;
5249 rc = sqlite3PagerAcquire(pPager, origPgno, &pNew, 1);
5250 if( rc!=SQLITE_OK ){
5251 sqlite3PcacheMove(pPg, origPgno);
5252 return rc;
5253 }
5254 sqlite3PagerUnref(pNew);
5255 }
5256
5257 return SQLITE_OK;
5258 }
5259 #endif
5260
5261 /* Begin preload-cache.patch for Chromium */
/**
** Allocate a large block of memory.
**
** For allocations known to be large there is no need to stress the heap
** and potentially hold its lock while a big chunk is carved out.  On
** Windows builds (SQLITE_OS_WIN) the request therefore goes directly to
** the OS through VirtualAlloc(); on every other platform it is served by
** sqlite3Malloc().
**
** Returns whatever pointer the underlying allocator returned.  Memory
** obtained here must be released with freeLarge().
**/
static void* allocLarge(size_t size) {
  void *pBlock;
#if SQLITE_OS_WIN
  pBlock = VirtualAlloc(NULL, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
#else
  pBlock = sqlite3Malloc(size);
#endif
  return pBlock;
}
5274
/**
** Release a block of memory obtained from allocLarge().  This is the
** platform counterpart of that routine: VirtualFree() on Windows builds
** (SQLITE_OS_WIN), sqlite3_free() everywhere else.
**/
static void freeLarge(void* ptr) {
#if SQLITE_OS_WIN
  /* MEM_RELEASE is used with a size of 0 to release the whole region. */
  VirtualFree(ptr, 0, MEM_RELEASE);
#else
  sqlite3_free(ptr);
#endif
}
5282
5283 /**
5284 ** Addition: This will attempt to populate the database cache with
5285 ** the first N bytes of the file, where N is the total size of the cache.
5286 ** Because we can load this as one chunk from the disk, this is much faster
5287 ** than loading a subset of the pages one at a time in random order.
5288 **
5289 ** The pager must be initialized before this function is called. This means a
5290 * statement must be open that has initialized the pager and is keeping the
5291 ** cache in memory.
5292 **/
5293 int sqlite3PagerLoadall(Pager* pPager)
5294 {
5295 int i;
5296 int rc;
5297 int nMax;
5298 int loadSize;
5299 int loadPages;
5300 unsigned char *fileData;
5301
5302 if (pPager->dbSize < 0 || pPager->pageSize < 0) {
5303 /* pager not initialized, this means a statement is not open */
5304 return SQLITE_MISUSE;
5305 }
5306
5307 /* compute sizes */
5308 nMax = sqlite3PcacheGetCachesize(pPager->pPCache);
5309 if (nMax < pPager->dbSize)
5310 loadPages = nMax;
5311 else
5312 loadPages = pPager->dbSize;
5313 loadSize = loadPages * pPager->pageSize;
5314
5315 /* load the file as one chunk */
5316 fileData = allocLarge(loadSize);
5317 if (! fileData)
5318 return SQLITE_NOMEM;
5319 rc = sqlite3OsRead(pPager->fd, fileData, loadSize, 0);
5320 if (rc != SQLITE_OK) {
5321 freeLarge(fileData);
5322 return rc;
5323 }
5324
5325 /* Copy the data to each page. Note that the page numbers we pass to _get
5326 * are one-based, 0 is a marker for no page. We also need to check that we
5327 * haven't loaded more pages than the cache can hold total. There may have
5328 * already been a few pages loaded before, so we may fill the cache before
5329 * loading all of the pages we want to.
5330 */
5331 for(i=1;
5332 i <= loadPages && sqlite3PcachePagecount(pPager->pPCache) < nMax;
5333 i++) {
5334 DbPage *pPage = 0;
5335 rc = sqlite3PagerAcquire2(pPager, i, &pPage, 0,
5336 &fileData[(i-1)*(i64)pPager->pageSize]);
5337 if (rc != SQLITE_OK)
5338 break;
5339 sqlite3PagerUnref(pPage);
5340 }
5341 freeLarge(fileData);
5342 return SQLITE_OK;
5343 }
5344 /* End preload-cache.patch for Chromium */
5345
5346 /*
5347 ** Return a pointer to the data for the specified page.
5348 */
5349 void *sqlite3PagerGetData(DbPage *pPg){
5350 assert( pPg->nRef>0 || pPg->pPager->memDb );
5351 return pPg->pData;
5352 }
5353
5354 /*
5355 ** Return a pointer to the Pager.nExtra bytes of "extra" space
5356 ** allocated along with the specified page.
5357 */
5358 void *sqlite3PagerGetExtra(DbPage *pPg){
5359 return pPg->pExtra;
5360 }
5361
5362 /*
5363 ** Get/set the locking-mode for this pager. Parameter eMode must be one
5364 ** of PAGER_LOCKINGMODE_QUERY, PAGER_LOCKINGMODE_NORMAL or
5365 ** PAGER_LOCKINGMODE_EXCLUSIVE. If the parameter is not _QUERY, then
5366 ** the locking-mode is set to the value specified.
5367 **
5368 ** The returned value is either PAGER_LOCKINGMODE_NORMAL or
5369 ** PAGER_LOCKINGMODE_EXCLUSIVE, indicating the current (possibly updated)
5370 ** locking-mode.
5371 */
5372 int sqlite3PagerLockingMode(Pager *pPager, int eMode){
5373 assert( eMode==PAGER_LOCKINGMODE_QUERY
5374 || eMode==PAGER_LOCKINGMODE_NORMAL
5375 || eMode==PAGER_LOCKINGMODE_EXCLUSIVE );
5376 assert( PAGER_LOCKINGMODE_QUERY<0 );
5377 assert( PAGER_LOCKINGMODE_NORMAL>=0 && PAGER_LOCKINGMODE_EXCLUSIVE>=0 );
5378 if( eMode>=0 && !pPager->tempFile ){
5379 pPager->exclusiveMode = (u8)eMode;
5380 }
5381 return (int)pPager->exclusiveMode;
5382 }
5383
5384 /*
5385 ** Get/set the journal-mode for this pager. Parameter eMode must be one of:
5386 **
5387 ** PAGER_JOURNALMODE_QUERY
5388 ** PAGER_JOURNALMODE_DELETE
5389 ** PAGER_JOURNALMODE_TRUNCATE
5390 ** PAGER_JOURNALMODE_PERSIST
5391 ** PAGER_JOURNALMODE_OFF
5392 ** PAGER_JOURNALMODE_MEMORY
5393 **
5394 ** If the parameter is not _QUERY, then the journal_mode is set to the
5395 ** value specified if the change is allowed. The change is disallowed
5396 ** for the following reasons:
5397 **
5398 ** * An in-memory database can only have its journal_mode set to _OFF
5399 ** or _MEMORY.
5400 **
5401 ** * The journal mode may not be changed while a transaction is active.
5402 **
5403 ** The returned indicate the current (possibly updated) journal-mode.
5404 */
5405 int sqlite3PagerJournalMode(Pager *pPager, int eMode){
5406 assert( eMode==PAGER_JOURNALMODE_QUERY
5407 || eMode==PAGER_JOURNALMODE_DELETE
5408 || eMode==PAGER_JOURNALMODE_TRUNCATE
5409 || eMode==PAGER_JOURNALMODE_PERSIST
5410 || eMode==PAGER_JOURNALMODE_OFF
5411 || eMode==PAGER_JOURNALMODE_MEMORY );
5412 assert( PAGER_JOURNALMODE_QUERY<0 );
5413 if( eMode>=0
5414 && (!MEMDB || eMode==PAGER_JOURNALMODE_MEMORY
5415 || eMode==PAGER_JOURNALMODE_OFF)
5416 && !pPager->dbModified
5417 && (!isOpen(pPager->jfd) || 0==pPager->journalOff)
5418 ){
5419 if( isOpen(pPager->jfd) ){
5420 sqlite3OsClose(pPager->jfd);
5421 }
5422 pPager->journalMode = (u8)eMode;
5423 }
5424 return (int)pPager->journalMode;
5425 }
5426
5427 /*
5428 ** Get/set the size-limit used for persistent journal files.
5429 **
5430 ** Setting the size limit to -1 means no limit is enforced.
5431 ** An attempt to set a limit smaller than -1 is a no-op.
5432 */
5433 i64 sqlite3PagerJournalSizeLimit(Pager *pPager, i64 iLimit){
5434 if( iLimit>=-1 ){
5435 pPager->journalSizeLimit = iLimit;
5436 }
5437 return pPager->journalSizeLimit;
5438 }
5439
5440 /*
5441 ** Return a pointer to the pPager->pBackup variable. The backup module
5442 ** in backup.c maintains the content of this variable. This module
5443 ** uses it opaquely as an argument to sqlite3BackupRestart() and
5444 ** sqlite3BackupUpdate() only.
5445 */
5446 sqlite3_backup **sqlite3PagerBackupPtr(Pager *pPager){
5447 return &pPager->pBackup;
5448 }
5449
5450 #endif /* SQLITE_OMIT_DISKIO */
OLDNEW
« no previous file with comments | « third_party/sqlite/src/pager.h ('k') | third_party/sqlite/src/parse.y » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698