/*
** 2008 October 7
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
**
** This file contains code used to implement an in-memory rollback journal.
** The in-memory rollback journal is used to journal transactions for
** ":memory:" databases and when the journal_mode=MEMORY pragma is used.
**
** Update:  The in-memory journal is also used to temporarily cache
** smaller journals that are not critical for power-loss recovery.
** For example, statement journals that are not too big will be held
** entirely in memory, thus reducing the number of file I/O calls, and
** more importantly, reducing temporary file creation events.  If these
** journals become too large for memory, they are spilled to disk.  But
** in the common case, they are usually small and no file I/O needs to
** occur.
*/
#include "sqliteInt.h"

/* Forward references to the structures defined later in this file */
typedef struct MemJournal MemJournal;
typedef struct FilePoint FilePoint;
typedef struct FileChunk FileChunk;

| 33 /* |
| 34 ** The rollback journal is composed of a linked list of these structures. |
| 35 ** |
| 36 ** The zChunk array is always at least 8 bytes in size - usually much more. |
| 37 ** Its actual size is stored in the MemJournal.nChunkSize variable. |
| 38 */ |
| 39 struct FileChunk { |
| 40 FileChunk *pNext; /* Next chunk in the journal */ |
| 41 u8 zChunk[8]; /* Content of this chunk */ |
| 42 }; |
| 43 |
/*
** By default, allocate this many bytes of memory for each FileChunk object.
** This is the size of the whole allocation (header plus payload), not of
** the payload alone.
*/
#define MEMJOURNAL_DFLT_FILECHUNKSIZE 1024

/*
** For chunk size nChunkSize, return the number of bytes that should
** be allocated for each FileChunk structure.  The 8 bytes subtracted
** are the zChunk[8] bytes already counted by sizeof(FileChunk).
*/
#define fileChunkSize(nChunkSize) (sizeof(FileChunk) + ((nChunkSize)-8))

| 55 /* |
| 56 ** An instance of this object serves as a cursor into the rollback journal. |
| 57 ** The cursor can be either for reading or writing. |
| 58 */ |
| 59 struct FilePoint { |
| 60 sqlite3_int64 iOffset; /* Offset from the beginning of the file */ |
| 61 FileChunk *pChunk; /* Specific chunk into which cursor points */ |
| 62 }; |
| 63 |
| 64 /* |
| 65 ** This structure is a subclass of sqlite3_file. Each open memory-journal |
| 66 ** is an instance of this class. |
| 67 */ |
| 68 struct MemJournal { |
| 69 const sqlite3_io_methods *pMethod; /* Parent class. MUST BE FIRST */ |
| 70 int nChunkSize; /* In-memory chunk-size */ |
| 71 |
| 72 int nSpill; /* Bytes of data before flushing */ |
| 73 int nSize; /* Bytes of data currently in memory */ |
| 74 FileChunk *pFirst; /* Head of in-memory chunk-list */ |
| 75 FilePoint endpoint; /* Pointer to the end of the file */ |
| 76 FilePoint readpoint; /* Pointer to the end of the last xRead() */ |
| 77 |
| 78 int flags; /* xOpen flags */ |
| 79 sqlite3_vfs *pVfs; /* The "real" underlying VFS */ |
| 80 const char *zJournal; /* Name of the journal file */ |
| 81 }; |
| 82 |
| 83 /* |
| 84 ** Read data from the in-memory journal file. This is the implementation |
| 85 ** of the sqlite3_vfs.xRead method. |
| 86 */ |
| 87 static int memjrnlRead( |
| 88 sqlite3_file *pJfd, /* The journal file from which to read */ |
| 89 void *zBuf, /* Put the results here */ |
| 90 int iAmt, /* Number of bytes to read */ |
| 91 sqlite_int64 iOfst /* Begin reading at this offset */ |
| 92 ){ |
| 93 MemJournal *p = (MemJournal *)pJfd; |
| 94 u8 *zOut = zBuf; |
| 95 int nRead = iAmt; |
| 96 int iChunkOffset; |
| 97 FileChunk *pChunk; |
| 98 |
| 99 #ifdef SQLITE_ENABLE_ATOMIC_WRITE |
| 100 if( (iAmt+iOfst)>p->endpoint.iOffset ){ |
| 101 return SQLITE_IOERR_SHORT_READ; |
| 102 } |
| 103 #endif |
| 104 |
| 105 assert( (iAmt+iOfst)<=p->endpoint.iOffset ); |
| 106 assert( p->readpoint.iOffset==0 || p->readpoint.pChunk!=0 ); |
| 107 if( p->readpoint.iOffset!=iOfst || iOfst==0 ){ |
| 108 sqlite3_int64 iOff = 0; |
| 109 for(pChunk=p->pFirst; |
| 110 ALWAYS(pChunk) && (iOff+p->nChunkSize)<=iOfst; |
| 111 pChunk=pChunk->pNext |
| 112 ){ |
| 113 iOff += p->nChunkSize; |
| 114 } |
| 115 }else{ |
| 116 pChunk = p->readpoint.pChunk; |
| 117 assert( pChunk!=0 ); |
| 118 } |
| 119 |
| 120 iChunkOffset = (int)(iOfst%p->nChunkSize); |
| 121 do { |
| 122 int iSpace = p->nChunkSize - iChunkOffset; |
| 123 int nCopy = MIN(nRead, (p->nChunkSize - iChunkOffset)); |
| 124 memcpy(zOut, (u8*)pChunk->zChunk + iChunkOffset, nCopy); |
| 125 zOut += nCopy; |
| 126 nRead -= iSpace; |
| 127 iChunkOffset = 0; |
| 128 } while( nRead>=0 && (pChunk=pChunk->pNext)!=0 && nRead>0 ); |
| 129 p->readpoint.iOffset = pChunk ? iOfst+iAmt : 0; |
| 130 p->readpoint.pChunk = pChunk; |
| 131 |
| 132 return SQLITE_OK; |
| 133 } |
| 134 |
| 135 /* |
| 136 ** Free the list of FileChunk structures headed at MemJournal.pFirst. |
| 137 */ |
| 138 static void memjrnlFreeChunks(MemJournal *p){ |
| 139 FileChunk *pIter; |
| 140 FileChunk *pNext; |
| 141 for(pIter=p->pFirst; pIter; pIter=pNext){ |
| 142 pNext = pIter->pNext; |
| 143 sqlite3_free(pIter); |
| 144 } |
| 145 p->pFirst = 0; |
| 146 } |
| 147 |
| 148 /* |
| 149 ** Flush the contents of memory to a real file on disk. |
| 150 */ |
| 151 static int memjrnlCreateFile(MemJournal *p){ |
| 152 int rc; |
| 153 sqlite3_file *pReal = (sqlite3_file*)p; |
| 154 MemJournal copy = *p; |
| 155 |
| 156 memset(p, 0, sizeof(MemJournal)); |
| 157 rc = sqlite3OsOpen(copy.pVfs, copy.zJournal, pReal, copy.flags, 0); |
| 158 if( rc==SQLITE_OK ){ |
| 159 int nChunk = copy.nChunkSize; |
| 160 i64 iOff = 0; |
| 161 FileChunk *pIter; |
| 162 for(pIter=copy.pFirst; pIter; pIter=pIter->pNext){ |
| 163 if( iOff + nChunk > copy.endpoint.iOffset ){ |
| 164 nChunk = copy.endpoint.iOffset - iOff; |
| 165 } |
| 166 rc = sqlite3OsWrite(pReal, (u8*)pIter->zChunk, nChunk, iOff); |
| 167 if( rc ) break; |
| 168 iOff += nChunk; |
| 169 } |
| 170 if( rc==SQLITE_OK ){ |
| 171 /* No error has occurred. Free the in-memory buffers. */ |
| 172 memjrnlFreeChunks(©); |
| 173 } |
| 174 } |
| 175 if( rc!=SQLITE_OK ){ |
| 176 /* If an error occurred while creating or writing to the file, restore |
| 177 ** the original before returning. This way, SQLite uses the in-memory |
| 178 ** journal data to roll back changes made to the internal page-cache |
| 179 ** before this function was called. */ |
| 180 sqlite3OsClose(pReal); |
| 181 *p = copy; |
| 182 } |
| 183 return rc; |
| 184 } |
| 185 |
| 186 |
| 187 /* |
| 188 ** Write data to the file. |
| 189 */ |
| 190 static int memjrnlWrite( |
| 191 sqlite3_file *pJfd, /* The journal file into which to write */ |
| 192 const void *zBuf, /* Take data to be written from here */ |
| 193 int iAmt, /* Number of bytes to write */ |
| 194 sqlite_int64 iOfst /* Begin writing at this offset into the file */ |
| 195 ){ |
| 196 MemJournal *p = (MemJournal *)pJfd; |
| 197 int nWrite = iAmt; |
| 198 u8 *zWrite = (u8 *)zBuf; |
| 199 |
| 200 /* If the file should be created now, create it and write the new data |
| 201 ** into the file on disk. */ |
| 202 if( p->nSpill>0 && (iAmt+iOfst)>p->nSpill ){ |
| 203 int rc = memjrnlCreateFile(p); |
| 204 if( rc==SQLITE_OK ){ |
| 205 rc = sqlite3OsWrite(pJfd, zBuf, iAmt, iOfst); |
| 206 } |
| 207 return rc; |
| 208 } |
| 209 |
| 210 /* If the contents of this write should be stored in memory */ |
| 211 else{ |
| 212 /* An in-memory journal file should only ever be appended to. Random |
| 213 ** access writes are not required. The only exception to this is when |
| 214 ** the in-memory journal is being used by a connection using the |
| 215 ** atomic-write optimization. In this case the first 28 bytes of the |
| 216 ** journal file may be written as part of committing the transaction. */ |
| 217 assert( iOfst==p->endpoint.iOffset || iOfst==0 ); |
| 218 #ifdef SQLITE_ENABLE_ATOMIC_WRITE |
| 219 if( iOfst==0 && p->pFirst ){ |
| 220 assert( p->nChunkSize>iAmt ); |
| 221 memcpy((u8*)p->pFirst->zChunk, zBuf, iAmt); |
| 222 }else |
| 223 #else |
| 224 assert( iOfst>0 || p->pFirst==0 ); |
| 225 #endif |
| 226 { |
| 227 while( nWrite>0 ){ |
| 228 FileChunk *pChunk = p->endpoint.pChunk; |
| 229 int iChunkOffset = (int)(p->endpoint.iOffset%p->nChunkSize); |
| 230 int iSpace = MIN(nWrite, p->nChunkSize - iChunkOffset); |
| 231 |
| 232 if( iChunkOffset==0 ){ |
| 233 /* New chunk is required to extend the file. */ |
| 234 FileChunk *pNew = sqlite3_malloc(fileChunkSize(p->nChunkSize)); |
| 235 if( !pNew ){ |
| 236 return SQLITE_IOERR_NOMEM_BKPT; |
| 237 } |
| 238 pNew->pNext = 0; |
| 239 if( pChunk ){ |
| 240 assert( p->pFirst ); |
| 241 pChunk->pNext = pNew; |
| 242 }else{ |
| 243 assert( !p->pFirst ); |
| 244 p->pFirst = pNew; |
| 245 } |
| 246 p->endpoint.pChunk = pNew; |
| 247 } |
| 248 |
| 249 memcpy((u8*)p->endpoint.pChunk->zChunk + iChunkOffset, zWrite, iSpace); |
| 250 zWrite += iSpace; |
| 251 nWrite -= iSpace; |
| 252 p->endpoint.iOffset += iSpace; |
| 253 } |
| 254 p->nSize = iAmt + iOfst; |
| 255 } |
| 256 } |
| 257 |
| 258 return SQLITE_OK; |
| 259 } |
| 260 |
| 261 /* |
| 262 ** Truncate the file. |
| 263 ** |
| 264 ** If the journal file is already on disk, truncate it there. Or, if it |
| 265 ** is still in main memory but is being truncated to zero bytes in size, |
| 266 ** ignore |
| 267 */ |
| 268 static int memjrnlTruncate(sqlite3_file *pJfd, sqlite_int64 size){ |
| 269 MemJournal *p = (MemJournal *)pJfd; |
| 270 if( ALWAYS(size==0) ){ |
| 271 memjrnlFreeChunks(p); |
| 272 p->nSize = 0; |
| 273 p->endpoint.pChunk = 0; |
| 274 p->endpoint.iOffset = 0; |
| 275 p->readpoint.pChunk = 0; |
| 276 p->readpoint.iOffset = 0; |
| 277 } |
| 278 return SQLITE_OK; |
| 279 } |
| 280 |
| 281 /* |
| 282 ** Close the file. |
| 283 */ |
| 284 static int memjrnlClose(sqlite3_file *pJfd){ |
| 285 MemJournal *p = (MemJournal *)pJfd; |
| 286 memjrnlFreeChunks(p); |
| 287 return SQLITE_OK; |
| 288 } |
| 289 |
| 290 /* |
| 291 ** Sync the file. |
| 292 ** |
| 293 ** If the real file has been created, call its xSync method. Otherwise, |
| 294 ** syncing an in-memory journal is a no-op. |
| 295 */ |
| 296 static int memjrnlSync(sqlite3_file *pJfd, int flags){ |
| 297 UNUSED_PARAMETER2(pJfd, flags); |
| 298 return SQLITE_OK; |
| 299 } |
| 300 |
| 301 /* |
| 302 ** Query the size of the file in bytes. |
| 303 */ |
| 304 static int memjrnlFileSize(sqlite3_file *pJfd, sqlite_int64 *pSize){ |
| 305 MemJournal *p = (MemJournal *)pJfd; |
| 306 *pSize = (sqlite_int64) p->endpoint.iOffset; |
| 307 return SQLITE_OK; |
| 308 } |
| 309 |
| 310 /* |
| 311 ** Table of methods for MemJournal sqlite3_file object. |
| 312 */ |
| 313 static const struct sqlite3_io_methods MemJournalMethods = { |
| 314 1, /* iVersion */ |
| 315 memjrnlClose, /* xClose */ |
| 316 memjrnlRead, /* xRead */ |
| 317 memjrnlWrite, /* xWrite */ |
| 318 memjrnlTruncate, /* xTruncate */ |
| 319 memjrnlSync, /* xSync */ |
| 320 memjrnlFileSize, /* xFileSize */ |
| 321 0, /* xLock */ |
| 322 0, /* xUnlock */ |
| 323 0, /* xCheckReservedLock */ |
| 324 0, /* xFileControl */ |
| 325 0, /* xSectorSize */ |
| 326 0, /* xDeviceCharacteristics */ |
| 327 0, /* xShmMap */ |
| 328 0, /* xShmLock */ |
| 329 0, /* xShmBarrier */ |
| 330 0, /* xShmUnmap */ |
| 331 0, /* xFetch */ |
| 332 0 /* xUnfetch */ |
| 333 }; |
| 334 |
| 335 /* |
| 336 ** Open a journal file. |
| 337 ** |
| 338 ** The behaviour of the journal file depends on the value of parameter |
| 339 ** nSpill. If nSpill is 0, then the journal file is always create and |
| 340 ** accessed using the underlying VFS. If nSpill is less than zero, then |
| 341 ** all content is always stored in main-memory. Finally, if nSpill is a |
| 342 ** positive value, then the journal file is initially created in-memory |
| 343 ** but may be flushed to disk later on. In this case the journal file is |
| 344 ** flushed to disk either when it grows larger than nSpill bytes in size, |
| 345 ** or when sqlite3JournalCreate() is called. |
| 346 */ |
| 347 int sqlite3JournalOpen( |
| 348 sqlite3_vfs *pVfs, /* The VFS to use for actual file I/O */ |
| 349 const char *zName, /* Name of the journal file */ |
| 350 sqlite3_file *pJfd, /* Preallocated, blank file handle */ |
| 351 int flags, /* Opening flags */ |
| 352 int nSpill /* Bytes buffered before opening the file */ |
| 353 ){ |
| 354 MemJournal *p = (MemJournal*)pJfd; |
| 355 |
| 356 /* Zero the file-handle object. If nSpill was passed zero, initialize |
| 357 ** it using the sqlite3OsOpen() function of the underlying VFS. In this |
| 358 ** case none of the code in this module is executed as a result of calls |
| 359 ** made on the journal file-handle. */ |
| 360 memset(p, 0, sizeof(MemJournal)); |
| 361 if( nSpill==0 ){ |
| 362 return sqlite3OsOpen(pVfs, zName, pJfd, flags, 0); |
| 363 } |
| 364 |
| 365 if( nSpill>0 ){ |
| 366 p->nChunkSize = nSpill; |
| 367 }else{ |
| 368 p->nChunkSize = 8 + MEMJOURNAL_DFLT_FILECHUNKSIZE - sizeof(FileChunk); |
| 369 assert( MEMJOURNAL_DFLT_FILECHUNKSIZE==fileChunkSize(p->nChunkSize) ); |
| 370 } |
| 371 |
| 372 p->pMethod = (const sqlite3_io_methods*)&MemJournalMethods; |
| 373 p->nSpill = nSpill; |
| 374 p->flags = flags; |
| 375 p->zJournal = zName; |
| 376 p->pVfs = pVfs; |
| 377 return SQLITE_OK; |
| 378 } |
| 379 |
| 380 /* |
| 381 ** Open an in-memory journal file. |
| 382 */ |
| 383 void sqlite3MemJournalOpen(sqlite3_file *pJfd){ |
| 384 sqlite3JournalOpen(0, 0, pJfd, 0, -1); |
| 385 } |
| 386 |
#ifdef SQLITE_ENABLE_ATOMIC_WRITE
/*
** If the argument p points to a MemJournal structure that is not an
** in-memory-only journal file (i.e. is one that was opened with a +ve
** nSpill parameter), and the underlying file has not yet been created,
** create it now.
**
** Returns SQLITE_OK if nothing needed to be done, otherwise the result
** of memjrnlCreateFile().
*/
int sqlite3JournalCreate(sqlite3_file *p){
  int rc = SQLITE_OK;
  if( p->pMethods==&MemJournalMethods && ((MemJournal*)p)->nSpill>0 ){
    rc = memjrnlCreateFile((MemJournal*)p);
  }
  return rc;
}
#endif

| 403 /* |
| 404 ** The file-handle passed as the only argument is open on a journal file. |
| 405 ** Return true if this "journal file" is currently stored in heap memory, |
| 406 ** or false otherwise. |
| 407 */ |
| 408 int sqlite3JournalIsInMemory(sqlite3_file *p){ |
| 409 return p->pMethods==&MemJournalMethods; |
| 410 } |
| 411 |
| 412 /* |
| 413 ** Return the number of bytes required to store a JournalFile that uses vfs |
| 414 ** pVfs to create the underlying on-disk files. |
| 415 */ |
| 416 int sqlite3JournalSize(sqlite3_vfs *pVfs){ |
| 417 return MAX(pVfs->szOsFile, (int)sizeof(MemJournal)); |
| 418 } |
OLD | NEW |