Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(4)

Unified Diff: third_party/sqlite/src/src/os_unix.c

Issue 949043002: Add //third_party/sqlite to dirs_to_snapshot, remove net_sql.patch (Closed) Base URL: git@github.com:domokit/mojo.git@master
Patch Set: Created 5 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « third_party/sqlite/src/src/os_setup.h ('k') | third_party/sqlite/src/src/os_win.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: third_party/sqlite/src/src/os_unix.c
diff --git a/third_party/sqlite/src/src/os_unix.c b/third_party/sqlite/src/src/os_unix.c
index 77ffd8ac0d9a50c0328d03aff3dca52ccd5bb52f..1624f6a2ce220925271aab83826f0ed73219fa39 100644
--- a/third_party/sqlite/src/src/os_unix.c
+++ b/third_party/sqlite/src/src/os_unix.c
@@ -84,32 +84,6 @@
#endif
/*
-** These #defines should enable >2GB file support on Posix if the
-** underlying operating system supports it. If the OS lacks
-** large file support, these should be no-ops.
-**
-** Large file support can be disabled using the -DSQLITE_DISABLE_LFS switch
-** on the compiler command line. This is necessary if you are compiling
-** on a recent machine (ex: RedHat 7.2) but you want your code to work
-** on an older machine (ex: RedHat 6.0). If you compile on RedHat 7.2
-** without this option, LFS is enable. But LFS does not exist in the kernel
-** in RedHat 6.0, so the code won't work. Hence, for maximum binary
-** portability you should omit LFS.
-**
-** The previous paragraph was written in 2005. (This paragraph is written
-** on 2008-11-28.) These days, all Linux kernels support large files, so
-** you should probably leave LFS enabled. But some embedded platforms might
-** lack LFS in which case the SQLITE_DISABLE_LFS macro might still be useful.
-*/
-#ifndef SQLITE_DISABLE_LFS
-# define _LARGE_FILE 1
-# ifndef _FILE_OFFSET_BITS
-# define _FILE_OFFSET_BITS 64
-# endif
-# define _LARGEFILE_SOURCE 1
-#endif
-
-/*
** standard include files.
*/
#include <sys/types.h>
@@ -119,11 +93,11 @@
#include <time.h>
#include <sys/time.h>
#include <errno.h>
-#ifndef SQLITE_OMIT_WAL
-#include <sys/mman.h>
+#if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0
+# include <sys/mman.h>
#endif
-#if SQLITE_ENABLE_LOCKING_STYLE
+#if SQLITE_ENABLE_LOCKING_STYLE || OS_VXWORKS
# include <sys/ioctl.h>
# if OS_VXWORKS
# include <semaphore.h>
@@ -138,6 +112,10 @@
# include <sys/mount.h>
#endif
+#ifdef HAVE_UTIME
+# include <utime.h>
+#endif
+
/*
** Allowed values of unixFile.fsFlags
*/
@@ -160,8 +138,8 @@
#endif
/*
- ** Default permissions when creating auto proxy dir
- */
+** Default permissions when creating auto proxy dir
+*/
#ifndef SQLITE_DEFAULT_PROXYDIR_PERMISSIONS
# define SQLITE_DEFAULT_PROXYDIR_PERMISSIONS 0755
#endif
@@ -202,16 +180,28 @@ struct UnixUnusedFd {
typedef struct unixFile unixFile;
struct unixFile {
sqlite3_io_methods const *pMethod; /* Always the first entry */
+ sqlite3_vfs *pVfs; /* The VFS that created this unixFile */
unixInodeInfo *pInode; /* Info about locks on this inode */
int h; /* The file descriptor */
unsigned char eFileLock; /* The type of lock held on this fd */
- unsigned char ctrlFlags; /* Behavioral bits. UNIXFILE_* flags */
+ unsigned short int ctrlFlags; /* Behavioral bits. UNIXFILE_* flags */
int lastErrno; /* The unix errno from last I/O error */
void *lockingContext; /* Locking style specific state */
UnixUnusedFd *pUnused; /* Pre-allocated UnixUnusedFd */
const char *zPath; /* Name of the file */
unixShm *pShm; /* Shared memory segment information */
int szChunk; /* Configured by FCNTL_CHUNK_SIZE */
+#if SQLITE_MAX_MMAP_SIZE>0
+ int nFetchOut; /* Number of outstanding xFetch refs */
+ sqlite3_int64 mmapSize; /* Usable size of mapping at pMapRegion */
+ sqlite3_int64 mmapSizeActual; /* Actual size of mapping at pMapRegion */
+ sqlite3_int64 mmapSizeMax; /* Configured FCNTL_MMAP_SIZE value */
+ void *pMapRegion; /* Memory mapped region */
+#endif
+#ifdef __QNXNTO__
+ int sectorSize; /* Device sector size */
+ int deviceCharacteristics; /* Precomputed device characteristics */
+#endif
#if SQLITE_ENABLE_LOCKING_STYLE
int openFlags; /* The flags specified at open() */
#endif
@@ -219,10 +209,9 @@ struct unixFile {
unsigned fsFlags; /* cached details from statfs() */
#endif
#if OS_VXWORKS
- int isDelete; /* Delete on close if true */
struct vxworksFileId *pId; /* Unique file ID */
#endif
-#ifndef NDEBUG
+#ifdef SQLITE_DEBUG
/* The next group of variables are used to track whether or not the
** transaction counter in bytes 24-27 of database files are updated
** whenever any part of the database changes. An assertion fault will
@@ -233,7 +222,9 @@ struct unixFile {
unsigned char transCntrChng; /* True if the transaction counter changed */
unsigned char dbUpdate; /* True if any part of database file changed */
unsigned char inNormalWrite; /* True if in a normal write operation */
+
#endif
+
#ifdef SQLITE_TEST
/* In test mode, increase the size of this structure a bit so that
** it is larger than the struct CrashFile defined in test6.c.
@@ -242,12 +233,28 @@ struct unixFile {
#endif
};
+/* This variable holds the process id (pid) from when the xRandomness()
+** method was called. If xOpen() is called from a different process id,
+** indicating that a fork() has occurred, the PRNG will be reset.
+*/
+static int randomnessPid = 0;
+
/*
** Allowed values for the unixFile.ctrlFlags bitmask:
*/
-#define UNIXFILE_EXCL 0x01 /* Connections from one process only */
-#define UNIXFILE_RDONLY 0x02 /* Connection is read only */
-#define UNIXFILE_DIRSYNC 0x04 /* Directory sync needed */
+#define UNIXFILE_EXCL 0x01 /* Connections from one process only */
+#define UNIXFILE_RDONLY 0x02 /* Connection is read only */
+#define UNIXFILE_PERSIST_WAL 0x04 /* Persistent WAL mode */
+#ifndef SQLITE_DISABLE_DIRSYNC
+# define UNIXFILE_DIRSYNC 0x08 /* Directory sync needed */
+#else
+# define UNIXFILE_DIRSYNC 0x00
+#endif
+#define UNIXFILE_PSOW 0x10 /* SQLITE_IOCAP_POWERSAFE_OVERWRITE */
+#define UNIXFILE_DELETE 0x20 /* Delete on close */
+#define UNIXFILE_URI 0x40 /* Filename might have query parameters */
+#define UNIXFILE_NOLOCK 0x80 /* Do no file locking */
+#define UNIXFILE_WARNED 0x0100 /* verifyDbFile() warnings have been issued */
/*
** Include code that is common to all os_*.c files
@@ -281,8 +288,53 @@ struct unixFile {
#define threadid 0
#endif
+/*
+** HAVE_MREMAP defaults to true on Linux and false everywhere else.
+*/
+#if !defined(HAVE_MREMAP)
+# if defined(__linux__) && defined(_GNU_SOURCE)
+# define HAVE_MREMAP 1
+# else
+# define HAVE_MREMAP 0
+# endif
+#endif
+
+/*
+** Explicitly call the 64-bit version of lseek() on Android. Otherwise, lseek()
+** is the 32-bit version, even if _FILE_OFFSET_BITS=64 is defined.
+*/
+#ifdef __ANDROID__
+# define lseek lseek64
+#endif
+
+/*
+** Different Unix systems declare open() in different ways. Same use
+** open(const char*,int,mode_t). Others use open(const char*,int,...).
+** The difference is important when using a pointer to the function.
+**
+** The safest way to deal with the problem is to always use this wrapper
+** which always has the same well-defined interface.
+*/
+static int posixOpen(const char *zFile, int flags, int mode){
+ return open(zFile, flags, mode);
+}
+
+/*
+** On some systems, calls to fchown() will trigger a message in a security
+** log if they come from non-root processes. So avoid calling fchown() if
+** we are not running as root.
+*/
+static int posixFchown(int fd, uid_t uid, gid_t gid){
+#if OS_VXWORKS
+ return 0;
+#else
+ return geteuid() ? 0 : fchown(fd,uid,gid);
+#endif
+}
+
/* Forward reference */
static int openDirectory(const char*, int*);
+static int unixGetpagesize(void);
/*
** Many system calls are accessed through pointer-to-functions so that
@@ -291,12 +343,12 @@ static int openDirectory(const char*, int*);
** to all overrideable system calls.
*/
static struct unix_syscall {
- const char *zName; /* Name of the sytem call */
+ const char *zName; /* Name of the system call */
sqlite3_syscall_ptr pCurrent; /* Current value of the system call */
sqlite3_syscall_ptr pDefault; /* Default value */
} aSyscall[] = {
- { "open", (sqlite3_syscall_ptr)open, 0 },
-#define osOpen ((int(*)(const char*,int,...))aSyscall[0].pCurrent)
+ { "open", (sqlite3_syscall_ptr)posixOpen, 0 },
+#define osOpen ((int(*)(const char*,int,int))aSyscall[0].pCurrent)
{ "close", (sqlite3_syscall_ptr)close, 0 },
#define osClose ((int(*)(int))aSyscall[1].pCurrent)
@@ -333,7 +385,7 @@ static struct unix_syscall {
{ "read", (sqlite3_syscall_ptr)read, 0 },
#define osRead ((ssize_t(*)(int,void*,size_t))aSyscall[8].pCurrent)
-#if defined(USE_PREAD) || defined(SQLITE_ENABLE_LOCKING_STYLE)
+#if defined(USE_PREAD) || (SQLITE_ENABLE_LOCKING_STYLE && !OS_VXWORKS)
{ "pread", (sqlite3_syscall_ptr)pread, 0 },
#else
{ "pread", (sqlite3_syscall_ptr)0, 0 },
@@ -350,7 +402,7 @@ static struct unix_syscall {
{ "write", (sqlite3_syscall_ptr)write, 0 },
#define osWrite ((ssize_t(*)(int,const void*,size_t))aSyscall[11].pCurrent)
-#if defined(USE_PREAD) || defined(SQLITE_ENABLE_LOCKING_STYLE)
+#if defined(USE_PREAD) || (SQLITE_ENABLE_LOCKING_STYLE && !OS_VXWORKS)
{ "pwrite", (sqlite3_syscall_ptr)pwrite, 0 },
#else
{ "pwrite", (sqlite3_syscall_ptr)0, 0 },
@@ -366,11 +418,7 @@ static struct unix_syscall {
#define osPwrite64 ((ssize_t(*)(int,const void*,size_t,off_t))\
aSyscall[13].pCurrent)
-#if SQLITE_ENABLE_LOCKING_STYLE
{ "fchmod", (sqlite3_syscall_ptr)fchmod, 0 },
-#else
- { "fchmod", (sqlite3_syscall_ptr)0, 0 },
-#endif
#define osFchmod ((int(*)(int,mode_t))aSyscall[14].pCurrent)
#if defined(HAVE_POSIX_FALLOCATE) && HAVE_POSIX_FALLOCATE
@@ -386,6 +434,33 @@ static struct unix_syscall {
{ "openDirectory", (sqlite3_syscall_ptr)openDirectory, 0 },
#define osOpenDirectory ((int(*)(const char*,int*))aSyscall[17].pCurrent)
+ { "mkdir", (sqlite3_syscall_ptr)mkdir, 0 },
+#define osMkdir ((int(*)(const char*,mode_t))aSyscall[18].pCurrent)
+
+ { "rmdir", (sqlite3_syscall_ptr)rmdir, 0 },
+#define osRmdir ((int(*)(const char*))aSyscall[19].pCurrent)
+
+ { "fchown", (sqlite3_syscall_ptr)posixFchown, 0 },
+#define osFchown ((int(*)(int,uid_t,gid_t))aSyscall[20].pCurrent)
+
+#if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0
+ { "mmap", (sqlite3_syscall_ptr)mmap, 0 },
+#define osMmap ((void*(*)(void*,size_t,int,int,int,off_t))aSyscall[21].pCurrent)
+
+ { "munmap", (sqlite3_syscall_ptr)munmap, 0 },
+#define osMunmap ((void*(*)(void*,size_t))aSyscall[22].pCurrent)
+
+#if HAVE_MREMAP
+ { "mremap", (sqlite3_syscall_ptr)mremap, 0 },
+#else
+ { "mremap", (sqlite3_syscall_ptr)0, 0 },
+#endif
+#define osMremap ((void*(*)(void*,size_t,size_t,int,...))aSyscall[23].pCurrent)
+ { "getpagesize", (sqlite3_syscall_ptr)unixGetpagesize, 0 },
+#define osGetpagesize ((int(*)(void))aSyscall[24].pCurrent)
+
+#endif
+
}; /* End of the overrideable system calls */
/*
@@ -472,12 +547,66 @@ static const char *unixNextSystemCall(sqlite3_vfs *p, const char *zName){
}
/*
-** Retry open() calls that fail due to EINTR
+** Do not accept any file descriptor less than this value, in order to avoid
+** opening database file using file descriptors that are commonly used for
+** standard input, output, and error.
*/
-static int robust_open(const char *z, int f, int m){
- int rc;
- do{ rc = osOpen(z,f,m); }while( rc<0 && errno==EINTR );
- return rc;
+#ifndef SQLITE_MINIMUM_FILE_DESCRIPTOR
+# define SQLITE_MINIMUM_FILE_DESCRIPTOR 3
+#endif
+
+/*
+** Invoke open(). Do so multiple times, until it either succeeds or
+** fails for some reason other than EINTR.
+**
+** If the file creation mode "m" is 0 then set it to the default for
+** SQLite. The default is SQLITE_DEFAULT_FILE_PERMISSIONS (normally
+** 0644) as modified by the system umask. If m is not 0, then
+** make the file creation mode be exactly m ignoring the umask.
+**
+** The m parameter will be non-zero only when creating -wal, -journal,
+** and -shm files. We want those files to have *exactly* the same
+** permissions as their original database, unadulterated by the umask.
+** In that way, if a database file is -rw-rw-rw or -rw-rw-r-, and a
+** transaction crashes and leaves behind hot journals, then any
+** process that is able to write to the database will also be able to
+** recover the hot journals.
+*/
+static int robust_open(const char *z, int f, mode_t m){
+ int fd;
+ mode_t m2 = m ? m : SQLITE_DEFAULT_FILE_PERMISSIONS;
+ while(1){
+#if defined(O_CLOEXEC)
+ fd = osOpen(z,f|O_CLOEXEC,m2);
+#else
+ fd = osOpen(z,f,m2);
+#endif
+ if( fd<0 ){
+ if( errno==EINTR ) continue;
+ break;
+ }
+ if( fd>=SQLITE_MINIMUM_FILE_DESCRIPTOR ) break;
+ osClose(fd);
+ sqlite3_log(SQLITE_WARNING,
+ "attempt to open \"%s\" as file descriptor %d", z, fd);
+ fd = -1;
+ if( osOpen("/dev/null", f, m)<0 ) break;
+ }
+ if( fd>=0 ){
+ if( m!=0 ){
+ struct stat statbuf;
+ if( osFstat(fd, &statbuf)==0
+ && statbuf.st_size==0
+ && (statbuf.st_mode&0777)!=m
+ ){
+ osFchmod(fd, m);
+ }
+ }
+#if defined(FD_CLOEXEC) && (!defined(O_CLOEXEC) || O_CLOEXEC==0)
+ osFcntl(fd, F_SETFD, osFcntl(fd, F_GETFD, 0) | FD_CLOEXEC);
+#endif
+ }
+ return fd;
}
/*
@@ -507,10 +636,10 @@ static int unixMutexHeld(void) {
#endif
-#ifdef SQLITE_DEBUG
+#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG)
/*
** Helper function for printing out trace information from debugging
-** binaries. This returns the string represetation of the supplied
+** binaries. This returns the string representation of the supplied
** integer lock-type.
*/
static const char *azFileLock(int eFileLock){
@@ -587,9 +716,22 @@ static int lockTrace(int fd, int op, struct flock *p){
/*
** Retry ftruncate() calls that fail due to EINTR
+**
+** All calls to ftruncate() within this file should be made through this wrapper.
+** On the Android platform, bypassing the logic below could lead to a corrupt
+** database.
*/
static int robust_ftruncate(int h, sqlite3_int64 sz){
int rc;
+#ifdef __ANDROID__
+ /* On Android, ftruncate() always uses 32-bit offsets, even if
+ ** _FILE_OFFSET_BITS=64 is defined. This means it is unsafe to attempt to
+ ** truncate a file to any size larger than 2GiB. Silently ignore any
+ ** such attempts. */
+ if( sz>(sqlite3_int64)0x7FFFFFFF ){
+ rc = SQLITE_OK;
+ }else
+#endif
do{ rc = osFtruncate(h,sz); }while( rc<0 && errno==EINTR );
return rc;
}
@@ -634,25 +776,15 @@ static int sqliteErrorFromPosixError(int posixError, int sqliteIOErr) {
case EACCES:
/* EACCES is like EAGAIN during locking operations, but not any other time*/
if( (sqliteIOErr == SQLITE_IOERR_LOCK) ||
- (sqliteIOErr == SQLITE_IOERR_UNLOCK) ||
- (sqliteIOErr == SQLITE_IOERR_RDLOCK) ||
- (sqliteIOErr == SQLITE_IOERR_CHECKRESERVEDLOCK) ){
+ (sqliteIOErr == SQLITE_IOERR_UNLOCK) ||
+ (sqliteIOErr == SQLITE_IOERR_RDLOCK) ||
+ (sqliteIOErr == SQLITE_IOERR_CHECKRESERVEDLOCK) ){
return SQLITE_BUSY;
}
/* else fall through */
case EPERM:
return SQLITE_PERM;
- /* EDEADLK is only possible if a call to fcntl(F_SETLKW) is made. And
- ** this module never makes such a call. And the code in SQLite itself
- ** asserts that SQLITE_IOERR_BLOCKED is never returned. For these reasons
- ** this case is also commented out. If the system does set errno to EDEADLK,
- ** the default SQLITE_IOERR_XXX code will be returned. */
-#if 0
- case EDEADLK:
- return SQLITE_IOERR_BLOCKED;
-#endif
-
#if EOPNOTSUPP!=ENOTSUP
case EOPNOTSUPP:
/* something went terribly awry, unless during file system support
@@ -670,7 +802,9 @@ static int sqliteErrorFromPosixError(int posixError, int sqliteIOErr) {
case ENODEV:
case ENXIO:
case ENOENT:
+#ifdef ESTALE /* ESTALE is not defined on Interix systems */
case ESTALE:
+#endif
case ENOSYS:
/* these should force the client to close the file and reconnect */
@@ -680,7 +814,6 @@ static int sqliteErrorFromPosixError(int posixError, int sqliteIOErr) {
}
-
/******************************************************************************
****************** Begin Unique File ID Utility Used By VxWorks ***************
**
@@ -942,7 +1075,7 @@ struct unixInodeInfo {
UnixUnusedFd *pUnused; /* Unused file descriptors to close */
unixInodeInfo *pNext; /* List of all unixInodeInfo objects */
unixInodeInfo *pPrev; /* .... doubly linked */
-#if defined(SQLITE_ENABLE_LOCKING_STYLE)
+#if SQLITE_ENABLE_LOCKING_STYLE
unsigned long long sharedByte; /* for AFP simulated shared lock */
#endif
#if OS_VXWORKS
@@ -969,7 +1102,7 @@ static unixInodeInfo *inodeList = 0;
** The first argument passed to the macro should be the error code that
** will be returned to SQLite (e.g. SQLITE_IOERR_DELETE, SQLITE_CANTOPEN).
** The two subsequent arguments should be the name of the OS function that
-** failed (e.g. "unlink", "open") and the the associated file-system path,
+** failed (e.g. "unlink", "open") and the associated file-system path,
** if any.
*/
#define unixLogError(a,b,c) unixLogErrorAtLine(a,b,c,__LINE__)
@@ -992,7 +1125,7 @@ static int unixLogErrorAtLine(
zErr = aErr;
/* If STRERROR_R_CHAR_P (set by autoconf scripts) or __USE_GNU is defined,
- ** assume that the system provides the the GNU version of strerror_r() that
+ ** assume that the system provides the GNU version of strerror_r() that
** returns a pointer to a buffer containing the error message. That pointer
** may point to aErr[], or it may point to some static storage somewhere.
** Otherwise, assume that the system provides the POSIX version of
@@ -1016,7 +1149,6 @@ static int unixLogErrorAtLine(
zErr = strerror(iErrno);
#endif
- assert( errcode!=SQLITE_OK );
if( zPath==0 ) zPath = "";
sqlite3_log(errcode,
"os_unix.c:%d: (%d) %s(%s) - %s",
@@ -1181,6 +1313,66 @@ static int findInodeInfo(
return SQLITE_OK;
}
+/*
+** Return TRUE if pFile has been renamed or unlinked since it was first opened.
+*/
+static int fileHasMoved(unixFile *pFile){
+#if OS_VXWORKS
+ return pFile->pInode!=0 && pFile->pId!=pFile->pInode->fileId.pId;
+#else
+ struct stat buf;
+
+ /* TODO(shess): This check doesn't work when the Chromium's WebDB code is
+ ** running in the sandbox.
+ */
+ return 0;
+
+ return pFile->pInode!=0 &&
+ (osStat(pFile->zPath, &buf)!=0 || buf.st_ino!=pFile->pInode->fileId.ino);
+#endif
+}
+
+
+/*
+** Check a unixFile that is a database. Verify the following:
+**
+** (1) There is exactly one hard link on the file
+** (2) The file is not a symbolic link
+** (3) The file has not been renamed or unlinked
+**
+** Issue sqlite3_log(SQLITE_WARNING,...) messages if anything is not right.
+*/
+static void verifyDbFile(unixFile *pFile){
+ struct stat buf;
+ int rc;
+ if( pFile->ctrlFlags & UNIXFILE_WARNED ){
+ /* One or more of the following warnings have already been issued. Do not
+ ** repeat them so as not to clutter the error log */
+ return;
+ }
+ rc = osFstat(pFile->h, &buf);
+ if( rc!=0 ){
+ sqlite3_log(SQLITE_WARNING, "cannot fstat db file %s", pFile->zPath);
+ pFile->ctrlFlags |= UNIXFILE_WARNED;
+ return;
+ }
+ if( buf.st_nlink==0 && (pFile->ctrlFlags & UNIXFILE_DELETE)==0 ){
+ sqlite3_log(SQLITE_WARNING, "file unlinked while open: %s", pFile->zPath);
+ pFile->ctrlFlags |= UNIXFILE_WARNED;
+ return;
+ }
+ if( buf.st_nlink>1 ){
+ sqlite3_log(SQLITE_WARNING, "multiple links to file: %s", pFile->zPath);
+ pFile->ctrlFlags |= UNIXFILE_WARNED;
+ return;
+ }
+ if( fileHasMoved(pFile) ){
+ sqlite3_log(SQLITE_WARNING, "file renamed while open: %s", pFile->zPath);
+ pFile->ctrlFlags |= UNIXFILE_WARNED;
+ return;
+ }
+}
+
/*
** This routine checks if there is a RESERVED lock held on the specified
@@ -1340,14 +1532,14 @@ static int unixLock(sqlite3_file *id, int eFileLock){
*/
int rc = SQLITE_OK;
unixFile *pFile = (unixFile*)id;
- unixInodeInfo *pInode = pFile->pInode;
+ unixInodeInfo *pInode;
struct flock lock;
int tErrno = 0;
assert( pFile );
OSTRACE(("LOCK %d %s was %s(%s,%d) pid=%d (unix)\n", pFile->h,
azFileLock(eFileLock), azFileLock(pFile->eFileLock),
- azFileLock(pInode->eFileLock), pInode->nShared , getpid()));
+ azFileLock(pFile->pInode->eFileLock), pFile->pInode->nShared , getpid()));
/* If there is already a lock of this type or more restrictive on the
** unixFile, do nothing. Don't use the end_lock: exit path, as
@@ -1488,7 +1680,7 @@ static int unixLock(sqlite3_file *id, int eFileLock){
}
-#ifndef NDEBUG
+#ifdef SQLITE_DEBUG
/* Set up the transaction-counter change checking flags when
** transitioning from a SHARED to a RESERVED lock. The change
** from SHARED to RESERVED marks the beginning of a normal
@@ -1551,7 +1743,6 @@ static int posixUnlock(sqlite3_file *id, int eFileLock, int handleNFSUnlock){
unixInodeInfo *pInode;
struct flock lock;
int rc = SQLITE_OK;
- int h;
assert( pFile );
OSTRACE(("UNLOCK %d %d was %d(%d,%d) pid=%d (unix)\n", pFile->h, eFileLock,
@@ -1563,16 +1754,12 @@ static int posixUnlock(sqlite3_file *id, int eFileLock, int handleNFSUnlock){
return SQLITE_OK;
}
unixEnterMutex();
- h = pFile->h;
pInode = pFile->pInode;
assert( pInode->nShared!=0 );
if( pFile->eFileLock>SHARED_LOCK ){
assert( pInode->eFileLock==pFile->eFileLock );
- SimulateIOErrorBenign(1);
- SimulateIOError( h=(-1) )
- SimulateIOErrorBenign(0);
-#ifndef NDEBUG
+#ifdef SQLITE_DEBUG
/* When reducing a lock such that other processes can start
** reading the database file again, make sure that the
** transaction counter was updated if any part of the database
@@ -1581,11 +1768,6 @@ static int posixUnlock(sqlite3_file *id, int eFileLock, int handleNFSUnlock){
** the file has changed and hence might not know to flush their
** cache. The use of a stale cache can lead to database corruption.
*/
-#if 0
- assert( pFile->inNormalWrite==0
- || pFile->dbUpdate==0
- || pFile->transCntrChng==1 );
-#endif
pFile->inNormalWrite = 0;
#endif
@@ -1687,14 +1869,11 @@ static int posixUnlock(sqlite3_file *id, int eFileLock, int handleNFSUnlock){
lock.l_type = F_UNLCK;
lock.l_whence = SEEK_SET;
lock.l_start = lock.l_len = 0L;
- SimulateIOErrorBenign(1);
- SimulateIOError( h=(-1) )
- SimulateIOErrorBenign(0);
if( unixFileLock(pFile, &lock)==0 ){
pInode->eFileLock = NO_LOCK;
}else{
rc = SQLITE_IOERR_UNLOCK;
- pFile->lastErrno = errno;
+ pFile->lastErrno = errno;
pInode->eFileLock = NO_LOCK;
pFile->eFileLock = NO_LOCK;
}
@@ -1710,7 +1889,7 @@ static int posixUnlock(sqlite3_file *id, int eFileLock, int handleNFSUnlock){
closePendingFds(pFile);
}
}
-
+
end_unlock:
unixLeaveMutex();
if( rc==SQLITE_OK ) pFile->eFileLock = eFileLock;
@@ -1725,9 +1904,17 @@ end_unlock:
** the requested locking level, this routine is a no-op.
*/
static int unixUnlock(sqlite3_file *id, int eFileLock){
+#if SQLITE_MAX_MMAP_SIZE>0
+ assert( eFileLock==SHARED_LOCK || ((unixFile *)id)->nFetchOut==0 );
+#endif
return posixUnlock(id, eFileLock, 0);
}
+#if SQLITE_MAX_MMAP_SIZE>0
+static int unixMapfile(unixFile *pFd, i64 nByte);
+static void unixUnmapfile(unixFile *pFd);
+#endif
+
/*
** This function performs the parts of the "close file" operation
** common to all locking schemes. It closes the directory and file
@@ -1740,19 +1927,29 @@ static int unixUnlock(sqlite3_file *id, int eFileLock){
*/
static int closeUnixFile(sqlite3_file *id){
unixFile *pFile = (unixFile*)id;
+#if SQLITE_MAX_MMAP_SIZE>0
+ unixUnmapfile(pFile);
+#endif
if( pFile->h>=0 ){
robust_close(pFile, pFile->h, __LINE__);
pFile->h = -1;
}
#if OS_VXWORKS
if( pFile->pId ){
- if( pFile->isDelete ){
+ if( pFile->ctrlFlags & UNIXFILE_DELETE ){
osUnlink(pFile->pId->zCanonicalName);
}
vxworksReleaseFileId(pFile->pId);
pFile->pId = 0;
}
#endif
+#ifdef SQLITE_UNLINK_AFTER_CLOSE
+ if( pFile->ctrlFlags & UNIXFILE_DELETE ){
+ osUnlink(pFile->zPath);
+ sqlite3_free(*(char**)&pFile->zPath);
+ pFile->zPath = 0;
+ }
+#endif
OSTRACE(("CLOSE %-3d\n", pFile->h));
OpenCounter(-1);
sqlite3_free(pFile->pUnused);
@@ -1766,6 +1963,7 @@ static int closeUnixFile(sqlite3_file *id){
static int unixClose(sqlite3_file *id){
int rc = SQLITE_OK;
unixFile *pFile = (unixFile *)id;
+ verifyDbFile(pFile);
unixUnlock(id, NO_LOCK);
unixEnterMutex();
@@ -1834,9 +2032,9 @@ static int nolockClose(sqlite3_file *id) {
/******************************************************************************
************************* Begin dot-file Locking ******************************
**
-** The dotfile locking implementation uses the existance of separate lock
-** files in order to control access to the database. This works on just
-** about every filesystem imaginable. But there are serious downsides:
+** The dotfile locking implementation uses the existence of separate lock
+** files (really a directory) to control access to the database. This works
+** on just about every filesystem imaginable. But there are serious downsides:
**
** (1) There is zero concurrency. A single reader blocks all other
** connections from reading or writing the database.
@@ -1847,15 +2045,15 @@ static int nolockClose(sqlite3_file *id) {
** Nevertheless, a dotlock is an appropriate locking mode for use if no
** other locking strategy is available.
**
-** Dotfile locking works by creating a file in the same directory as the
-** database and with the same name but with a ".lock" extension added.
-** The existance of a lock file implies an EXCLUSIVE lock. All other lock
-** types (SHARED, RESERVED, PENDING) are mapped into EXCLUSIVE.
+** Dotfile locking works by creating a subdirectory in the same directory as
+** the database and with the same name but with a ".lock" extension added.
+** The existence of a lock directory implies an EXCLUSIVE lock. All other
+** lock types (SHARED, RESERVED, PENDING) are mapped into EXCLUSIVE.
*/
/*
** The file suffix added to the data base filename in order to create the
-** lock file.
+** lock directory.
*/
#define DOTLOCK_SUFFIX ".lock"
@@ -1922,7 +2120,6 @@ static int dotlockCheckReservedLock(sqlite3_file *id, int *pResOut) {
*/
static int dotlockLock(sqlite3_file *id, int eFileLock) {
unixFile *pFile = (unixFile*)id;
- int fd;
char *zLockFile = (char *)pFile->lockingContext;
int rc = SQLITE_OK;
@@ -1932,17 +2129,19 @@ static int dotlockLock(sqlite3_file *id, int eFileLock) {
*/
if( pFile->eFileLock > NO_LOCK ){
pFile->eFileLock = eFileLock;
-#if !OS_VXWORKS
/* Always update the timestamp on the old file */
+#ifdef HAVE_UTIME
+ utime(zLockFile, NULL);
+#else
utimes(zLockFile, NULL);
#endif
return SQLITE_OK;
}
/* grab an exclusive lock */
- fd = robust_open(zLockFile,O_RDONLY|O_CREAT|O_EXCL,0600);
- if( fd<0 ){
- /* failed to open/create the file, someone else may have stolen the lock */
+ rc = osMkdir(zLockFile, 0777);
+ if( rc<0 ){
+ /* failed to open/create the lock directory */
int tErrno = errno;
if( EEXIST == tErrno ){
rc = SQLITE_BUSY;
@@ -1954,7 +2153,6 @@ static int dotlockLock(sqlite3_file *id, int eFileLock) {
}
return rc;
}
- robust_close(pFile, fd, __LINE__);
/* got it, set the type and return ok */
pFile->eFileLock = eFileLock;
@@ -1973,10 +2171,11 @@ static int dotlockLock(sqlite3_file *id, int eFileLock) {
static int dotlockUnlock(sqlite3_file *id, int eFileLock) {
unixFile *pFile = (unixFile*)id;
char *zLockFile = (char *)pFile->lockingContext;
+ int rc;
assert( pFile );
OSTRACE(("UNLOCK %d %d was %d pid=%d (dotlock)\n", pFile->h, eFileLock,
- pFile->eFileLock, getpid()));
+ pFile->eFileLock, getpid()));
assert( eFileLock<=SHARED_LOCK );
/* no-op if possible */
@@ -1994,9 +2193,11 @@ static int dotlockUnlock(sqlite3_file *id, int eFileLock) {
/* To fully unlock the database, delete the lock file */
assert( eFileLock==NO_LOCK );
- if( osUnlink(zLockFile) ){
- int rc = 0;
+ rc = osRmdir(zLockFile);
+ if( rc<0 && errno==ENOTDIR ) rc = osUnlink(zLockFile);
+ if( rc<0 ){
int tErrno = errno;
+ rc = 0;
if( ENOENT != tErrno ){
rc = SQLITE_IOERR_UNLOCK;
}
@@ -2013,13 +2214,13 @@ static int dotlockUnlock(sqlite3_file *id, int eFileLock) {
** Close a file. Make sure the lock has been released before closing.
*/
static int dotlockClose(sqlite3_file *id) {
- int rc;
+ int rc = SQLITE_OK;
if( id ){
unixFile *pFile = (unixFile*)id;
dotlockUnlock(id, NO_LOCK);
sqlite3_free(pFile->lockingContext);
+ rc = closeUnixFile(id);
}
- rc = closeUnixFile(id);
return rc;
}
/****************** End of the dot-file lock implementation *******************
@@ -2223,10 +2424,12 @@ static int flockUnlock(sqlite3_file *id, int eFileLock) {
** Close a file.
*/
static int flockClose(sqlite3_file *id) {
+ int rc = SQLITE_OK;
if( id ){
flockUnlock(id, NO_LOCK);
+ rc = closeUnixFile(id);
}
- return closeUnixFile(id);
+ return rc;
}
#endif /* SQLITE_ENABLE_LOCKING_STYLE && !OS_VXWORK */
@@ -2269,7 +2472,6 @@ static int semCheckReservedLock(sqlite3_file *id, int *pResOut) {
/* Otherwise see if some other process holds it. */
if( !reserved ){
sem_t *pSem = pFile->pInode->pSem;
- struct stat statBuf;
if( sem_trywait(pSem)==-1 ){
int tErrno = errno;
@@ -2322,7 +2524,6 @@ static int semCheckReservedLock(sqlite3_file *id, int *pResOut) {
*/
static int semLock(sqlite3_file *id, int eFileLock) {
unixFile *pFile = (unixFile*)id;
- int fd;
sem_t *pSem = pFile->pInode->pSem;
int rc = SQLITE_OK;
@@ -2361,7 +2562,7 @@ static int semUnlock(sqlite3_file *id, int eFileLock) {
assert( pFile );
assert( pSem );
OSTRACE(("UNLOCK %d %d was %d pid=%d (sem)\n", pFile->h, eFileLock,
- pFile->eFileLock, getpid()));
+ pFile->eFileLock, getpid()));
assert( eFileLock<=SHARED_LOCK );
/* no-op if possible */
@@ -2500,11 +2701,12 @@ static int afpCheckReservedLock(sqlite3_file *id, int *pResOut){
int rc = SQLITE_OK;
int reserved = 0;
unixFile *pFile = (unixFile*)id;
+ afpLockingContext *context;
SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );
assert( pFile );
- afpLockingContext *context = (afpLockingContext *) pFile->lockingContext;
+ context = (afpLockingContext *) pFile->lockingContext;
if( context->reserved ){
*pResOut = 1;
return SQLITE_OK;
@@ -2644,7 +2846,7 @@ static int afpLock(sqlite3_file *id, int eFileLock){
** operating system calls for the specified lock.
*/
if( eFileLock==SHARED_LOCK ){
- int lrc1, lrc2, lrc1Errno;
+ int lrc1, lrc2, lrc1Errno = 0;
long lk, mask;
assert( pInode->nShared==0 );
@@ -2775,7 +2977,7 @@ static int afpUnlock(sqlite3_file *id, int eFileLock) {
SimulateIOError( h=(-1) )
SimulateIOErrorBenign(0);
-#ifndef NDEBUG
+#ifdef SQLITE_DEBUG
/* When reducing a lock such that other processes can start
** reading the database file again, make sure that the
** transaction counter was updated if any part of the database
@@ -2923,7 +3125,7 @@ static int nfsUnlock(sqlite3_file *id, int eFileLock){
** NB: If you define USE_PREAD or USE_PREAD64, then it might also
** be necessary to define _XOPEN_SOURCE to be 500. This varies from
** one system to another. Since SQLite does not define USE_PREAD
-** any any form by default, we will not attempt to define _XOPEN_SOURCE.
+** in any form by default, we will not attempt to define _XOPEN_SOURCE.
** See tickets #2741 and #2681.
**
** To avoid stomping the errno value on a failed read the lastErrno value
@@ -2931,35 +3133,51 @@ static int nfsUnlock(sqlite3_file *id, int eFileLock){
*/
static int seekAndRead(unixFile *id, sqlite3_int64 offset, void *pBuf, int cnt){
int got;
+ int prior = 0;
#if (!defined(USE_PREAD) && !defined(USE_PREAD64))
i64 newOffset;
#endif
TIMER_START;
+ assert( cnt==(cnt&0x1ffff) );
+ assert( id->h>2 );
+ cnt &= 0x1ffff;
+ do{
#if defined(USE_PREAD)
- do{ got = osPread(id->h, pBuf, cnt, offset); }while( got<0 && errno==EINTR );
- SimulateIOError( got = -1 );
+ got = osPread(id->h, pBuf, cnt, offset);
+ SimulateIOError( got = -1 );
#elif defined(USE_PREAD64)
- do{ got = osPread64(id->h, pBuf, cnt, offset); }while( got<0 && errno==EINTR);
- SimulateIOError( got = -1 );
+ got = osPread64(id->h, pBuf, cnt, offset);
+ SimulateIOError( got = -1 );
#else
- newOffset = lseek(id->h, offset, SEEK_SET);
- SimulateIOError( newOffset-- );
- if( newOffset!=offset ){
- if( newOffset == -1 ){
- ((unixFile*)id)->lastErrno = errno;
- }else{
- ((unixFile*)id)->lastErrno = 0;
+ newOffset = lseek(id->h, offset, SEEK_SET);
+ SimulateIOError( newOffset-- );
+ if( newOffset!=offset ){
+ if( newOffset == -1 ){
+ ((unixFile*)id)->lastErrno = errno;
+ }else{
+ ((unixFile*)id)->lastErrno = 0;
+ }
+ return -1;
}
- return -1;
- }
- do{ got = osRead(id->h, pBuf, cnt); }while( got<0 && errno==EINTR );
+ got = osRead(id->h, pBuf, cnt);
#endif
+ if( got==cnt ) break;
+ if( got<0 ){
+ if( errno==EINTR ){ got = 1; continue; }
+ prior = 0;
+ ((unixFile*)id)->lastErrno = errno;
+ break;
+ }else if( got>0 ){
+ cnt -= got;
+ offset += got;
+ prior += got;
+ pBuf = (void*)(got + (char*)pBuf);
+ }
+ }while( got>0 );
TIMER_END;
- if( got<0 ){
- ((unixFile*)id)->lastErrno = errno;
- }
- OSTRACE(("READ %-3d %5d %7lld %llu\n", id->h, got, offset, TIMER_ELAPSED));
- return got;
+ OSTRACE(("READ %-3d %5d %7lld %llu\n",
+ id->h, got+prior, offset-prior, TIMER_ELAPSED));
+ return got+prior;
}
/*
@@ -2976,6 +3194,8 @@ static int unixRead(
unixFile *pFile = (unixFile *)id;
int got;
assert( id );
+ assert( offset>=0 );
+ assert( amt>0 );
/* If this is a database file (not a journal, master-journal or temp
** file), the bytes in the locking range should never be read or written. */
@@ -2986,6 +3206,23 @@ static int unixRead(
);
#endif
+#if SQLITE_MAX_MMAP_SIZE>0
+ /* Deal with as much of this read request as possible by transfering
+ ** data from the memory mapping using memcpy(). */
+ if( offset<pFile->mmapSize ){
+ if( offset+amt <= pFile->mmapSize ){
+ memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], amt);
+ return SQLITE_OK;
+ }else{
+ int nCopy = pFile->mmapSize - offset;
+ memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], nCopy);
+ pBuf = &((u8 *)pBuf)[nCopy];
+ amt -= nCopy;
+ offset += nCopy;
+ }
+ }
+#endif
+
got = seekAndRead(pFile, offset, pBuf, amt);
if( got==amt ){
return SQLITE_OK;
@@ -3001,42 +3238,60 @@ static int unixRead(
}
/*
-** Seek to the offset in id->offset then read cnt bytes into pBuf.
-** Return the number of bytes actually read. Update the offset.
-**
-** To avoid stomping the errno value on a failed write the lastErrno value
-** is set before returning.
+** Attempt to seek the file-descriptor passed as the first argument to
+** absolute offset iOff, then attempt to write nBuf bytes of data from
+** pBuf to it. If an error occurs, return -1 and set *piErrno. Otherwise,
+** return the actual number of bytes written (which may be less than
+** nBuf).
*/
-static int seekAndWrite(unixFile *id, i64 offset, const void *pBuf, int cnt){
- int got;
-#if (!defined(USE_PREAD) && !defined(USE_PREAD64))
- i64 newOffset;
-#endif
+static int seekAndWriteFd(
+ int fd, /* File descriptor to write to */
+ i64 iOff, /* File offset to begin writing at */
+ const void *pBuf, /* Copy data from this buffer to the file */
+ int nBuf, /* Size of buffer pBuf in bytes */
+ int *piErrno /* OUT: Error number if error occurs */
+){
+ int rc = 0; /* Value returned by system call */
+
+ assert( nBuf==(nBuf&0x1ffff) );
+ assert( fd>2 );
+ nBuf &= 0x1ffff;
TIMER_START;
+
#if defined(USE_PREAD)
- do{ got = osPwrite(id->h, pBuf, cnt, offset); }while( got<0 && errno==EINTR );
+ do{ rc = osPwrite(fd, pBuf, nBuf, iOff); }while( rc<0 && errno==EINTR );
#elif defined(USE_PREAD64)
- do{ got = osPwrite64(id->h, pBuf, cnt, offset);}while( got<0 && errno==EINTR);
+ do{ rc = osPwrite64(fd, pBuf, nBuf, iOff);}while( rc<0 && errno==EINTR);
#else
- newOffset = lseek(id->h, offset, SEEK_SET);
- SimulateIOError( newOffset-- );
- if( newOffset!=offset ){
- if( newOffset == -1 ){
- ((unixFile*)id)->lastErrno = errno;
- }else{
- ((unixFile*)id)->lastErrno = 0;
+ do{
+ i64 iSeek = lseek(fd, iOff, SEEK_SET);
+ SimulateIOError( iSeek-- );
+
+ if( iSeek!=iOff ){
+ if( piErrno ) *piErrno = (iSeek==-1 ? errno : 0);
+ return -1;
}
- return -1;
- }
- do{ got = osWrite(id->h, pBuf, cnt); }while( got<0 && errno==EINTR );
+ rc = osWrite(fd, pBuf, nBuf);
+ }while( rc<0 && errno==EINTR );
#endif
+
TIMER_END;
- if( got<0 ){
- ((unixFile*)id)->lastErrno = errno;
- }
+ OSTRACE(("WRITE %-3d %5d %7lld %llu\n", fd, rc, iOff, TIMER_ELAPSED));
- OSTRACE(("WRITE %-3d %5d %7lld %llu\n", id->h, got, offset, TIMER_ELAPSED));
- return got;
+ if( rc<0 && piErrno ) *piErrno = errno;
+ return rc;
+}
+
+
+/*
+** Seek to the offset in id->offset then read cnt bytes into pBuf.
+** Return the number of bytes actually read. Update the offset.
+**
+** To avoid stomping the errno value on a failed write the lastErrno value
+** is set before returning.
+*/
+static int seekAndWrite(unixFile *id, i64 offset, const void *pBuf, int cnt){
+ return seekAndWriteFd(id->h, offset, pBuf, cnt, &id->lastErrno);
}
@@ -3064,7 +3319,7 @@ static int unixWrite(
);
#endif
-#ifndef NDEBUG
+#ifdef SQLITE_DEBUG
/* If we are doing a normal write to a database file (as opposed to
** doing a hot-journal rollback or a write to some file other than a
** normal database file) then record the fact that the database
@@ -3086,6 +3341,23 @@ static int unixWrite(
}
#endif
+#if SQLITE_MAX_MMAP_SIZE>0
+ /* Deal with as much of this write request as possible by transfering
+ ** data from the memory mapping using memcpy(). */
+ if( offset<pFile->mmapSize ){
+ if( offset+amt <= pFile->mmapSize ){
+ memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, amt);
+ return SQLITE_OK;
+ }else{
+ int nCopy = pFile->mmapSize - offset;
+ memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, nCopy);
+ pBuf = &((u8 *)pBuf)[nCopy];
+ amt -= nCopy;
+ offset += nCopy;
+ }
+ }
+#endif
+
while( amt>0 && (wrote = seekAndWrite(pFile, offset, pBuf, amt))>0 ){
amt -= wrote;
offset += wrote;
@@ -3095,7 +3367,7 @@ static int unixWrite(
SimulateDiskfullError(( wrote=0, amt=1 ));
if( amt>0 ){
- if( wrote<0 ){
+ if( wrote<0 && pFile->lastErrno!=ENOSPC ){
/* lastErrno set by seekAndWrite */
return SQLITE_IOERR_WRITE;
}else{
@@ -3118,11 +3390,11 @@ int sqlite3_fullsync_count = 0;
/*
** We do not trust systems to provide a working fdatasync(). Some do.
-** Others do no. To be safe, we will stick with the (slower) fsync().
-** If you know that your system does support fdatasync() correctly,
+** Others do no. To be safe, we will stick with the (slightly slower)
+** fsync(). If you know that your system does support fdatasync() correctly,
** then simply compile with -Dfdatasync=fdatasync
*/
-#if !defined(fdatasync) && !defined(__linux__)
+#if !defined(fdatasync)
# define fdatasync fsync
#endif
@@ -3264,9 +3536,6 @@ static int openDirectory(const char *zFilename, int *pFd){
zDirname[ii] = '\0';
fd = robust_open(zDirname, O_RDONLY|O_BINARY, 0);
if( fd>=0 ){
-#ifdef FD_CLOEXEC
- osFcntl(fd, F_SETFD, osFcntl(fd, F_GETFD, 0) | FD_CLOEXEC);
-#endif
OSTRACE(("OPENDIR %-3d %s\n", fd, zDirname));
}
}
@@ -3316,7 +3585,7 @@ static int unixSync(sqlite3_file *id, int flags){
}
/* Also fsync the directory containing the file if the DIRSYNC flag
- ** is set. This is a one-time occurrance. Many systems (examples: AIX)
+ ** is set. This is a one-time occurrence. Many systems (examples: AIX)
** are unable to fsync a directory, so ignore errors on the fsync.
*/
if( pFile->ctrlFlags & UNIXFILE_DIRSYNC ){
@@ -3349,16 +3618,16 @@ static int unixTruncate(sqlite3_file *id, i64 nByte){
** actual file size after the operation may be larger than the requested
** size).
*/
- if( pFile->szChunk ){
+ if( pFile->szChunk>0 ){
nByte = ((nByte + pFile->szChunk - 1)/pFile->szChunk) * pFile->szChunk;
}
- rc = robust_ftruncate(pFile->h, (off_t)nByte);
+ rc = robust_ftruncate(pFile->h, nByte);
if( rc ){
pFile->lastErrno = errno;
return unixLogError(SQLITE_IOERR_TRUNCATE, "ftruncate", pFile->zPath);
}else{
-#ifndef NDEBUG
+#ifdef SQLITE_DEBUG
/* If we are doing a normal write to a database file (as opposed to
** doing a hot-journal rollback or a write to some file other than a
** normal database file) and we truncate the file to zero length,
@@ -3371,6 +3640,16 @@ static int unixTruncate(sqlite3_file *id, i64 nByte){
}
#endif
+#if SQLITE_MAX_MMAP_SIZE>0
+ /* If the file was just truncated to a size smaller than the currently
+ ** mapped region, reduce the effective mapping size as well. SQLite will
+ ** use read() and write() to access data beyond this point from now on.
+ */
+ if( nByte<pFile->mmapSize ){
+ pFile->mmapSize = nByte;
+ }
+#endif
+
return SQLITE_OK;
}
}
@@ -3412,14 +3691,12 @@ static int proxyFileControl(sqlite3_file*,int,void*);
/*
** This function is called to handle the SQLITE_FCNTL_SIZE_HINT
-** file-control operation.
-**
-** If the user has configured a chunk-size for this file, it could be
-** that the file needs to be extended at this point. Otherwise, the
-** SQLITE_FCNTL_SIZE_HINT operation is a no-op for Unix.
+** file-control operation. Enlarge the database to nBytes in size
+** (rounded up to the next chunk-size). If the database is already
+** nBytes or larger, this routine is a no-op.
*/
static int fcntlSizeHint(unixFile *pFile, i64 nByte){
- if( pFile->szChunk ){
+ if( pFile->szChunk>0 ){
i64 nSize; /* Required file size */
struct stat buf; /* Used to hold return values of fstat() */
@@ -3461,30 +3738,111 @@ static int fcntlSizeHint(unixFile *pFile, i64 nByte){
}
}
+#if SQLITE_MAX_MMAP_SIZE>0
+ if( pFile->mmapSizeMax>0 && nByte>pFile->mmapSize ){
+ int rc;
+ if( pFile->szChunk<=0 ){
+ if( robust_ftruncate(pFile->h, nByte) ){
+ pFile->lastErrno = errno;
+ return unixLogError(SQLITE_IOERR_TRUNCATE, "ftruncate", pFile->zPath);
+ }
+ }
+
+ rc = unixMapfile(pFile, nByte);
+ return rc;
+ }
+#endif
+
return SQLITE_OK;
}
/*
+** If *pArg is initially negative then this is a query. Set *pArg to
+** 1 or 0 depending on whether or not bit mask of pFile->ctrlFlags is set.
+**
+** If *pArg is 0 or 1, then clear or set the mask bit of pFile->ctrlFlags.
+*/
+static void unixModeBit(unixFile *pFile, unsigned char mask, int *pArg){
+ if( *pArg<0 ){
+ *pArg = (pFile->ctrlFlags & mask)!=0;
+ }else if( (*pArg)==0 ){
+ pFile->ctrlFlags &= ~mask;
+ }else{
+ pFile->ctrlFlags |= mask;
+ }
+}
+
+/* Forward declaration */
+static int unixGetTempname(int nBuf, char *zBuf);
+
+/*
** Information and control of an open file handle.
*/
static int unixFileControl(sqlite3_file *id, int op, void *pArg){
+ unixFile *pFile = (unixFile*)id;
switch( op ){
case SQLITE_FCNTL_LOCKSTATE: {
- *(int*)pArg = ((unixFile*)id)->eFileLock;
+ *(int*)pArg = pFile->eFileLock;
return SQLITE_OK;
}
case SQLITE_LAST_ERRNO: {
- *(int*)pArg = ((unixFile*)id)->lastErrno;
+ *(int*)pArg = pFile->lastErrno;
return SQLITE_OK;
}
case SQLITE_FCNTL_CHUNK_SIZE: {
- ((unixFile*)id)->szChunk = *(int *)pArg;
+ pFile->szChunk = *(int *)pArg;
return SQLITE_OK;
}
case SQLITE_FCNTL_SIZE_HINT: {
- return fcntlSizeHint((unixFile *)id, *(i64 *)pArg);
+ int rc;
+ SimulateIOErrorBenign(1);
+ rc = fcntlSizeHint(pFile, *(i64 *)pArg);
+ SimulateIOErrorBenign(0);
+ return rc;
+ }
+ case SQLITE_FCNTL_PERSIST_WAL: {
+ unixModeBit(pFile, UNIXFILE_PERSIST_WAL, (int*)pArg);
+ return SQLITE_OK;
+ }
+ case SQLITE_FCNTL_POWERSAFE_OVERWRITE: {
+ unixModeBit(pFile, UNIXFILE_PSOW, (int*)pArg);
+ return SQLITE_OK;
+ }
+ case SQLITE_FCNTL_VFSNAME: {
+ *(char**)pArg = sqlite3_mprintf("%s", pFile->pVfs->zName);
+ return SQLITE_OK;
}
-#ifndef NDEBUG
+ case SQLITE_FCNTL_TEMPFILENAME: {
+ char *zTFile = sqlite3_malloc( pFile->pVfs->mxPathname );
+ if( zTFile ){
+ unixGetTempname(pFile->pVfs->mxPathname, zTFile);
+ *(char**)pArg = zTFile;
+ }
+ return SQLITE_OK;
+ }
+ case SQLITE_FCNTL_HAS_MOVED: {
+ *(int*)pArg = fileHasMoved(pFile);
+ return SQLITE_OK;
+ }
+#if SQLITE_MAX_MMAP_SIZE>0
+ case SQLITE_FCNTL_MMAP_SIZE: {
+ i64 newLimit = *(i64*)pArg;
+ int rc = SQLITE_OK;
+ if( newLimit>sqlite3GlobalConfig.mxMmap ){
+ newLimit = sqlite3GlobalConfig.mxMmap;
+ }
+ *(i64*)pArg = pFile->mmapSizeMax;
+ if( newLimit>=0 && newLimit!=pFile->mmapSizeMax && pFile->nFetchOut==0 ){
+ pFile->mmapSizeMax = newLimit;
+ if( pFile->mmapSize>0 ){
+ unixUnmapfile(pFile);
+ rc = unixMapfile(pFile, -1);
+ }
+ }
+ return rc;
+ }
+#endif
+#ifdef SQLITE_DEBUG
/* The pager calls this method to signal that it has done
** a rollback and that the database is therefore unchanged and
** it hence it is OK for the transaction change counter to be
@@ -3501,9 +3859,6 @@ static int unixFileControl(sqlite3_file *id, int op, void *pArg){
return proxyFileControl(id,op,pArg);
}
#endif /* SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) */
- case SQLITE_FCNTL_SYNC_OMITTED: {
- return SQLITE_OK; /* A no-op */
- }
}
return SQLITE_NOTFOUND;
}
@@ -3518,21 +3873,138 @@ static int unixFileControl(sqlite3_file *id, int op, void *pArg){
** a database and its journal file) that the sector size will be the
** same for both.
*/
+#ifndef __QNXNTO__
static int unixSectorSize(sqlite3_file *NotUsed){
UNUSED_PARAMETER(NotUsed);
return SQLITE_DEFAULT_SECTOR_SIZE;
}
+#endif
/*
-** Return the device characteristics for the file. This is always 0 for unix.
+** The following version of unixSectorSize() is optimized for QNX.
*/
-static int unixDeviceCharacteristics(sqlite3_file *NotUsed){
- UNUSED_PARAMETER(NotUsed);
- return 0;
+#ifdef __QNXNTO__
+#include <sys/dcmd_blk.h>
+#include <sys/statvfs.h>
+static int unixSectorSize(sqlite3_file *id){
+ unixFile *pFile = (unixFile*)id;
+ if( pFile->sectorSize == 0 ){
+ struct statvfs fsInfo;
+
+ /* Set defaults for non-supported filesystems */
+ pFile->sectorSize = SQLITE_DEFAULT_SECTOR_SIZE;
+ pFile->deviceCharacteristics = 0;
+ if( fstatvfs(pFile->h, &fsInfo) == -1 ) {
+ return pFile->sectorSize;
+ }
+
+ if( !strcmp(fsInfo.f_basetype, "tmp") ) {
+ pFile->sectorSize = fsInfo.f_bsize;
+ pFile->deviceCharacteristics =
+ SQLITE_IOCAP_ATOMIC4K | /* All ram filesystem writes are atomic */
+ SQLITE_IOCAP_SAFE_APPEND | /* growing the file does not occur until
+ ** the write succeeds */
+ SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind
+ ** so it is ordered */
+ 0;
+ }else if( strstr(fsInfo.f_basetype, "etfs") ){
+ pFile->sectorSize = fsInfo.f_bsize;
+ pFile->deviceCharacteristics =
+ /* etfs cluster size writes are atomic */
+ (pFile->sectorSize / 512 * SQLITE_IOCAP_ATOMIC512) |
+ SQLITE_IOCAP_SAFE_APPEND | /* growing the file does not occur until
+ ** the write succeeds */
+ SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind
+ ** so it is ordered */
+ 0;
+ }else if( !strcmp(fsInfo.f_basetype, "qnx6") ){
+ pFile->sectorSize = fsInfo.f_bsize;
+ pFile->deviceCharacteristics =
+ SQLITE_IOCAP_ATOMIC | /* All filesystem writes are atomic */
+ SQLITE_IOCAP_SAFE_APPEND | /* growing the file does not occur until
+ ** the write succeeds */
+ SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind
+ ** so it is ordered */
+ 0;
+ }else if( !strcmp(fsInfo.f_basetype, "qnx4") ){
+ pFile->sectorSize = fsInfo.f_bsize;
+ pFile->deviceCharacteristics =
+ /* full bitset of atomics from max sector size and smaller */
+ ((pFile->sectorSize / 512 * SQLITE_IOCAP_ATOMIC512) << 1) - 2 |
+ SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind
+ ** so it is ordered */
+ 0;
+ }else if( strstr(fsInfo.f_basetype, "dos") ){
+ pFile->sectorSize = fsInfo.f_bsize;
+ pFile->deviceCharacteristics =
+ /* full bitset of atomics from max sector size and smaller */
+ ((pFile->sectorSize / 512 * SQLITE_IOCAP_ATOMIC512) << 1) - 2 |
+ SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind
+ ** so it is ordered */
+ 0;
+ }else{
+ pFile->deviceCharacteristics =
+ SQLITE_IOCAP_ATOMIC512 | /* blocks are atomic */
+ SQLITE_IOCAP_SAFE_APPEND | /* growing the file does not occur until
+ ** the write succeeds */
+ 0;
+ }
+ }
+ /* Last chance verification. If the sector size isn't a multiple of 512
+ ** then it isn't valid.*/
+ if( pFile->sectorSize % 512 != 0 ){
+ pFile->deviceCharacteristics = 0;
+ pFile->sectorSize = SQLITE_DEFAULT_SECTOR_SIZE;
+ }
+ return pFile->sectorSize;
+}
+#endif /* __QNXNTO__ */
+
+/*
+** Return the device characteristics for the file.
+**
+** This VFS is set up to return SQLITE_IOCAP_POWERSAFE_OVERWRITE by default.
+** However, that choice is controversial since technically the underlying
+** file system does not always provide powersafe overwrites. (In other
+** words, after a power-loss event, parts of the file that were never
+** written might end up being altered.) However, non-PSOW behavior is very,
+** very rare. And asserting PSOW makes a large reduction in the amount
+** of required I/O for journaling, since a lot of padding is eliminated.
+** Hence, while POWERSAFE_OVERWRITE is on by default, there is a file-control
+** available to turn it off and URI query parameter available to turn it off.
+*/
+static int unixDeviceCharacteristics(sqlite3_file *id){
+ unixFile *p = (unixFile*)id;
+ int rc = 0;
+#ifdef __QNXNTO__
+ if( p->sectorSize==0 ) unixSectorSize(id);
+ rc = p->deviceCharacteristics;
+#endif
+ if( p->ctrlFlags & UNIXFILE_PSOW ){
+ rc |= SQLITE_IOCAP_POWERSAFE_OVERWRITE;
+ }
+ return rc;
}
-#ifndef SQLITE_OMIT_WAL
+#if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0
+
+/*
+** Return the system page size.
+**
+** This function should not be called directly by other code in this file.
+** Instead, it should be called via macro osGetpagesize().
+*/
+static int unixGetpagesize(void){
+#if defined(_BSD_SOURCE)
+ return getpagesize();
+#else
+ return (int)sysconf(_SC_PAGESIZE);
+#endif
+}
+
+#endif /* !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 */
+#ifndef SQLITE_OMIT_WAL
/*
** Object used to represent an shared memory buffer.
@@ -3569,7 +4041,8 @@ struct unixShmNode {
char *zFilename; /* Name of the mmapped file */
int h; /* Open file descriptor */
int szRegion; /* Size of shared-memory regions */
- int nRegion; /* Size of array apRegion */
+ u16 nRegion; /* Size of array apRegion */
+ u8 isReadonly; /* True if read-only */
char **apRegion; /* Array of mapped shared-memory regions */
int nRef; /* Number of unixShm objects pointing to this */
unixShm *pFirst; /* All unixShm objects pointing to this */
@@ -3597,11 +4070,9 @@ struct unixShm {
unixShmNode *pShmNode; /* The underlying unixShmNode object */
unixShm *pNext; /* Next unixShm with the same unixShmNode */
u8 hasMutex; /* True if holding the unixShmNode mutex */
+ u8 id; /* Id of this connection within its unixShmNode */
u16 sharedMask; /* Mask of shared locks held */
u16 exclMask; /* Mask of exclusive locks held */
-#ifdef SQLITE_DEBUG
- u8 id; /* Id of this connection within its unixShmNode */
-#endif
};
/*
@@ -3650,7 +4121,7 @@ static int unixShmSystemLock(
#ifdef SQLITE_DEBUG
{ u16 mask;
OSTRACE(("SHM-LOCK "));
- mask = (1<<(ofst+n)) - (1<<ofst);
+ mask = ofst>31 ? 0xffff : (1<<(ofst+n)) - (1<<ofst);
if( rc==SQLITE_OK ){
if( lockType==F_UNLCK ){
OSTRACE(("unlock %d ok", ofst));
@@ -3684,6 +4155,22 @@ static int unixShmSystemLock(
return rc;
}
+/*
+** Return the minimum number of 32KB shm regions that should be mapped at
+** a time, assuming that each mapping must be an integer multiple of the
+** current system page-size.
+**
+** Usually, this is 1. The exception seems to be systems that are configured
+** to use 64KB pages - in this case each mapping must cover at least two
+** shm regions.
+*/
+static int unixShmRegionPerMap(void){
+ int shmsz = 32*1024; /* SHM region size */
+ int pgsz = osGetpagesize(); /* System page size */
+ assert( ((pgsz-1)&pgsz)==0 ); /* Page size must be a power of 2 */
+ if( pgsz<shmsz ) return 1;
+ return pgsz/shmsz;
+}
/*
** Purge the unixShmNodeList list of all entries with unixShmNode.nRef==0.
@@ -3695,12 +4182,13 @@ static void unixShmPurge(unixFile *pFd){
unixShmNode *p = pFd->pInode->pShmNode;
assert( unixMutexHeld() );
if( p && p->nRef==0 ){
+ int nShmPerMap = unixShmRegionPerMap();
int i;
assert( p->pInode==pFd->pInode );
- if( p->mutex ) sqlite3_mutex_free(p->mutex);
- for(i=0; i<p->nRegion; i++){
+ sqlite3_mutex_free(p->mutex);
+ for(i=0; i<p->nRegion; i+=nShmPerMap){
if( p->h>=0 ){
- munmap(p->apRegion[i], p->szRegion);
+ osMunmap(p->apRegion[i], p->szRegion);
}else{
sqlite3_free(p->apRegion[i]);
}
@@ -3775,8 +4263,7 @@ static int unixOpenSharedMemory(unixFile *pDbFd){
/* Call fstat() to figure out the permissions on the database file. If
** a new *-shm file is created, an attempt will be made to create it
- ** with the same permissions. The actual permissions the file is created
- ** with are subject to the current umask setting.
+ ** with the same permissions.
*/
if( osFstat(pDbFd->h, &sStat) && pInode->bProcessLock==0 ){
rc = SQLITE_IOERR_FSTAT;
@@ -3784,16 +4271,16 @@ static int unixOpenSharedMemory(unixFile *pDbFd){
}
#ifdef SQLITE_SHM_DIRECTORY
- nShmFilename = sizeof(SQLITE_SHM_DIRECTORY) + 30;
+ nShmFilename = sizeof(SQLITE_SHM_DIRECTORY) + 31;
#else
- nShmFilename = 5 + (int)strlen(pDbFd->zPath);
+ nShmFilename = 6 + (int)strlen(pDbFd->zPath);
#endif
pShmNode = sqlite3_malloc( sizeof(*pShmNode) + nShmFilename );
if( pShmNode==0 ){
rc = SQLITE_NOMEM;
goto shm_open_err;
}
- memset(pShmNode, 0, sizeof(*pShmNode));
+ memset(pShmNode, 0, sizeof(*pShmNode)+nShmFilename);
zShmFilename = pShmNode->zFilename = (char*)&pShmNode[1];
#ifdef SQLITE_SHM_DIRECTORY
sqlite3_snprintf(nShmFilename, zShmFilename,
@@ -3801,6 +4288,7 @@ static int unixOpenSharedMemory(unixFile *pDbFd){
(u32)sStat.st_ino, (u32)sStat.st_dev);
#else
sqlite3_snprintf(nShmFilename, zShmFilename, "%s-shm", pDbFd->zPath);
+ sqlite3FileSuffix3(pDbFd->zPath, zShmFilename);
#endif
pShmNode->h = -1;
pDbFd->pInode->pShmNode = pShmNode;
@@ -3812,12 +4300,22 @@ static int unixOpenSharedMemory(unixFile *pDbFd){
}
if( pInode->bProcessLock==0 ){
- pShmNode->h = robust_open(zShmFilename, O_RDWR|O_CREAT,
- (sStat.st_mode & 0777));
+ int openFlags = O_RDWR | O_CREAT;
+ if( sqlite3_uri_boolean(pDbFd->zPath, "readonly_shm", 0) ){
+ openFlags = O_RDONLY;
+ pShmNode->isReadonly = 1;
+ }
+ pShmNode->h = robust_open(zShmFilename, openFlags, (sStat.st_mode&0777));
if( pShmNode->h<0 ){
rc = unixLogError(SQLITE_CANTOPEN_BKPT, "open", zShmFilename);
goto shm_open_err;
}
+
+ /* If this process is running as root, make sure that the SHM file
+ ** is owned by the same user that owns the original database. Otherwise,
+ ** the original owner will not be able to connect.
+ */
+ osFchown(pShmNode->h, sStat.st_uid, sStat.st_gid);
/* Check to see if another process is holding the dead-man switch.
** If not, truncate the file to zero length.
@@ -3895,6 +4393,8 @@ static int unixShmMap(
unixShm *p;
unixShmNode *pShmNode;
int rc = SQLITE_OK;
+ int nShmPerMap = unixShmRegionPerMap();
+ int nReqRegion;
/* If the shared-memory file has not yet been opened, open it now. */
if( pDbFd->pShm==0 ){
@@ -3910,9 +4410,12 @@ static int unixShmMap(
assert( pShmNode->h>=0 || pDbFd->pInode->bProcessLock==1 );
assert( pShmNode->h<0 || pDbFd->pInode->bProcessLock==0 );
- if( pShmNode->nRegion<=iRegion ){
+ /* Minimum number of regions required to be mapped. */
+ nReqRegion = ((iRegion+nShmPerMap) / nShmPerMap) * nShmPerMap;
+
+ if( pShmNode->nRegion<nReqRegion ){
char **apNew; /* New apRegion[] array */
- int nByte = (iRegion+1)*szRegion; /* Minimum required file size */
+ int nByte = nReqRegion*szRegion; /* Minimum required file size */
struct stat sStat; /* Used by fstat() */
pShmNode->szRegion = szRegion;
@@ -3930,36 +4433,55 @@ static int unixShmMap(
if( sStat.st_size<nByte ){
/* The requested memory region does not exist. If bExtend is set to
** false, exit early. *pp will be set to NULL and SQLITE_OK returned.
- **
- ** Alternatively, if bExtend is true, use ftruncate() to allocate
- ** the requested memory region.
*/
- if( !bExtend ) goto shmpage_out;
- if( robust_ftruncate(pShmNode->h, nByte) ){
- rc = unixLogError(SQLITE_IOERR_SHMSIZE, "ftruncate",
- pShmNode->zFilename);
+ if( !bExtend ){
goto shmpage_out;
}
+
+ /* Alternatively, if bExtend is true, extend the file. Do this by
+ ** writing a single byte to the end of each (OS) page being
+ ** allocated or extended. Technically, we need only write to the
+ ** last page in order to extend the file. But writing to all new
+ ** pages forces the OS to allocate them immediately, which reduces
+ ** the chances of SIGBUS while accessing the mapped region later on.
+ */
+ else{
+ static const int pgsz = 4096;
+ int iPg;
+
+ /* Write to the last byte of each newly allocated or extended page */
+ assert( (nByte % pgsz)==0 );
+ for(iPg=(sStat.st_size/pgsz); iPg<(nByte/pgsz); iPg++){
+ if( seekAndWriteFd(pShmNode->h, iPg*pgsz + pgsz-1, "", 1, 0)!=1 ){
+ const char *zFile = pShmNode->zFilename;
+ rc = unixLogError(SQLITE_IOERR_SHMSIZE, "write", zFile);
+ goto shmpage_out;
+ }
+ }
+ }
}
}
/* Map the requested memory region into this processes address space. */
apNew = (char **)sqlite3_realloc(
- pShmNode->apRegion, (iRegion+1)*sizeof(char *)
+ pShmNode->apRegion, nReqRegion*sizeof(char *)
);
if( !apNew ){
rc = SQLITE_IOERR_NOMEM;
goto shmpage_out;
}
pShmNode->apRegion = apNew;
- while(pShmNode->nRegion<=iRegion){
+ while( pShmNode->nRegion<nReqRegion ){
+ int nMap = szRegion*nShmPerMap;
+ int i;
void *pMem;
if( pShmNode->h>=0 ){
- pMem = mmap(0, szRegion, PROT_READ|PROT_WRITE,
- MAP_SHARED, pShmNode->h, pShmNode->nRegion*szRegion
+ pMem = osMmap(0, nMap,
+ pShmNode->isReadonly ? PROT_READ : PROT_READ|PROT_WRITE,
+ MAP_SHARED, pShmNode->h, szRegion*(i64)pShmNode->nRegion
);
if( pMem==MAP_FAILED ){
- rc = SQLITE_IOERR;
+ rc = unixLogError(SQLITE_IOERR_SHMMAP, "mmap", pShmNode->zFilename);
goto shmpage_out;
}
}else{
@@ -3970,8 +4492,11 @@ static int unixShmMap(
}
memset(pMem, 0, szRegion);
}
- pShmNode->apRegion[pShmNode->nRegion] = pMem;
- pShmNode->nRegion++;
+
+ for(i=0; i<nShmPerMap; i++){
+ pShmNode->apRegion[pShmNode->nRegion+i] = &((char*)pMem)[szRegion*i];
+ }
+ pShmNode->nRegion += nShmPerMap;
}
}
@@ -3981,6 +4506,7 @@ shmpage_out:
}else{
*pp = 0;
}
+ if( pShmNode->isReadonly && rc==SQLITE_OK ) rc = SQLITE_READONLY;
sqlite3_mutex_leave(pShmNode->mutex);
return rc;
}
@@ -4170,6 +4696,227 @@ static int unixShmUnmap(
# define unixShmUnmap 0
#endif /* #ifndef SQLITE_OMIT_WAL */
+#if SQLITE_MAX_MMAP_SIZE>0
+/*
+** If it is currently memory mapped, unmap file pFd.
+*/
+static void unixUnmapfile(unixFile *pFd){
+ assert( pFd->nFetchOut==0 );
+ if( pFd->pMapRegion ){
+ osMunmap(pFd->pMapRegion, pFd->mmapSizeActual);
+ pFd->pMapRegion = 0;
+ pFd->mmapSize = 0;
+ pFd->mmapSizeActual = 0;
+ }
+}
+
+/*
+** Attempt to set the size of the memory mapping maintained by file
+** descriptor pFd to nNew bytes. Any existing mapping is discarded.
+**
+** If successful, this function sets the following variables:
+**
+** unixFile.pMapRegion
+** unixFile.mmapSize
+** unixFile.mmapSizeActual
+**
+** If unsuccessful, an error message is logged via sqlite3_log() and
+** the three variables above are zeroed. In this case SQLite should
+** continue accessing the database using the xRead() and xWrite()
+** methods.
+*/
+static void unixRemapfile(
+ unixFile *pFd, /* File descriptor object */
+ i64 nNew /* Required mapping size */
+){
+ const char *zErr = "mmap";
+ int h = pFd->h; /* File descriptor open on db file */
+ u8 *pOrig = (u8 *)pFd->pMapRegion; /* Pointer to current file mapping */
+ i64 nOrig = pFd->mmapSizeActual; /* Size of pOrig region in bytes */
+ u8 *pNew = 0; /* Location of new mapping */
+ int flags = PROT_READ; /* Flags to pass to mmap() */
+
+ assert( pFd->nFetchOut==0 );
+ assert( nNew>pFd->mmapSize );
+ assert( nNew<=pFd->mmapSizeMax );
+ assert( nNew>0 );
+ assert( pFd->mmapSizeActual>=pFd->mmapSize );
+ assert( MAP_FAILED!=0 );
+
+ if( (pFd->ctrlFlags & UNIXFILE_RDONLY)==0 ) flags |= PROT_WRITE;
+
+ if( pOrig ){
+#if HAVE_MREMAP
+ i64 nReuse = pFd->mmapSize;
+#else
+ const int szSyspage = osGetpagesize();
+ i64 nReuse = (pFd->mmapSize & ~(szSyspage-1));
+#endif
+ u8 *pReq = &pOrig[nReuse];
+
+ /* Unmap any pages of the existing mapping that cannot be reused. */
+ if( nReuse!=nOrig ){
+ osMunmap(pReq, nOrig-nReuse);
+ }
+
+#if HAVE_MREMAP
+ pNew = osMremap(pOrig, nReuse, nNew, MREMAP_MAYMOVE);
+ zErr = "mremap";
+#else
+ pNew = osMmap(pReq, nNew-nReuse, flags, MAP_SHARED, h, nReuse);
+ if( pNew!=MAP_FAILED ){
+ if( pNew!=pReq ){
+ osMunmap(pNew, nNew - nReuse);
+ pNew = 0;
+ }else{
+ pNew = pOrig;
+ }
+ }
+#endif
+
+ /* The attempt to extend the existing mapping failed. Free it. */
+ if( pNew==MAP_FAILED || pNew==0 ){
+ osMunmap(pOrig, nReuse);
+ }
+ }
+
+ /* If pNew is still NULL, try to create an entirely new mapping. */
+ if( pNew==0 ){
+ pNew = osMmap(0, nNew, flags, MAP_SHARED, h, 0);
+ }
+
+ if( pNew==MAP_FAILED ){
+ pNew = 0;
+ nNew = 0;
+ unixLogError(SQLITE_OK, zErr, pFd->zPath);
+
+ /* If the mmap() above failed, assume that all subsequent mmap() calls
+ ** will probably fail too. Fall back to using xRead/xWrite exclusively
+ ** in this case. */
+ pFd->mmapSizeMax = 0;
+ }
+ pFd->pMapRegion = (void *)pNew;
+ pFd->mmapSize = pFd->mmapSizeActual = nNew;
+}
+
+/*
+** Memory map or remap the file opened by file-descriptor pFd (if the file
+** is already mapped, the existing mapping is replaced by the new). Or, if
+** there already exists a mapping for this file, and there are still
+** outstanding xFetch() references to it, this function is a no-op.
+**
+** If parameter nByte is non-negative, then it is the requested size of
+** the mapping to create. Otherwise, if nByte is less than zero, then the
+** requested size is the size of the file on disk. The actual size of the
+** created mapping is either the requested size or the value configured
+** using SQLITE_FCNTL_MMAP_LIMIT, whichever is smaller.
+**
+** SQLITE_OK is returned if no error occurs (even if the mapping is not
+** recreated as a result of outstanding references) or an SQLite error
+** code otherwise.
+*/
+static int unixMapfile(unixFile *pFd, i64 nByte){
+ i64 nMap = nByte;
+ int rc;
+
+ assert( nMap>=0 || pFd->nFetchOut==0 );
+ if( pFd->nFetchOut>0 ) return SQLITE_OK;
+
+ if( nMap<0 ){
+ struct stat statbuf; /* Low-level file information */
+ rc = osFstat(pFd->h, &statbuf);
+ if( rc!=SQLITE_OK ){
+ return SQLITE_IOERR_FSTAT;
+ }
+ nMap = statbuf.st_size;
+ }
+ if( nMap>pFd->mmapSizeMax ){
+ nMap = pFd->mmapSizeMax;
+ }
+
+ if( nMap!=pFd->mmapSize ){
+ if( nMap>0 ){
+ unixRemapfile(pFd, nMap);
+ }else{
+ unixUnmapfile(pFd);
+ }
+ }
+
+ return SQLITE_OK;
+}
+#endif /* SQLITE_MAX_MMAP_SIZE>0 */
+
+/*
+** If possible, return a pointer to a mapping of file fd starting at offset
+** iOff. The mapping must be valid for at least nAmt bytes.
+**
+** If such a pointer can be obtained, store it in *pp and return SQLITE_OK.
+** Or, if one cannot but no error occurs, set *pp to 0 and return SQLITE_OK.
+** Finally, if an error does occur, return an SQLite error code. The final
+** value of *pp is undefined in this case.
+**
+** If this function does return a pointer, the caller must eventually
+** release the reference by calling unixUnfetch().
+*/
+static int unixFetch(sqlite3_file *fd, i64 iOff, int nAmt, void **pp){
+#if SQLITE_MAX_MMAP_SIZE>0
+ unixFile *pFd = (unixFile *)fd; /* The underlying database file */
+#endif
+ *pp = 0;
+
+#if SQLITE_MAX_MMAP_SIZE>0
+ if( pFd->mmapSizeMax>0 ){
+ if( pFd->pMapRegion==0 ){
+ int rc = unixMapfile(pFd, -1);
+ if( rc!=SQLITE_OK ) return rc;
+ }
+ if( pFd->mmapSize >= iOff+nAmt ){
+ *pp = &((u8 *)pFd->pMapRegion)[iOff];
+ pFd->nFetchOut++;
+ }
+ }
+#endif
+ return SQLITE_OK;
+}
+
+/*
+** If the third argument is non-NULL, then this function releases a
+** reference obtained by an earlier call to unixFetch(). The second
+** argument passed to this function must be the same as the corresponding
+** argument that was passed to the unixFetch() invocation.
+**
+** Or, if the third argument is NULL, then this function is being called
+** to inform the VFS layer that, according to POSIX, any existing mapping
+** may now be invalid and should be unmapped.
+*/
+static int unixUnfetch(sqlite3_file *fd, i64 iOff, void *p){
+#if SQLITE_MAX_MMAP_SIZE>0
+ unixFile *pFd = (unixFile *)fd; /* The underlying database file */
+ UNUSED_PARAMETER(iOff);
+
+ /* If p==0 (unmap the entire file) then there must be no outstanding
+ ** xFetch references. Or, if p!=0 (meaning it is an xFetch reference),
+ ** then there must be at least one outstanding. */
+ assert( (p==0)==(pFd->nFetchOut==0) );
+
+ /* If p!=0, it must match the iOff value. */
+ assert( p==0 || p==&((u8 *)pFd->pMapRegion)[iOff] );
+
+ if( p ){
+ pFd->nFetchOut--;
+ }else{
+ unixUnmapfile(pFd);
+ }
+
+ assert( pFd->nFetchOut>=0 );
+#else
+ UNUSED_PARAMETER(fd);
+ UNUSED_PARAMETER(p);
+ UNUSED_PARAMETER(iOff);
+#endif
+ return SQLITE_OK;
+}
+
/*
** Here ends the implementation of all sqlite3_file methods.
**
@@ -4189,7 +4936,7 @@ static int unixShmUnmap(
** looks at the filesystem type and tries to guess the best locking
** strategy from that.
**
-** For finder-funtion F, two objects are created:
+** For finder-function F, two objects are created:
**
** (1) The real finder-function named "FImpt()".
**
@@ -4210,7 +4957,7 @@ static int unixShmUnmap(
** * An I/O method finder function called FINDER that returns a pointer
** to the METHOD object in the previous bullet.
*/
-#define IOMETHODS(FINDER, METHOD, VERSION, CLOSE, LOCK, UNLOCK, CKLOCK) \
+#define IOMETHODS(FINDER, METHOD, VERSION, CLOSE, LOCK, UNLOCK, CKLOCK, SHMMAP) \
static const sqlite3_io_methods METHOD = { \
VERSION, /* iVersion */ \
CLOSE, /* xClose */ \
@@ -4225,10 +4972,12 @@ static const sqlite3_io_methods METHOD = { \
unixFileControl, /* xFileControl */ \
unixSectorSize, /* xSectorSize */ \
unixDeviceCharacteristics, /* xDeviceCapabilities */ \
- unixShmMap, /* xShmMap */ \
+ SHMMAP, /* xShmMap */ \
unixShmLock, /* xShmLock */ \
unixShmBarrier, /* xShmBarrier */ \
- unixShmUnmap /* xShmUnmap */ \
+ unixShmUnmap, /* xShmUnmap */ \
+ unixFetch, /* xFetch */ \
+ unixUnfetch, /* xUnfetch */ \
}; \
static const sqlite3_io_methods *FINDER##Impl(const char *z, unixFile *p){ \
UNUSED_PARAMETER(z); UNUSED_PARAMETER(p); \
@@ -4245,20 +4994,22 @@ static const sqlite3_io_methods *(*const FINDER)(const char*,unixFile *p) \
IOMETHODS(
posixIoFinder, /* Finder function name */
posixIoMethods, /* sqlite3_io_methods object name */
- 2, /* shared memory is enabled */
+ 3, /* shared memory and mmap are enabled */
unixClose, /* xClose method */
unixLock, /* xLock method */
unixUnlock, /* xUnlock method */
- unixCheckReservedLock /* xCheckReservedLock method */
+ unixCheckReservedLock, /* xCheckReservedLock method */
+ unixShmMap /* xShmMap method */
)
IOMETHODS(
nolockIoFinder, /* Finder function name */
nolockIoMethods, /* sqlite3_io_methods object name */
- 1, /* shared memory is disabled */
+ 3, /* shared memory is disabled */
nolockClose, /* xClose method */
nolockLock, /* xLock method */
nolockUnlock, /* xUnlock method */
- nolockCheckReservedLock /* xCheckReservedLock method */
+ nolockCheckReservedLock, /* xCheckReservedLock method */
+ 0 /* xShmMap method */
)
IOMETHODS(
dotlockIoFinder, /* Finder function name */
@@ -4267,7 +5018,8 @@ IOMETHODS(
dotlockClose, /* xClose method */
dotlockLock, /* xLock method */
dotlockUnlock, /* xUnlock method */
- dotlockCheckReservedLock /* xCheckReservedLock method */
+ dotlockCheckReservedLock, /* xCheckReservedLock method */
+ 0 /* xShmMap method */
)
#if SQLITE_ENABLE_LOCKING_STYLE && !OS_VXWORKS
@@ -4278,7 +5030,8 @@ IOMETHODS(
flockClose, /* xClose method */
flockLock, /* xLock method */
flockUnlock, /* xUnlock method */
- flockCheckReservedLock /* xCheckReservedLock method */
+ flockCheckReservedLock, /* xCheckReservedLock method */
+ 0 /* xShmMap method */
)
#endif
@@ -4290,7 +5043,8 @@ IOMETHODS(
semClose, /* xClose method */
semLock, /* xLock method */
semUnlock, /* xUnlock method */
- semCheckReservedLock /* xCheckReservedLock method */
+ semCheckReservedLock, /* xCheckReservedLock method */
+ 0 /* xShmMap method */
)
#endif
@@ -4302,7 +5056,8 @@ IOMETHODS(
afpClose, /* xClose method */
afpLock, /* xLock method */
afpUnlock, /* xUnlock method */
- afpCheckReservedLock /* xCheckReservedLock method */
+ afpCheckReservedLock, /* xCheckReservedLock method */
+ 0 /* xShmMap method */
)
#endif
@@ -4327,7 +5082,8 @@ IOMETHODS(
proxyClose, /* xClose method */
proxyLock, /* xLock method */
proxyUnlock, /* xUnlock method */
- proxyCheckReservedLock /* xCheckReservedLock method */
+ proxyCheckReservedLock, /* xCheckReservedLock method */
+ 0 /* xShmMap method */
)
#endif
@@ -4340,7 +5096,8 @@ IOMETHODS(
unixClose, /* xClose method */
unixLock, /* xLock method */
nfsUnlock, /* xUnlock method */
- unixCheckReservedLock /* xCheckReservedLock method */
+ unixCheckReservedLock, /* xCheckReservedLock method */
+ 0 /* xShmMap method */
)
#endif
@@ -4449,7 +5206,7 @@ static const sqlite3_io_methods
#endif /* OS_VXWORKS && SQLITE_ENABLE_LOCKING_STYLE */
/*
-** An abstract type for a pointer to a IO method finder function:
+** An abstract type for a pointer to an IO method finder function:
*/
typedef const sqlite3_io_methods *(*finder_type)(const char*,unixFile*);
@@ -4462,24 +5219,14 @@ typedef const sqlite3_io_methods *(*finder_type)(const char*,unixFile*);
*/
/*
-** Initializes a unixFile structure with zeros.
-*/
-void initUnixFile(sqlite3_file* file) {
- memset(file, 0, sizeof(unixFile));
-}
-
-/*
** Initialize the contents of the unixFile structure pointed to by pId.
*/
-int fillInUnixFile(
+static int fillInUnixFile(
sqlite3_vfs *pVfs, /* Pointer to vfs object */
int h, /* Open file descriptor of file being opened */
- int syncDir, /* True to sync directory on first sync */
sqlite3_file *pId, /* Write to the unixFile structure here */
const char *zFilename, /* Name of the file being opened */
- int noLock, /* Omit locking if true */
- int isDelete, /* Delete on close if true */
- int isReadOnly /* True if the file is opened read-only */
+ int ctrlFlags /* Zero or more UNIXFILE_* values */
){
const sqlite3_io_methods *pLockingStyle;
unixFile *pNew = (unixFile *)pId;
@@ -4487,11 +5234,6 @@ int fillInUnixFile(
assert( pNew->pInode==NULL );
- /* Parameter isDelete is only used on vxworks. Express this explicitly
- ** here to prevent compiler warnings about unused parameters.
- */
- UNUSED_PARAMETER(isDelete);
-
/* Usually the path zFilename should not be a relative pathname. The
** exception is when opening the proxy "conch" file in builds that
** include the special Apple locking styles.
@@ -4503,30 +5245,34 @@ int fillInUnixFile(
assert( zFilename==0 || zFilename[0]=='/' );
#endif
+ /* No locking occurs in temporary files */
+ assert( zFilename!=0 || (ctrlFlags & UNIXFILE_NOLOCK)!=0 );
+
OSTRACE(("OPEN %-3d %s\n", h, zFilename));
pNew->h = h;
+ pNew->pVfs = pVfs;
pNew->zPath = zFilename;
- if( strcmp(pVfs->zName,"unix-excl")==0 ){
- pNew->ctrlFlags = UNIXFILE_EXCL;
- }else{
- pNew->ctrlFlags = 0;
- }
- if( isReadOnly ){
- pNew->ctrlFlags |= UNIXFILE_RDONLY;
+ pNew->ctrlFlags = (u8)ctrlFlags;
+#if SQLITE_MAX_MMAP_SIZE>0
+ pNew->mmapSizeMax = sqlite3GlobalConfig.szMmap;
+#endif
+ if( sqlite3_uri_boolean(((ctrlFlags & UNIXFILE_URI) ? zFilename : 0),
+ "psow", SQLITE_POWERSAFE_OVERWRITE) ){
+ pNew->ctrlFlags |= UNIXFILE_PSOW;
}
- if( syncDir ){
- pNew->ctrlFlags |= UNIXFILE_DIRSYNC;
+ if( strcmp(pVfs->zName,"unix-excl")==0 ){
+ pNew->ctrlFlags |= UNIXFILE_EXCL;
}
#if OS_VXWORKS
pNew->pId = vxworksFindFileId(zFilename);
if( pNew->pId==0 ){
- noLock = 1;
+ ctrlFlags |= UNIXFILE_NOLOCK;
rc = SQLITE_NOMEM;
}
#endif
- if( noLock ){
+ if( ctrlFlags & UNIXFILE_NOLOCK ){
pLockingStyle = &nolockIoMethods;
}else{
pLockingStyle = (**(finder_type*)pVfs->pAppData)(zFilename, pNew);
@@ -4546,7 +5292,7 @@ int fillInUnixFile(
unixEnterMutex();
rc = findInodeInfo(pNew, &pNew->pInode);
if( rc!=SQLITE_OK ){
- /* If an error occured in findInodeInfo(), close the file descriptor
+ /* If an error occurred in findInodeInfo(), close the file descriptor
** immediately, before releasing the mutex. findInodeInfo() may fail
** in two scenarios:
**
@@ -4604,6 +5350,7 @@ int fillInUnixFile(
*/
char *zLockFile;
int nFilename;
+ assert( zFilename!=0 );
nFilename = (int)strlen(zFilename) + 6;
zLockFile = (char *)sqlite3_malloc(nFilename);
if( zLockFile==0 ){
@@ -4644,15 +5391,15 @@ int fillInUnixFile(
if( h>=0 ) robust_close(pNew, h, __LINE__);
h = -1;
osUnlink(zFilename);
- isDelete = 0;
+ pNew->ctrlFlags |= UNIXFILE_DELETE;
}
- pNew->isDelete = isDelete;
#endif
if( rc!=SQLITE_OK ){
if( h>=0 ) robust_close(pNew, h, __LINE__);
}else{
pNew->pMethod = pLockingStyle;
OpenCounter(+1);
+ verifyDbFile(pNew);
}
return rc;
}
@@ -4665,6 +5412,7 @@ static const char *unixTempFileDir(void){
static const char *azDirs[] = {
0,
0,
+ 0,
"/var/tmp",
"/usr/tmp",
"/tmp",
@@ -4675,7 +5423,8 @@ static const char *unixTempFileDir(void){
const char *zDir = 0;
azDirs[0] = sqlite3_temp_directory;
- if( !azDirs[1] ) azDirs[1] = getenv("TMPDIR");
+ if( !azDirs[1] ) azDirs[1] = getenv("SQLITE_TMPDIR");
+ if( !azDirs[2] ) azDirs[2] = getenv("TMPDIR");
for(i=0; i<sizeof(azDirs)/sizeof(azDirs[0]); zDir=azDirs[i++]){
if( zDir==0 ) continue;
if( osStat(zDir, &buf) ) continue;
@@ -4711,18 +5460,19 @@ static int unixGetTempname(int nBuf, char *zBuf){
/* Check that the output buffer is large enough for the temporary file
** name. If it is not, return SQLITE_ERROR.
*/
- if( (strlen(zDir) + strlen(SQLITE_TEMP_FILE_PREFIX) + 17) >= (size_t)nBuf ){
+ if( (strlen(zDir) + strlen(SQLITE_TEMP_FILE_PREFIX) + 18) >= (size_t)nBuf ){
return SQLITE_ERROR;
}
do{
- sqlite3_snprintf(nBuf-17, zBuf, "%s/"SQLITE_TEMP_FILE_PREFIX, zDir);
+ sqlite3_snprintf(nBuf-18, zBuf, "%s/"SQLITE_TEMP_FILE_PREFIX, zDir);
j = (int)strlen(zBuf);
sqlite3_randomness(15, &zBuf[j]);
for(i=0; i<15; i++, j++){
zBuf[j] = (char)zChars[ ((unsigned char)zBuf[j])%(sizeof(zChars)-1) ];
}
zBuf[j] = 0;
+ zBuf[j+1] = 0;
}while( osAccess(zBuf,0)==0 );
return SQLITE_OK;
}
@@ -4770,7 +5520,7 @@ static UnixUnusedFd *findReusableFd(const char *zPath, int flags){
** descriptor on the same path, fail, and return an error to SQLite.
**
** Even if a subsequent open() call does succeed, the consequences of
- ** not searching for a resusable file descriptor are not dire. */
+ ** not searching for a reusable file descriptor are not dire. */
if( 0==osStat(zPath, &sStat) ){
unixInodeInfo *pInode;
@@ -4801,23 +5551,31 @@ static UnixUnusedFd *findReusableFd(const char *zPath, int flags){
** written to *pMode. If an IO error occurs, an SQLite error code is
** returned and the value of *pMode is not modified.
**
-** If the file being opened is a temporary file, it is always created with
-** the octal permissions 0600 (read/writable by owner only). If the file
-** is a database or master journal file, it is created with the permissions
-** mask SQLITE_DEFAULT_FILE_PERMISSIONS.
-**
-** Finally, if the file being opened is a WAL or regular journal file, then
+** In most cases, this routine sets *pMode to 0, which will become
+** an indication to robust_open() to create the file using
+** SQLITE_DEFAULT_FILE_PERMISSIONS adjusted by the umask.
+** But if the file being opened is a WAL or regular journal file, then
** this function queries the file-system for the permissions on the
** corresponding database file and sets *pMode to this value. Whenever
** possible, WAL and journal files are created using the same permissions
** as the associated database file.
+**
+** If the SQLITE_ENABLE_8_3_NAMES option is enabled, then the
+** original filename is unavailable. But 8_3_NAMES is only used for
+** FAT filesystems and permissions do not matter there, so just use
+** the default permissions.
*/
static int findCreateFileMode(
const char *zPath, /* Path of file (possibly) being created */
int flags, /* Flags passed as 4th argument to xOpen() */
- mode_t *pMode /* OUT: Permissions to open file with */
+ mode_t *pMode, /* OUT: Permissions to open file with */
+ uid_t *pUid, /* OUT: uid to set on the file */
+ gid_t *pGid /* OUT: gid to set on the file */
){
int rc = SQLITE_OK; /* Return Code */
+ *pMode = 0;
+ *pUid = 0;
+ *pGid = 0;
if( flags & (SQLITE_OPEN_WAL|SQLITE_OPEN_MAIN_JOURNAL) ){
char zDb[MAX_PATHNAME+1]; /* Database file path */
int nDb; /* Number of valid bytes in zDb */
@@ -4829,27 +5587,35 @@ static int findCreateFileMode(
**
** "<path to db>-journal"
** "<path to db>-wal"
- ** "<path to db>-journal-NNNN"
- ** "<path to db>-wal-NNNN"
+ ** "<path to db>-journalNN"
+ ** "<path to db>-walNN"
**
- ** where NNNN is a 4 digit decimal number. The NNNN naming schemes are
+ ** where NN is a decimal number. The NN naming schemes are
** used by the test_multiplex.c module.
*/
nDb = sqlite3Strlen30(zPath) - 1;
- while( nDb>0 && zPath[nDb]!='l' ) nDb--;
- nDb -= ((flags & SQLITE_OPEN_WAL) ? 3 : 7);
+#ifdef SQLITE_ENABLE_8_3_NAMES
+ while( nDb>0 && sqlite3Isalnum(zPath[nDb]) ) nDb--;
+ if( nDb==0 || zPath[nDb]!='-' ) return SQLITE_OK;
+#else
+ while( zPath[nDb]!='-' ){
+ assert( nDb>0 );
+ assert( zPath[nDb]!='\n' );
+ nDb--;
+ }
+#endif
memcpy(zDb, zPath, nDb);
zDb[nDb] = '\0';
if( 0==osStat(zDb, &sStat) ){
*pMode = sStat.st_mode & 0777;
+ *pUid = sStat.st_uid;
+ *pGid = sStat.st_gid;
}else{
rc = SQLITE_IOERR_FSTAT;
}
}else if( flags & SQLITE_OPEN_DELETEONCLOSE ){
*pMode = 0600;
- }else{
- *pMode = SQLITE_DEFAULT_FILE_PERMISSIONS;
}
return rc;
}
@@ -4866,9 +5632,9 @@ int chromium_sqlite3_fill_in_unix_sqlite3_file(sqlite3_vfs* vfs,
int dirfd,
sqlite3_file* file,
const char* fileName,
- int noLock,
- int isDelete) {
- return fillInUnixFile(vfs, fd, dirfd, file, fileName, noLock, isDelete, 0);
+ int noLock) {
+ int ctrlFlags = (noLock ? UNIXFILE_NOLOCK : 0);
+ return fillInUnixFile(vfs, fd, file, fileName, ctrlFlags);
}
/*
@@ -4956,6 +5722,7 @@ static int unixOpen(
int eType = flags&0xFFFFFF00; /* Type of file to open */
int noLock; /* True to omit locking primitives */
int rc = SQLITE_OK; /* Function Return Code */
+ int ctrlFlags = 0; /* UNIXFILE_* flags */
int isExclusive = (flags & SQLITE_OPEN_EXCLUSIVE);
int isDelete = (flags & SQLITE_OPEN_DELETEONCLOSE);
@@ -4965,6 +5732,9 @@ static int unixOpen(
#if SQLITE_ENABLE_LOCKING_STYLE
int isAutoProxy = (flags & SQLITE_OPEN_AUTOPROXY);
#endif
+#if defined(__APPLE__) || SQLITE_ENABLE_LOCKING_STYLE
+ struct statfs fsInfo;
+#endif
/* If creating a master or main-file journal, this function will open
** a file-descriptor on the directory too. The first time unixSync()
@@ -4979,7 +5749,7 @@ static int unixOpen(
/* If argument zPath is a NULL pointer, this function is required to open
** a temporary file. Use this buffer to store the file name in.
*/
- char zTmpname[MAX_PATHNAME+1];
+ char zTmpname[MAX_PATHNAME+2];
const char *zName = zPath;
/* Check the following statements are true:
@@ -5008,6 +5778,16 @@ static int unixOpen(
|| eType==SQLITE_OPEN_TRANSIENT_DB || eType==SQLITE_OPEN_WAL
);
+ /* Detect a pid change and reset the PRNG. There is a race condition
+ ** here such that two or more threads all trying to open databases at
+ ** the same instant might all reset the PRNG. But multiple resets
+ ** are harmless.
+ */
+ if( randomnessPid!=getpid() ){
+ randomnessPid = getpid();
+ sqlite3_randomness(0,0);
+ }
+
chromium_sqlite3_initialize_unix_sqlite3_file(pFile);
if( eType==SQLITE_OPEN_MAIN_DB ){
@@ -5015,14 +5795,24 @@ static int unixOpen(
if( rc!=SQLITE_OK ){
return rc;
}
+
+ /* Database filenames are double-zero terminated if they are not
+ ** URIs with parameters. Hence, they can always be passed into
+ ** sqlite3_uri_parameter(). */
+ assert( (flags & SQLITE_OPEN_URI) || zName[strlen(zName)+1]==0 );
+
}else if( !zName ){
/* If zName is NULL, the upper layer is requesting a temp file. */
assert(isDelete && !syncDir);
- rc = unixGetTempname(MAX_PATHNAME+1, zTmpname);
+ rc = unixGetTempname(MAX_PATHNAME+2, zTmpname);
if( rc!=SQLITE_OK ){
return rc;
}
zName = zTmpname;
+
+ /* Generated temporary filenames are always double-zero terminated
+ ** for use by sqlite3_uri_parameter(). */
+ assert( zName[strlen(zName)+1]==0 );
}
/* Determine the value of the flags parameter passed to POSIX function
@@ -5037,7 +5827,9 @@ static int unixOpen(
if( fd<0 ){
mode_t openMode; /* Permissions to create file with */
- rc = findCreateFileMode(zName, flags, &openMode);
+ uid_t uid; /* Userid for the file */
+ gid_t gid; /* Groupid for the file */
+ rc = findCreateFileMode(zName, flags, &openMode, &uid, &gid);
if( rc!=SQLITE_OK ){
assert( !p->pUnused );
assert( eType==SQLITE_OPEN_WAL || eType==SQLITE_OPEN_MAIN_JOURNAL );
@@ -5058,6 +5850,14 @@ static int unixOpen(
rc = unixLogError(SQLITE_CANTOPEN_BKPT, "open", zName);
goto open_finished;
}
+
+ /* If this process is running as root and if creating a new rollback
+ ** journal or WAL file, set the ownership of the journal or WAL to be
+ ** the same as the original database.
+ */
+ if( flags & (SQLITE_OPEN_WAL|SQLITE_OPEN_MAIN_JOURNAL) ){
+ osFchown(fd, uid, gid);
+ }
}
assert( fd>=0 );
if( pOutFlags ){
@@ -5069,6 +5869,12 @@ static int unixOpen(
if( isDelete ){
#if OS_VXWORKS
zPath = zName;
+#elif defined(SQLITE_UNLINK_AFTER_CLOSE)
+ zPath = sqlite3_mprintf("%s", zName);
+ if( zPath==0 ){
+ robust_close(p, fd, __LINE__);
+ return SQLITE_NOMEM;
+ }
#else
osUnlink(zName);
#endif
@@ -5079,15 +5885,10 @@ static int unixOpen(
}
#endif
-#ifdef FD_CLOEXEC
- osFcntl(fd, F_SETFD, osFcntl(fd, F_GETFD, 0) | FD_CLOEXEC);
-#endif
-
noLock = eType!=SQLITE_OPEN_MAIN_DB;
#if defined(__APPLE__) || SQLITE_ENABLE_LOCKING_STYLE
- struct statfs fsInfo;
if( fstatfs(fd, &fsInfo) == -1 ){
((unixFile*)pFile)->lastErrno = errno;
robust_close(p, fd, __LINE__);
@@ -5097,7 +5898,14 @@ static int unixOpen(
((unixFile*)pFile)->fsFlags |= SQLITE_FSFLAGS_IS_MSDOS;
}
#endif
-
+
+ /* Set up appropriate ctrlFlags */
+ if( isDelete ) ctrlFlags |= UNIXFILE_DELETE;
+ if( isReadonly ) ctrlFlags |= UNIXFILE_RDONLY;
+ if( noLock ) ctrlFlags |= UNIXFILE_NOLOCK;
+ if( syncDir ) ctrlFlags |= UNIXFILE_DIRSYNC;
+ if( flags & SQLITE_OPEN_URI ) ctrlFlags |= UNIXFILE_URI;
+
#if SQLITE_ENABLE_LOCKING_STYLE
#if SQLITE_PREFER_PROXY_LOCKING
isAutoProxy = 1;
@@ -5111,7 +5919,6 @@ static int unixOpen(
if( envforce!=NULL ){
useProxy = atoi(envforce)>0;
}else{
- struct statfs fsInfo;
if( statfs(zPath, &fsInfo) == -1 ){
/* In theory, the close(fd) call is sub-optimal. If the file opened
** with fd is a database file, and there are other connections open
@@ -5128,8 +5935,7 @@ static int unixOpen(
useProxy = !(fsInfo.f_flags&MNT_LOCAL);
}
if( useProxy ){
- rc = fillInUnixFile(pVfs, fd, syncDir, pFile, zPath, noLock,
- isDelete, isReadonly);
+ rc = fillInUnixFile(pVfs, fd, pFile, zPath, ctrlFlags);
if( rc==SQLITE_OK ){
rc = proxyTransformUnixFile((unixFile*)pFile, ":auto:");
if( rc!=SQLITE_OK ){
@@ -5146,8 +5952,8 @@ static int unixOpen(
}
#endif
- rc = fillInUnixFile(pVfs, fd, syncDir, pFile, zPath, noLock,
- isDelete, isReadonly);
+ rc = fillInUnixFile(pVfs, fd, pFile, zPath, ctrlFlags);
+
open_finished:
if( rc!=SQLITE_OK ){
chromium_sqlite3_destroy_reusable_file_handle(pFile);
@@ -5168,11 +5974,20 @@ static int unixDelete(
int rc = SQLITE_OK;
UNUSED_PARAMETER(NotUsed);
SimulateIOError(return SQLITE_IOERR_DELETE);
- if( osUnlink(zPath)==(-1) && errno!=ENOENT ){
- return unixLogError(SQLITE_IOERR_DELETE, "unlink", zPath);
+ if( osUnlink(zPath)==(-1) ){
+ if( errno==ENOENT
+#if OS_VXWORKS
+ || osAccess(zPath,0)!=0
+#endif
+ ){
+ rc = SQLITE_IOERR_DELETE_NOENT;
+ }else{
+ rc = unixLogError(SQLITE_IOERR_DELETE, "unlink", zPath);
+ }
+ return rc;
}
#ifndef SQLITE_DISABLE_DIRSYNC
- if( dirSync ){
+ if( (dirSync & 1)!=0 ){
int fd;
rc = osOpenDirectory(zPath, &fd);
if( rc==SQLITE_OK ){
@@ -5194,7 +6009,7 @@ static int unixDelete(
}
/*
-** Test the existance of or access permissions of file zPath. The
+** Test the existence of or access permissions of file zPath. The
** test performed depends on the value of flags:
**
** SQLITE_ACCESS_EXISTS: Return 1 if the file exists
@@ -5360,20 +6175,20 @@ static int unixRandomness(sqlite3_vfs *NotUsed, int nBuf, char *zBuf){
** tests repeatable.
*/
memset(zBuf, 0, nBuf);
+ randomnessPid = getpid();
#if !defined(SQLITE_TEST)
{
- int pid, fd;
+ int fd, got;
fd = robust_open("/dev/urandom", O_RDONLY, 0);
if( fd<0 ){
time_t t;
time(&t);
memcpy(zBuf, &t, sizeof(t));
- pid = getpid();
- memcpy(&zBuf[sizeof(t)], &pid, sizeof(pid));
- assert( sizeof(t)+sizeof(pid)<=(size_t)nBuf );
- nBuf = sizeof(t) + sizeof(pid);
+ memcpy(&zBuf[sizeof(t)], &randomnessPid, sizeof(randomnessPid));
+ assert( sizeof(t)+sizeof(randomnessPid)<=(size_t)nBuf );
+ nBuf = sizeof(t) + sizeof(randomnessPid);
}else{
- do{ nBuf = osRead(fd, zBuf, nBuf); }while( nBuf<0 && errno==EINTR );
+ do{ got = osRead(fd, zBuf, nBuf); }while( got<0 && errno==EINTR );
robust_close(0, fd, __LINE__);
}
}
@@ -5427,10 +6242,12 @@ int sqlite3_current_time = 0; /* Fake system time in seconds since 1970. */
** epoch of noon in Greenwich on November 24, 4714 B.C according to the
** proleptic Gregorian calendar.
**
-** On success, return 0. Return 1 if the time and date cannot be found.
+** On success, return SQLITE_OK. Return SQLITE_ERROR if the time and date
+** cannot be found.
*/
static int unixCurrentTimeInt64(sqlite3_vfs *NotUsed, sqlite3_int64 *piNow){
static const sqlite3_int64 unixEpoch = 24405875*(sqlite3_int64)8640000;
+ int rc = SQLITE_OK;
#if defined(NO_GETTOD)
time_t t;
time(&t);
@@ -5441,8 +6258,11 @@ static int unixCurrentTimeInt64(sqlite3_vfs *NotUsed, sqlite3_int64 *piNow){
*piNow = unixEpoch + 1000*(sqlite3_int64)sNow.tv_sec + sNow.tv_nsec/1000000;
#else
struct timeval sNow;
- gettimeofday(&sNow, 0);
- *piNow = unixEpoch + 1000*(sqlite3_int64)sNow.tv_sec + sNow.tv_usec/1000;
+ if( gettimeofday(&sNow, 0)==0 ){
+ *piNow = unixEpoch + 1000*(sqlite3_int64)sNow.tv_sec + sNow.tv_usec/1000;
+ }else{
+ rc = SQLITE_ERROR;
+ }
#endif
#ifdef SQLITE_TEST
@@ -5451,7 +6271,7 @@ static int unixCurrentTimeInt64(sqlite3_vfs *NotUsed, sqlite3_int64 *piNow){
}
#endif
UNUSED_PARAMETER(NotUsed);
- return 0;
+ return rc;
}
/*
@@ -5460,11 +6280,12 @@ static int unixCurrentTimeInt64(sqlite3_vfs *NotUsed, sqlite3_int64 *piNow){
** return 0. Return 1 if the time and date cannot be found.
*/
static int unixCurrentTime(sqlite3_vfs *NotUsed, double *prNow){
- sqlite3_int64 i;
+ sqlite3_int64 i = 0;
+ int rc;
UNUSED_PARAMETER(NotUsed);
- unixCurrentTimeInt64(0, &i);
+ rc = unixCurrentTimeInt64(0, &i);
*prNow = i/86400000.0;
- return 0;
+ return rc;
}
/*
@@ -5510,7 +6331,7 @@ static int unixGetLastError(sqlite3_vfs *NotUsed, int NotUsed2, char *NotUsed3){
** address in the shared range is taken for a SHARED lock, the entire
** shared range is taken for an EXCLUSIVE lock):
**
-** PENDING_BYTE 0x40000000
+** PENDING_BYTE 0x40000000
** RESERVED_BYTE 0x40000001
** SHARED_RANGE 0x40000002 -> 0x40000200
**
@@ -5579,7 +6400,7 @@ static int unixGetLastError(sqlite3_vfs *NotUsed, int NotUsed2, char *NotUsed3){
** proxy path against the values stored in the conch. The conch file is
** stored in the same directory as the database file and the file name
** is patterned after the database file name as ".<databasename>-conch".
-** If the conch file does not exist, or it's contents do not match the
+** If the conch file does not exist, or its contents do not match the
** host ID and/or proxy path, then the lock is escalated to an exclusive
** lock and the conch file contents is updated with the host ID and proxy
** path and the lock is downgraded to a shared lock again. If the conch
@@ -5631,7 +6452,7 @@ static int unixGetLastError(sqlite3_vfs *NotUsed, int NotUsed2, char *NotUsed3){
** setting the environment variable SQLITE_FORCE_PROXY_LOCKING to 1 will
** force proxy locking to be used for every database file opened, and 0
** will force automatic proxy locking to be disabled for all database
-** files (explicity calling the SQLITE_SET_LOCKPROXYFILE pragma or
+** files (explicitly calling the SQLITE_SET_LOCKPROXYFILE pragma or
** sqlite_file_control API is not affected by SQLITE_FORCE_PROXY_LOCKING).
*/
@@ -5717,7 +6538,7 @@ static int proxyCreateLockPath(const char *lockPath){
if( i-start>2 || (i-start==1 && buf[start] != '.' && buf[start] != '/')
|| (i-start==2 && buf[start] != '.' && buf[start+1] != '.') ){
buf[i]='\0';
- if( mkdir(buf, SQLITE_DEFAULT_PROXYDIR_PERMISSIONS) ){
+ if( osMkdir(buf, SQLITE_DEFAULT_PROXYDIR_PERMISSIONS) ){
int err=errno;
if( err!=EEXIST ) {
OSTRACE(("CREATELOCKPATH FAILED creating %s, "
@@ -5771,17 +6592,17 @@ static int proxyCreateUnixFile(
}
}
if( fd<0 ){
- fd = robust_open(path, openFlags, SQLITE_DEFAULT_FILE_PERMISSIONS);
+ fd = robust_open(path, openFlags, 0);
terrno = errno;
if( fd<0 && errno==ENOENT && islockfile ){
if( proxyCreateLockPath(path) == SQLITE_OK ){
- fd = robust_open(path, openFlags, SQLITE_DEFAULT_FILE_PERMISSIONS);
+ fd = robust_open(path, openFlags, 0);
}
}
}
if( fd<0 ){
openFlags = O_RDONLY;
- fd = robust_open(path, openFlags, SQLITE_DEFAULT_FILE_PERMISSIONS);
+ fd = robust_open(path, openFlags, 0);
terrno = errno;
}
if( fd<0 ){
@@ -5812,7 +6633,7 @@ static int proxyCreateUnixFile(
pUnused->flags = openFlags;
pNew->pUnused = pUnused;
- rc = fillInUnixFile(&dummyVfs, fd, 0, (sqlite3_file*)pNew, path, 0, 0, 0);
+ rc = fillInUnixFile(&dummyVfs, fd, (sqlite3_file*)pNew, path, 0);
if( rc==SQLITE_OK ){
*ppFile = pNew;
return SQLITE_OK;
@@ -5852,6 +6673,8 @@ static int proxyGetHostID(unsigned char *pHostID, int *pError){
return SQLITE_IOERR;
}
}
+#else
+ UNUSED_PARAMETER(pError);
#endif
#ifdef SQLITE_TEST
/* simulate multiple hosts by creating unique hostid file paths */
@@ -5903,8 +6726,7 @@ static int proxyBreakConchLock(unixFile *pFile, uuid_t myHostID){
goto end_breaklock;
}
/* write it out to the temporary break file */
- fd = robust_open(tPath, (O_RDWR|O_CREAT|O_EXCL),
- SQLITE_DEFAULT_FILE_PERMISSIONS);
+ fd = robust_open(tPath, (O_RDWR|O_CREAT|O_EXCL), 0);
if( fd<0 ){
sqlite3_snprintf(sizeof(errmsg), errmsg, "create failed (%d)", errno);
goto end_breaklock;
@@ -5944,6 +6766,7 @@ static int proxyConchLock(unixFile *pFile, uuid_t myHostID, int lockType){
int nTries = 0;
struct timespec conchModTime;
+ memset(&conchModTime, 0, sizeof(conchModTime));
do {
rc = conchFile->pMethod->xLock((sqlite3_file*)conchFile, lockType);
nTries ++;
@@ -6175,12 +6998,12 @@ static int proxyTakeConch(unixFile *pFile){
end_takeconch:
OSTRACE(("TRANSPROXY: CLOSE %d\n", pFile->h));
if( rc==SQLITE_OK && pFile->openFlags ){
+ int fd;
if( pFile->h>=0 ){
robust_close(pFile, pFile->h, __LINE__);
}
pFile->h = -1;
- int fd = robust_open(pCtx->dbPath, pFile->openFlags,
- SQLITE_DEFAULT_FILE_PERMISSIONS);
+ fd = robust_open(pCtx->dbPath, pFile->openFlags, 0);
OSTRACE(("TRANSPROXY: OPEN %d\n", fd));
if( fd>=0 ){
pFile->h = fd;
@@ -6749,7 +7572,7 @@ int sqlite3_os_init(void){
/* Double-check that the aSyscall[] array has been constructed
** correctly. See ticket [bb3a86e890c8e96ab] */
- assert( ArraySize(aSyscall)==18 );
+ assert( ArraySize(aSyscall)==25 );
/* Register all VFSes defined in the aVfs[] array */
for(i=0; i<(sizeof(aVfs)/sizeof(sqlite3_vfs)); i++){
« no previous file with comments | « third_party/sqlite/src/src/os_setup.h ('k') | third_party/sqlite/src/src/os_win.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698