third_party/sqlite/sqlite-src-3080704/src/os_unix.c - Issue 2363173002: [sqlite] Remove obsolete reference version 3.8.7.4.

Side by Side Diff: third_party/sqlite/sqlite-src-3080704/src/os_unix.c

Issue 2363173002: [sqlite] Remove obsolete reference version 3.8.7.4. (Closed)

Patch Set: Created 4 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
	(Empty)
1 /*

2 ** 2004 May 22

3 **

4 ** The author disclaims copyright to this source code. In place of

5 ** a legal notice, here is a blessing:

6 **

7 ** May you do good and not evil.

8 ** May you find forgiveness for yourself and forgive others.

9 ** May you share freely, never taking more than you give.

10 **

11 ******************************************************************************

12 **

13 ** This file contains the VFS implementation for unix-like operating systems,

14 ** including Linux, MacOSX, *BSD, QNX, VxWorks, AIX, HPUX, and others.

15 **

16 ** There are actually several different VFS implementations in this file.

17 ** The differences are in the way that file locking is done. The default

18 ** implementation uses Posix Advisory Locks. Alternative implementations

19 ** use flock(), dot-files, various proprietary locking schemas, or simply

20 ** skip locking all together.

21 **

22 ** This source file is organized into divisions where the logic for various

23 ** subfunctions is contained within the appropriate division. PLEASE

24 ** KEEP THE STRUCTURE OF THIS FILE INTACT. New code should be placed

25 ** in the correct division and should be clearly labeled.

26 **

27 ** The layout of divisions is as follows:

28 **

29 ** * General-purpose declarations and utility functions.

30 ** * Unique file ID logic used by VxWorks.

31 ** * Various locking primitive implementations (all except proxy locking):

32 ** + for Posix Advisory Locks

33 ** + for no-op locks

34 ** + for dot-file locks

35 ** + for flock() locking

36 ** + for named semaphore locks (VxWorks only)

37 ** + for AFP filesystem locks (MacOSX only)

38 ** * sqlite3_file methods not associated with locking.

39 ** * Definitions of sqlite3_io_methods objects for all locking

40 ** methods plus "finder" functions for each locking method.

41 ** * sqlite3_vfs method implementations.

42 ** * Locking primitives for the proxy uber-locking-method. (MacOSX only)

43 ** * Definitions of sqlite3_vfs objects for all locking methods

44 ** plus implementations of sqlite3_os_init() and sqlite3_os_end().

45 */

46 #include "sqliteInt.h"

47 #if SQLITE_OS_UNIX /* This file is used on unix only */

48

49 /*

50 ** There are various methods for file locking used for concurrency

51 ** control:

52 **

53 ** 1. POSIX locking (the default),

54 ** 2. No locking,

55 ** 3. Dot-file locking,

56 ** 4. flock() locking,

57 ** 5. AFP locking (OSX only),

58 ** 6. Named POSIX semaphores (VXWorks only),

59 ** 7. proxy locking. (OSX only)

60 **

61 ** Styles 4, 5, and 7 are only available if SQLITE_ENABLE_LOCKING_STYLE

62 ** is defined to 1. The SQLITE_ENABLE_LOCKING_STYLE also enables automatic

63 ** selection of the appropriate locking style based on the filesystem

64 ** where the database is located.

65 */

/* Default SQLITE_ENABLE_LOCKING_STYLE to 1 on Apple builds and 0 elsewhere,
** unless the build already supplied a value. */
#ifndef SQLITE_ENABLE_LOCKING_STYLE
#  ifdef __APPLE__
#    define SQLITE_ENABLE_LOCKING_STYLE 1
#  else
#    define SQLITE_ENABLE_LOCKING_STYLE 0
#  endif
#endif

73

/*
** Define the OS_VXWORKS pre-processor macro to 1 if building on
** vxworks, or 0 otherwise.  VxWorks is detected through the __RTP__
** or _WRS_KERNEL compiler macros; a value supplied by the build is
** left untouched.
*/
#ifndef OS_VXWORKS
#  if defined(__RTP__) || defined(_WRS_KERNEL)
#    define OS_VXWORKS 1
#  else
#    define OS_VXWORKS 0
#  endif
#endif

85

86 /*

87 ** standard include files.

88 */

#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <time.h>
#include <sys/time.h>
#include <errno.h>
/* <sys/mman.h> is only pulled in when WAL or memory-mapped I/O is built */
#if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0
# include <sys/mman.h>
#endif

#if SQLITE_ENABLE_LOCKING_STYLE || OS_VXWORKS
# include <sys/ioctl.h>
# if OS_VXWORKS
#  include <semaphore.h>
#  include <limits.h>
# else
#  include <sys/file.h>
#  include <sys/param.h>
# endif
#endif /* SQLITE_ENABLE_LOCKING_STYLE */

#if defined(__APPLE__) || (SQLITE_ENABLE_LOCKING_STYLE && !OS_VXWORKS)
# include <sys/mount.h>
#endif

#ifdef HAVE_UTIME
# include <utime.h>
#endif

118

/* Bit values that may be stored in unixFile.fsFlags */
#define SQLITE_FSFLAGS_IS_MSDOS     0x1

/* A thread-safe build needs the pthreads header; SQLITE_UNIX_THREADS
** records that it was included. */
#if SQLITE_THREADSAFE
#  include <pthread.h>
#  define SQLITE_UNIX_THREADS 1
#endif

/* Permission bits used for newly created files, unless the build
** overrides them. */
#if !defined(SQLITE_DEFAULT_FILE_PERMISSIONS)
#  define SQLITE_DEFAULT_FILE_PERMISSIONS 0644
#endif

/* Permission bits used for an automatically created proxy directory,
** unless the build overrides them. */
#if !defined(SQLITE_DEFAULT_PROXYDIR_PERMISSIONS)
#  define SQLITE_DEFAULT_PROXYDIR_PERMISSIONS 0755
#endif

/* Longest pathname, in bytes, that this file supports. */
#define MAX_PATHNAME 512

151

/*
** Only set the lastErrno if the error code is a real error and not
** a normal expected return code of SQLITE_BUSY or SQLITE_OK.
**
** IS_LOCK_ERROR(x) is true when x is neither SQLITE_OK nor SQLITE_BUSY.
** The argument is parenthesized so that an expression such as (a|b) is
** compared as a whole.  Note that x is evaluated twice, so it must not
** have side effects.
*/
#define IS_LOCK_ERROR(x)  (((x) != SQLITE_OK) && ((x) != SQLITE_BUSY))

157

/* Opaque type names for structures that are defined further down. */
typedef struct unixShm unixShm;              /* Connection shared memory */
typedef struct unixShmNode unixShmNode;      /* Shared memory instance */
typedef struct unixInodeInfo unixInodeInfo;  /* An i-node */
typedef struct UnixUnusedFd UnixUnusedFd;    /* An unused file descriptor */

163

164 /*

165 ** Sometimes, after a file handle is closed by SQLite, the file descriptor

166 ** cannot be closed immediately. In these cases, instances of the following

167 ** structure are used to store the file descriptor while waiting for an

168 ** opportunity to either close or reuse it.

169 */

170 struct UnixUnusedFd {

171 int fd; /* File descriptor to close */

172 int flags; /* Flags this file descriptor was opened with */

173 UnixUnusedFd pNext; / Next unused file descriptor on same file */

174 };

175

176 /*

177 ** The unixFile structure is subclass of sqlite3_file specific to the unix

178 ** VFS implementations.

179 */

180 typedef struct unixFile unixFile;

181 struct unixFile {

182 sqlite3_io_methods const pMethod; / Always the first entry */

183 sqlite3_vfs pVfs; / The VFS that created this unixFile */

184 unixInodeInfo pInode; / Info about locks on this inode */

185 int h; /* The file descriptor */

186 unsigned char eFileLock; /* The type of lock held on this fd */

187 unsigned short int ctrlFlags; /* Behavioral bits. UNIXFILE_* flags */

188 int lastErrno; /* The unix errno from last I/O error */

189 void lockingContext; / Locking style specific state */

190 UnixUnusedFd pUnused; / Pre-allocated UnixUnusedFd */

191 const char zPath; / Name of the file */

192 unixShm pShm; / Shared memory segment information */

193 int szChunk; /* Configured by FCNTL_CHUNK_SIZE */

194 #if SQLITE_MAX_MMAP_SIZE>0

195 int nFetchOut; /* Number of outstanding xFetch refs */

196 sqlite3_int64 mmapSize; /* Usable size of mapping at pMapRegion */

197 sqlite3_int64 mmapSizeActual; /* Actual size of mapping at pMapRegion */

198 sqlite3_int64 mmapSizeMax; /* Configured FCNTL_MMAP_SIZE value */

199 void pMapRegion; / Memory mapped region */

200 #endif

201 #ifdef __QNXNTO__

202 int sectorSize; /* Device sector size */

203 int deviceCharacteristics; /* Precomputed device characteristics */

204 #endif

205 #if SQLITE_ENABLE_LOCKING_STYLE

206 int openFlags; /* The flags specified at open() */

207 #endif

208 #if SQLITE_ENABLE_LOCKING_STYLE \|\| defined(__APPLE__)

209 unsigned fsFlags; /* cached details from statfs() */

210 #endif

211 #if OS_VXWORKS

212 struct vxworksFileId pId; / Unique file ID */

213 #endif

214 #ifdef SQLITE_DEBUG

215 /* The next group of variables are used to track whether or not the

216 ** transaction counter in bytes 24-27 of database files are updated

217 ** whenever any part of the database changes. An assertion fault will

218 ** occur if a file is updated without also updating the transaction

219 ** counter. This test is made to avoid new problems similar to the

220 ** one described by ticket #3584.

221 */

222 unsigned char transCntrChng; /* True if the transaction counter changed */

223 unsigned char dbUpdate; /* True if any part of database file changed */

224 unsigned char inNormalWrite; /* True if in a normal write operation */

225

226 #endif

227

228 #ifdef SQLITE_TEST

229 /* In test mode, increase the size of this structure a bit so that

230 ** it is larger than the struct CrashFile defined in test6.c.

231 */

232 char aPadding[32];

233 #endif

234 };

235

/* This variable holds the process id (pid) from when the xRandomness()
** method was called.  If xOpen() is called from a different process id,
** indicating that a fork() has occurred, the PRNG will be reset.
**
** It is declared pid_t so that it has the same type as the value
** returned by getpid().
*/
static pid_t randomnessPid = 0;

241

/*
** Bits that may be set in unixFile.ctrlFlags.  UNIXFILE_DIRSYNC becomes
** an empty mask when SQLITE_DISABLE_DIRSYNC is defined.
*/
#define UNIXFILE_EXCL         0x0001  /* Connections from one process only */
#define UNIXFILE_RDONLY       0x0002  /* Connection is read only */
#define UNIXFILE_PERSIST_WAL  0x0004  /* Persistent WAL mode */
#ifdef SQLITE_DISABLE_DIRSYNC
# define UNIXFILE_DIRSYNC     0x0000
#else
# define UNIXFILE_DIRSYNC     0x0008  /* Directory sync needed */
#endif
#define UNIXFILE_PSOW         0x0010  /* SQLITE_IOCAP_POWERSAFE_OVERWRITE */
#define UNIXFILE_DELETE       0x0020  /* Delete on close */
#define UNIXFILE_URI          0x0040  /* Filename might have query parameters */
#define UNIXFILE_NOLOCK       0x0080  /* Do no file locking */
#define UNIXFILE_WARNED       0x0100  /* verifyDbFile() warnings already issued */

258

259 /*

260 ** Include code that is common to all os_*.c files

261 */

262 #include "os_common.h"

263

/*
** Supply open() flag macros that some systems do not provide.  Each one
** falls back to 0 so that OR-ing it into a flag word is harmless.
** SQLITE_DISABLE_LFS forces O_LARGEFILE to 0 even where it exists.
*/
#if !defined(O_LARGEFILE)
#  define O_LARGEFILE 0
#endif
#if defined(SQLITE_DISABLE_LFS)
#  undef O_LARGEFILE
#  define O_LARGEFILE 0
#endif
#if !defined(O_NOFOLLOW)
#  define O_NOFOLLOW 0
#endif
#if !defined(O_BINARY)
#  define O_BINARY 0
#endif

/*
** threadid expands to pthread_self() in thread-safe builds and to 0
** otherwise.  Used for testing and debugging only.
*/
#if SQLITE_THREADSAFE
#  define threadid pthread_self()
#else
#  define threadid 0
#endif

/*
** Unless the build says otherwise, HAVE_MREMAP is 1 when compiling for
** Linux with _GNU_SOURCE defined, and 0 in every other case.
*/
#ifndef HAVE_MREMAP
#  if defined(__linux__) && defined(_GNU_SOURCE)
#    define HAVE_MREMAP 1
#  else
#    define HAVE_MREMAP 0
#  endif
#endif

/*
** Explicitly call the 64-bit version of lseek() on Android. Otherwise, lseek()
** is the 32-bit version, even if _FILE_OFFSET_BITS=64 is defined.
*/
#if defined(__ANDROID__)
#  define lseek lseek64
#endif

309

/*
** Unix systems disagree on the prototype of open(): some declare it as
** open(const char*,int,mode_t) and others as open(const char*,int,...).
** That matters as soon as a pointer to the function is taken.
**
** This wrapper gives open() one fixed, well-defined signature: it passes
** zFile, flags and mode straight through and returns open()'s result.
*/
static int posixOpen(const char *zFile, int flags, int mode){
  int fd = open(zFile, flags, mode);
  return fd;
}

321

/*
** Wrapper around fchown().  Some systems write to a security log when a
** non-root process calls fchown(), so the call is only made when the
** effective user id is 0; otherwise 0 is returned without doing anything.
** On VxWorks the function always returns 0.
*/
static int posixFchown(int fd, uid_t uid, gid_t gid){
#if OS_VXWORKS
  return 0;
#else
  if( geteuid()!=0 ){
    return 0;                 /* Not root: skip the call entirely */
  }
  return fchown(fd,uid,gid);
#endif
}

334

/* Forward references to functions that appear in the aSyscall[] table
** below.  The table casts openDirectory to int(*)(const char*,int*) and
** unixGetpagesize to int(*)(void), so the prototypes must match. */
static int openDirectory(const char*, int*);
static int unixGetpagesize(void);

338

339 /*

340 ** Many system calls are accessed through pointer-to-functions so that

341 ** they may be overridden at runtime to facilitate fault injection during

342 ** testing and sandboxing. The following array holds the names and pointers

343 ** to all overrideable system calls.

344 */

345 static struct unix_syscall {

346 const char zName; / Name of the system call */

347 sqlite3_syscall_ptr pCurrent; /* Current value of the system call */

348 sqlite3_syscall_ptr pDefault; /* Default value */

349 } aSyscall[] = {

350 { "open", (sqlite3_syscall_ptr)posixOpen, 0 },

351 #define osOpen ((int()(const char,int,int))aSyscall[0].pCurrent)

352

353 { "close", (sqlite3_syscall_ptr)close, 0 },

354 #define osClose ((int(*)(int))aSyscall[1].pCurrent)

355

356 { "access", (sqlite3_syscall_ptr)access, 0 },

357 #define osAccess ((int()(const char,int))aSyscall[2].pCurrent)

358

359 { "getcwd", (sqlite3_syscall_ptr)getcwd, 0 },

360 #define osGetcwd ((char()(char*,size_t))aSyscall[3].pCurrent)

361

362 { "stat", (sqlite3_syscall_ptr)stat, 0 },

363 #define osStat ((int()(const char,struct stat*))aSyscall[4].pCurrent)

364

365 /*

366 ** The DJGPP compiler environment looks mostly like Unix, but it

367 ** lacks the fcntl() system call. So redefine fcntl() to be something

368 ** that always succeeds. This means that locking does not occur under

369 ** DJGPP. But it is DOS - what did you expect?

370 */

371 #ifdef __DJGPP__

372 { "fstat", 0, 0 },

373 #define osFstat(a,b,c) 0

374 #else

375 { "fstat", (sqlite3_syscall_ptr)fstat, 0 },

376 #define osFstat ((int()(int,struct stat))aSyscall[5].pCurrent)

377 #endif

378

379 { "ftruncate", (sqlite3_syscall_ptr)ftruncate, 0 },

380 #define osFtruncate ((int(*)(int,off_t))aSyscall[6].pCurrent)

381

382 { "fcntl", (sqlite3_syscall_ptr)fcntl, 0 },

383 #define osFcntl ((int(*)(int,int,...))aSyscall[7].pCurrent)

384

385 { "read", (sqlite3_syscall_ptr)read, 0 },

386 #define osRead ((ssize_t()(int,void,size_t))aSyscall[8].pCurrent)

387

388 #if defined(USE_PREAD) \|\| (SQLITE_ENABLE_LOCKING_STYLE && !OS_VXWORKS)

389 { "pread", (sqlite3_syscall_ptr)pread, 0 },

390 #else

391 { "pread", (sqlite3_syscall_ptr)0, 0 },

392 #endif

393 #define osPread ((ssize_t()(int,void,size_t,off_t))aSyscall[9].pCurrent)

394

395 #if defined(USE_PREAD64)

396 { "pread64", (sqlite3_syscall_ptr)pread64, 0 },

397 #else

398 { "pread64", (sqlite3_syscall_ptr)0, 0 },

399 #endif

400 #define osPread64 ((ssize_t()(int,void,size_t,off_t))aSyscall[10].pCurrent)

401

402 { "write", (sqlite3_syscall_ptr)write, 0 },

403 #define osWrite ((ssize_t()(int,const void,size_t))aSyscall[11].pCurrent)

404

405 #if defined(USE_PREAD) \|\| (SQLITE_ENABLE_LOCKING_STYLE && !OS_VXWORKS)

406 { "pwrite", (sqlite3_syscall_ptr)pwrite, 0 },

407 #else

408 { "pwrite", (sqlite3_syscall_ptr)0, 0 },

409 #endif

410 #define osPwrite ((ssize_t()(int,const void,size_t,off_t))\

411 aSyscall[12].pCurrent)

412

413 #if defined(USE_PREAD64)

414 { "pwrite64", (sqlite3_syscall_ptr)pwrite64, 0 },

415 #else

416 { "pwrite64", (sqlite3_syscall_ptr)0, 0 },

417 #endif

418 #define osPwrite64 ((ssize_t()(int,const void,size_t,off_t))\

419 aSyscall[13].pCurrent)

420

421 { "fchmod", (sqlite3_syscall_ptr)fchmod, 0 },

422 #define osFchmod ((int(*)(int,mode_t))aSyscall[14].pCurrent)

423

424 #if defined(HAVE_POSIX_FALLOCATE) && HAVE_POSIX_FALLOCATE

425 { "fallocate", (sqlite3_syscall_ptr)posix_fallocate, 0 },

426 #else

427 { "fallocate", (sqlite3_syscall_ptr)0, 0 },

428 #endif

429 #define osFallocate ((int(*)(int,off_t,off_t))aSyscall[15].pCurrent)

430

431 { "unlink", (sqlite3_syscall_ptr)unlink, 0 },

432 #define osUnlink ((int()(const char))aSyscall[16].pCurrent)

433

434 { "openDirectory", (sqlite3_syscall_ptr)openDirectory, 0 },

435 #define osOpenDirectory ((int()(const char,int*))aSyscall[17].pCurrent)

436

437 { "mkdir", (sqlite3_syscall_ptr)mkdir, 0 },

438 #define osMkdir ((int()(const char,mode_t))aSyscall[18].pCurrent)

439

440 { "rmdir", (sqlite3_syscall_ptr)rmdir, 0 },

441 #define osRmdir ((int()(const char))aSyscall[19].pCurrent)

442

443 { "fchown", (sqlite3_syscall_ptr)posixFchown, 0 },

444 #define osFchown ((int(*)(int,uid_t,gid_t))aSyscall[20].pCurrent)

445

446 #if !defined(SQLITE_OMIT_WAL) \|\| SQLITE_MAX_MMAP_SIZE>0

447 { "mmap", (sqlite3_syscall_ptr)mmap, 0 },

448 #define osMmap ((void()(void*,size_t,int,int,int,off_t))aSyscall[21].pCurrent)

449

450 { "munmap", (sqlite3_syscall_ptr)munmap, 0 },

451 #define osMunmap ((void()(void*,size_t))aSyscall[22].pCurrent)

452

453 #if HAVE_MREMAP

454 { "mremap", (sqlite3_syscall_ptr)mremap, 0 },

455 #else

456 { "mremap", (sqlite3_syscall_ptr)0, 0 },

457 #endif

458 #define osMremap ((void()(void*,size_t,size_t,int,...))aSyscall[23].pCurrent)

459 { "getpagesize", (sqlite3_syscall_ptr)unixGetpagesize, 0 },

460 #define osGetpagesize ((int(*)(void))aSyscall[24].pCurrent)

461

462 #endif

463

464 }; /* End of the overrideable system calls */

465

466 /*

467 ** This is the xSetSystemCall() method of sqlite3_vfs for all of the

468 ** "unix" VFSes. Return SQLITE_OK opon successfully updating the

469 ** system call pointer, or SQLITE_NOTFOUND if there is no configurable

470 ** system call named zName.

471 */

472 static int unixSetSystemCall(

473 sqlite3_vfs pNotUsed, / The VFS pointer. Not used */

474 const char zName, / Name of system call to override */

475 sqlite3_syscall_ptr pNewFunc /* Pointer to new system call value */

476 ){

477 unsigned int i;

478 int rc = SQLITE_NOTFOUND;

479

480 UNUSED_PARAMETER(pNotUsed);

481 if( zName==0 ){

482 /* If no zName is given, restore all system calls to their default

483 ** settings and return NULL

484 */

485 rc = SQLITE_OK;

486 for(i=0; i<sizeof(aSyscall)/sizeof(aSyscall[0]); i++){

487 if( aSyscall[i].pDefault ){

488 aSyscall[i].pCurrent = aSyscall[i].pDefault;

489 }

490 }

491 }else{

492 /* If zName is specified, operate on only the one system call

493 ** specified.

494 */

495 for(i=0; i<sizeof(aSyscall)/sizeof(aSyscall[0]); i++){

496 if( strcmp(zName, aSyscall[i].zName)==0 ){

497 if( aSyscall[i].pDefault==0 ){

498 aSyscall[i].pDefault = aSyscall[i].pCurrent;

499 }

500 rc = SQLITE_OK;

501 if( pNewFunc==0 ) pNewFunc = aSyscall[i].pDefault;

502 aSyscall[i].pCurrent = pNewFunc;

503 break;

504 }

505 }

506 }

507 return rc;

508 }

509

510 /*

511 ** Return the value of a system call. Return NULL if zName is not a

512 ** recognized system call name. NULL is also returned if the system call

513 ** is currently undefined.

514 */

515 static sqlite3_syscall_ptr unixGetSystemCall(

516 sqlite3_vfs *pNotUsed,

517 const char *zName

518 ){

519 unsigned int i;

520

521 UNUSED_PARAMETER(pNotUsed);

522 for(i=0; i<sizeof(aSyscall)/sizeof(aSyscall[0]); i++){

523 if( strcmp(zName, aSyscall[i].zName)==0 ) return aSyscall[i].pCurrent;

524 }

525 return 0;

526 }

527

528 /*

529 ** Return the name of the first system call after zName. If zName==NULL

530 ** then return the name of the first system call. Return NULL if zName

531 ** is the last system call or if zName is not the name of a valid

532 ** system call.

533 */

534 static const char unixNextSystemCall(sqlite3_vfs p, const char *zName){

535 int i = -1;

536

537 UNUSED_PARAMETER(p);

538 if( zName ){

539 for(i=0; i<ArraySize(aSyscall)-1; i++){

540 if( strcmp(zName, aSyscall[i].zName)==0 ) break;

541 }

542 }

543 for(i++; i<ArraySize(aSyscall); i++){

544 if( aSyscall[i].pCurrent!=0 ) return aSyscall[i].zName;

545 }

546 return 0;

547 }

548

/*
** Smallest file descriptor number that will be accepted for a database
** file.  Lower numbers are refused so that a database is never opened on
** a descriptor commonly used for standard input, output, or error.
*/
#if !defined(SQLITE_MINIMUM_FILE_DESCRIPTOR)
#  define SQLITE_MINIMUM_FILE_DESCRIPTOR 3
#endif

557

558 /*

559 ** Invoke open(). Do so multiple times, until it either succeeds or

560 ** fails for some reason other than EINTR.

561 **

562 ** If the file creation mode "m" is 0 then set it to the default for

563 ** SQLite. The default is SQLITE_DEFAULT_FILE_PERMISSIONS (normally

564 ** 0644) as modified by the system umask. If m is not 0, then

565 ** make the file creation mode be exactly m ignoring the umask.

566 **

567 ** The m parameter will be non-zero only when creating -wal, -journal,

568 ** and -shm files. We want those files to have exactly the same

569 ** permissions as their original database, unadulterated by the umask.

570 ** In that way, if a database file is -rw-rw-rw or -rw-rw-r-, and a

571 ** transaction crashes and leaves behind hot journals, then any

572 ** process that is able to write to the database will also be able to

573 ** recover the hot journals.

574 */

575 static int robust_open(const char *z, int f, mode_t m){

576 int fd;

577 mode_t m2 = m ? m : SQLITE_DEFAULT_FILE_PERMISSIONS;

578 while(1){

579 #if defined(O_CLOEXEC)

580 fd = osOpen(z,f\|O_CLOEXEC,m2);

581 #else

582 fd = osOpen(z,f,m2);

583 #endif

584 if( fd<0 ){

585 if( errno==EINTR ) continue;

586 break;

587 }

588 if( fd>=SQLITE_MINIMUM_FILE_DESCRIPTOR ) break;

589 osClose(fd);

590 sqlite3_log(SQLITE_WARNING,

591 "attempt to open \"%s\" as file descriptor %d", z, fd);

592 fd = -1;

593 if( osOpen("/dev/null", f, m)<0 ) break;

594 }

595 if( fd>=0 ){

596 if( m!=0 ){

597 struct stat statbuf;

598 if( osFstat(fd, &statbuf)==0

599 && statbuf.st_size==0

600 && (statbuf.st_mode&0777)!=m

601 ){

602 osFchmod(fd, m);

603 }

604 }

605 #if defined(FD_CLOEXEC) && (!defined(O_CLOEXEC) \|\| O_CLOEXEC==0)

606 osFcntl(fd, F_SETFD, osFcntl(fd, F_GETFD, 0) \| FD_CLOEXEC);

607 #endif

608 }

609 return fd;

610 }

611

612 /*

613 ** Helper functions to obtain and relinquish the global mutex. The

614 ** global mutex is used to protect the unixInodeInfo and

615 ** vxworksFileId objects used by this file, all of which may be

616 ** shared by multiple threads.

617 **

618 ** Function unixMutexHeld() is used to assert() that the global mutex

619 ** is held when required. This function is only used as part of assert()

620 ** statements. e.g.

621 **

622 ** unixEnterMutex()

623 ** assert( unixMutexHeld() );

624 ** unixEnterLeave()

625 */

626 static void unixEnterMutex(void){

627 sqlite3_mutex_enter(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER));

628 }

629 static void unixLeaveMutex(void){

630 sqlite3_mutex_leave(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER));

631 }

#ifdef SQLITE_DEBUG
/* Return the result of sqlite3_mutex_held() for the
** SQLITE_MUTEX_STATIC_MASTER mutex.  Compiled in debug builds only. */
static int unixMutexHeld(void) {
  sqlite3_mutex *pMaster = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER);
  return sqlite3_mutex_held(pMaster);
}
#endif

637

638

#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG)
/*
** Map an integer lock-type onto a printable name for use in trace output
** from debugging binaries.  Any value that is not one of the five lock
** levels yields "ERROR".
*/
static const char *azFileLock(int eFileLock){
  if( eFileLock==NO_LOCK )        return "NONE";
  if( eFileLock==SHARED_LOCK )    return "SHARED";
  if( eFileLock==RESERVED_LOCK )  return "RESERVED";
  if( eFileLock==PENDING_LOCK )   return "PENDING";
  if( eFileLock==EXCLUSIVE_LOCK ) return "EXCLUSIVE";
  return "ERROR";
}
#endif

656

#ifdef SQLITE_LOCK_TRACE
/*
** Print out information about all locking operations.
**
** This routine is used for troubleshooting locks on multithreaded
** platforms.  Enable by compiling with the -DSQLITE_LOCK_TRACE
** command-line option on the compiler.  This code is normally
** turned off.
**
** The function forwards to osFcntl(fd, op, p), prints what was requested
** and what came back, and returns the osFcntl() result.  errno is
** preserved across the diagnostic output.  Once defined, the osFcntl
** macro is redirected to this function.
*/
static int lockTrace(int fd, int op, struct flock *p){
  const char *zOpName;
  const char *zType = "?";     /* Stays "?" if assert() is compiled out */
  int s;
  int savedErrno;
  if( op==F_GETLK ){
    zOpName = "GETLK";
  }else if( op==F_SETLK ){
    zOpName = "SETLK";
  }else{
    /* Not a lock query or request: pass through with minimal tracing */
    s = osFcntl(fd, op, p);
    sqlite3DebugPrintf("fcntl unknown %d %d %d\n", fd, op, s);
    return s;
  }
  if( p->l_type==F_RDLCK ){
    zType = "RDLCK";
  }else if( p->l_type==F_WRLCK ){
    zType = "WRLCK";
  }else if( p->l_type==F_UNLCK ){
    zType = "UNLCK";
  }else{
    assert( 0 );
  }
  assert( p->l_whence==SEEK_SET );
  s = osFcntl(fd, op, p);
  savedErrno = errno;
  sqlite3DebugPrintf("fcntl %d %d %s %s %d %d %d %d\n",
     threadid, fd, zOpName, zType, (int)p->l_start, (int)p->l_len,
     (int)p->l_pid, s);
  if( s==(-1) && op==F_SETLK && (p->l_type==F_RDLCK || p->l_type==F_WRLCK) ){
    /* The lock request failed: ask which existing lock is in the way */
    struct flock l2;
    l2 = *p;
    osFcntl(fd, F_GETLK, &l2);
    if( l2.l_type==F_RDLCK ){
      zType = "RDLCK";
    }else if( l2.l_type==F_WRLCK ){
      zType = "WRLCK";
    }else if( l2.l_type==F_UNLCK ){
      zType = "UNLCK";
    }else{
      assert( 0 );
    }
    sqlite3DebugPrintf("fcntl-failure-reason: %s %d %d %d\n",
       zType, (int)l2.l_start, (int)l2.l_len, (int)l2.l_pid);
  }
  errno = savedErrno;
  return s;
}
#undef osFcntl
#define osFcntl lockTrace
#endif /* SQLITE_LOCK_TRACE */

716

717 /*

718 ** Retry ftruncate() calls that fail due to EINTR

719 **

720 ** All calls to ftruncate() within this file should be made through this wrapper .

721 ** On the Android platform, bypassing the logic below could lead to a corrupt

722 ** database.

723 */

724 static int robust_ftruncate(int h, sqlite3_int64 sz){

725 int rc;

726 #ifdef __ANDROID__

727 /* On Android, ftruncate() always uses 32-bit offsets, even if

728 ** _FILE_OFFSET_BITS=64 is defined. This means it is unsafe to attempt to

729 ** truncate a file to any size larger than 2GiB. Silently ignore any

730 ** such attempts. */

731 if( sz>(sqlite3_int64)0x7FFFFFFF ){

732 rc = SQLITE_OK;

733 }else

734 #endif

735 do{ rc = osFtruncate(h,sz); }while( rc<0 && errno==EINTR );

736 return rc;

737 }

738

739 /*

740 ** This routine translates a standard POSIX errno code into something

741 ** useful to the clients of the sqlite3 functions. Specifically, it is

742 ** intended to translate a variety of "try again" errors into SQLITE_BUSY

743 ** and a variety of "please close the file descriptor NOW" errors into

744 ** SQLITE_IOERR

745 **

746 ** Errors during initialization of locks, or file system support for locks,

747 ** should handle ENOLCK, ENOTSUP, EOPNOTSUPP separately.

748 */

749 static int sqliteErrorFromPosixError(int posixError, int sqliteIOErr) {

750 switch (posixError) {

751 #if 0

752 /* At one point this code was not commented out. In theory, this branch

753 ** should never be hit, as this function should only be called after

754 ** a locking-related function (i.e. fcntl()) has returned non-zero with

755 ** the value of errno as the first argument. Since a system call has failed,

756 ** errno should be non-zero.

757 **

758 ** Despite this, if errno really is zero, we still don't want to return

759 ** SQLITE_OK. The system call failed, and some SQLite error should be

760 ** propagated back to the caller. Commenting this branch out means errno==0

761 ** will be handled by the "default:" case below.

762 */

763 case 0:

764 return SQLITE_OK;

765 #endif

766

767 case EAGAIN:

768 case ETIMEDOUT:

769 case EBUSY:

770 case EINTR:

771 case ENOLCK:

772 /* random NFS retry error, unless during file system support

773 * introspection, in which it actually means what it says */

774 return SQLITE_BUSY;

775

776 case EACCES:

777 /* EACCES is like EAGAIN during locking operations, but not any other time*/

778 if( (sqliteIOErr == SQLITE_IOERR_LOCK) \|\|

779 (sqliteIOErr == SQLITE_IOERR_UNLOCK) \|\|

780 (sqliteIOErr == SQLITE_IOERR_RDLOCK) \|\|

781 (sqliteIOErr == SQLITE_IOERR_CHECKRESERVEDLOCK) ){

782 return SQLITE_BUSY;

783 }

784 /* else fall through */

785 case EPERM:

786 return SQLITE_PERM;

787

788 #if EOPNOTSUPP!=ENOTSUP

789 case EOPNOTSUPP:

790 /* something went terribly awry, unless during file system support

791 * introspection, in which it actually means what it says */

792 #endif

793 #ifdef ENOTSUP

794 case ENOTSUP:

795 /* invalid fd, unless during file system support introspection, in which

796 * it actually means what it says */

797 #endif

798 case EIO:

799 case EBADF:

800 case EINVAL:

801 case ENOTCONN:

802 case ENODEV:

803 case ENXIO:

804 case ENOENT:

805 #ifdef ESTALE /* ESTALE is not defined on Interix systems */

806 case ESTALE:

807 #endif

808 case ENOSYS:

809 /* these should force the client to close the file and reconnect */

810

811 default:

812 return sqliteIOErr;

813 }

814 }

815

816

817 /******************************************************************************

818 **************** Begin Unique File ID Utility Used By VxWorks *************

819 **

820 ** On most versions of unix, we can get a unique ID for a file by concatenating

821 ** the device number and the inode number. But this does not work on VxWorks.

822 ** On VxWorks, a unique file id must be based on the canonical filename.

823 **

824 ** A pointer to an instance of the following structure can be used as a

825 ** unique file ID in VxWorks. Each instance of this structure contains

826 ** a copy of the canonical filename. There is also a reference count.

827 ** The structure is reclaimed when the number of pointers to it drops to

828 ** zero.

829 **

830 ** There are never very many files open at one time and lookups are not

831 ** a performance-critical path, so it is sufficient to put these

832 ** structures on a linked list.

833 */

/* One entry in the list of canonical filenames used as file IDs.
** Entries are chained through pNext and carry a reference count. */
struct vxworksFileId {
  struct vxworksFileId *pNext;  /* Next in a list of them all */
  int nRef;                     /* Number of references to this one */
  int nName;                    /* Length of the zCanonicalName[] string */
  char *zCanonicalName;         /* Canonical filename */
};

840

841 #if OS_VXWORKS

/*
** All unique filenames are held on a linked list headed by this
** variable.  The list starts out empty (NULL); entries are linked
** through vxworksFileId.pNext.
*/
static struct vxworksFileId *vxworksFileList = 0;

847

/*
** Rewrite the filename held in z[0..n-1] into canonical form, in place:
**
**   * trailing slashes and repeated slashes are removed
**   * "/./" becomes "/"
**   * "/A/../", where A is any simple name, becomes "/"
**
** The result is zero-terminated.  The return value is the number of
** characters in the simplified name.
*/
static int vxworksSimplifyName(char *z, int n){
  int iIn;        /* Index of the next input character to examine */
  int iOut = 0;   /* Number of characters written to the output so far */

  /* Strip trailing slashes, always leaving at least one character. */
  while( n>1 && z[n-1]=='/' ){
    n--;
  }

  for(iIn=0; iIn<n; iIn++){
    if( z[iIn]=='/' ){
      if( z[iIn+1]=='/' ){
        /* Not the last slash of a run: drop it */
        continue;
      }
      if( z[iIn+1]=='.' ){
        if( iIn+2<n && z[iIn+2]=='/' ){
          /* "/./": skip the "/." and resume at the slash that follows */
          iIn++;
          continue;
        }
        if( iIn+3<n && z[iIn+2]=='.' && z[iIn+3]=='/' ){
          /* "/../": back the output up over the previous component */
          while( iOut>0 && z[iOut-1]!='/' ) iOut--;
          if( iOut>0 ) iOut--;
          iIn += 2;
          continue;
        }
      }
    }
    z[iOut++] = z[iIn];
  }
  z[iOut] = 0;
  return iOut;
}

883

884 /*

885 ** Find a unique file ID for the given absolute pathname. Return

886 ** a pointer to the vxworksFileId object. This pointer is the unique

887 ** file ID.

888 **

889 ** The nRef field of the vxworksFileId object is incremented before

890 ** the object is returned. A new vxworksFileId object is created

891 ** and added to the global list if necessary.

892 **

893 ** If a memory allocation error occurs, return NULL.

894 */

895 static struct vxworksFileId vxworksFindFileId(const char zAbsoluteName){

896 struct vxworksFileId pNew; / search key and new file ID */

897 struct vxworksFileId pCandidate; / For looping over existing file IDs */

898 int n; /* Length of zAbsoluteName string */

899

900 assert( zAbsoluteName[0]=='/' );

901 n = (int)strlen(zAbsoluteName);

902 pNew = sqlite3_malloc( sizeof(*pNew) + (n+1) );

903 if( pNew==0 ) return 0;

904 pNew->zCanonicalName = (char*)&pNew[1];

905 memcpy(pNew->zCanonicalName, zAbsoluteName, n+1);

906 n = vxworksSimplifyName(pNew->zCanonicalName, n);

907

908 /* Search for an existing entry that matching the canonical name.

909 ** If found, increment the reference count and return a pointer to

910 ** the existing file ID.

911 */

912 unixEnterMutex();

913 for(pCandidate=vxworksFileList; pCandidate; pCandidate=pCandidate->pNext){

914 if( pCandidate->nName==n

915 && memcmp(pCandidate->zCanonicalName, pNew->zCanonicalName, n)==0

916 ){

917 sqlite3_free(pNew);

918 pCandidate->nRef++;

919 unixLeaveMutex();

920 return pCandidate;

921 }

922 }

923

924 /* No match was found. We will make a new file ID */

925 pNew->nRef = 1;

926 pNew->nName = n;

927 pNew->pNext = vxworksFileList;

928 vxworksFileList = pNew;

929 unixLeaveMutex();

930 return pNew;

931 }

932

933 /*

934 ** Decrement the reference count on a vxworksFileId object. Free

935 ** the object when the reference count reaches zero.

936 */

937 static void vxworksReleaseFileId(struct vxworksFileId *pId){

938 unixEnterMutex();

939 assert( pId->nRef>0 );

940 pId->nRef--;

941 if( pId->nRef==0 ){

942 struct vxworksFileId **pp;

943 for(pp=&vxworksFileList; pp && pp!=pId; pp = &((*pp)->pNext)){}

944 assert( *pp==pId );

945 *pp = pId->pNext;

946 sqlite3_free(pId);

947 }

948 unixLeaveMutex();

949 }

950 #endif /* OS_VXWORKS */

951 /************* End of Unique File ID Utility Used By VxWorks **************

952 ******************************************************************************/

953

954

955 /******************************************************************************

956 ************************* Posix Advisory Locking **************************

957 **

958 ** POSIX advisory locks are broken by design. ANSI STD 1003.1 (1996)

959 ** section 6.5.2.2 lines 483 through 490 specify that when a process

960 ** sets or clears a lock, that operation overrides any prior locks set

961 ** by the same process. It does not explicitly say so, but this implies

962 ** that it overrides locks set by the same process using a different

963 ** file descriptor. Consider this test case:

964 **

965 ** int fd1 = open("./file1", O_RDWR|O_CREAT, 0644);

966 ** int fd2 = open("./file2", O_RDWR|O_CREAT, 0644);

967 **

968 ** Suppose ./file1 and ./file2 are really the same file (because

969 ** one is a hard or symbolic link to the other) then if you set

970 ** an exclusive lock on fd1, then try to get an exclusive lock

971 ** on fd2, it works. I would have expected the second lock to

972 ** fail since there was already a lock on the file due to fd1.

973 ** But not so. Since both locks came from the same process, the

974 ** second overrides the first, even though they were on different

975 ** file descriptors opened on different file names.

976 **

977 ** This means that we cannot use POSIX locks to synchronize file access

978 ** among competing threads of the same process. POSIX locks will work fine

979 ** to synchronize access for threads in separate processes, but not

980 ** threads within the same process.

981 **

982 ** To work around the problem, SQLite has to manage file locks internally

983 ** on its own. Whenever a new database is opened, we have to find the

984 ** specific inode of the database file (the inode is determined by the

985 ** st_dev and st_ino fields of the stat structure that fstat() fills in)

986 ** and check for locks already existing on that inode. When locks are

987 ** created or removed, we have to look at our own internal record of the

988 ** locks to see if another thread has previously set a lock on that same

989 ** inode.

990 **

991 ** (Aside: The use of inode numbers as unique IDs does not work on VxWorks.

992 ** For VxWorks, we have to use the alternative unique ID system based on

993 ** canonical filename and implemented in the previous division.)

994 **

995 ** The sqlite3_file structure for POSIX is no longer just an integer file

996 ** descriptor. It is now a structure that holds the integer file

997 ** descriptor and a pointer to a structure that describes the internal

998 ** locks on the corresponding inode. There is one locking structure

999 ** per inode, so if the same inode is opened twice, both unixFile structures

1000 ** point to the same locking structure. The locking structure keeps

1001 ** a reference count (so we will know when to delete it) and a "cnt"

1002 ** field that tells us its internal lock status. cnt==0 means the

1003 ** file is unlocked. cnt==-1 means the file has an exclusive lock.

1004 ** cnt>0 means there are cnt shared locks on the file.

1005 **

1006 ** Any attempt to lock or unlock a file first checks the locking

1007 ** structure. The fcntl() system call is only invoked to set a

1008 ** POSIX lock if the internal lock structure transitions between

1009 ** a locked and an unlocked state.

1010 **

1011 ** But wait: there are yet more problems with POSIX advisory locks.

1012 **

1013 ** If you close a file descriptor that points to a file that has locks,

1014 ** all locks on that file that are owned by the current process are

1015 ** released. To work around this problem, each unixInodeInfo object

1016 ** maintains a count of the number of pending locks on that inode.

1017 ** When an attempt is made to close an unixFile, if there are

1018 ** other unixFile open on the same inode that are holding locks, the call

1019 ** to close() the file descriptor is deferred until all of the locks clear.

1020 ** The unixInodeInfo structure keeps a list of file descriptors that need to

1021 ** be closed and that list is walked (and cleared) when the last lock

1022 ** clears.

1023 **

1024 ** Yet another problem: LinuxThreads do not play well with posix locks.

1025 **

1026 ** Many older versions of linux use the LinuxThreads library which is

1027 ** not posix compliant. Under LinuxThreads, a lock created by thread

1028 ** A cannot be modified or overridden by a different thread B.

1029 ** Only thread A can modify the lock. Locking behavior is correct

1030 ** if the application uses the newer Native Posix Thread Library (NPTL)

1031 ** on linux - with NPTL a lock created by thread A can override locks

1032 ** in thread B. But there is no way to know at compile-time which

1033 ** threading library is being used. So there is no way to know at

1034 ** compile-time whether or not thread A can override locks on thread B.

1035 ** One has to do a run-time check to discover the behavior of the

1036 ** current process.

1037 **

1038 ** SQLite used to support LinuxThreads. But support for LinuxThreads

1039 ** was dropped beginning with version 3.7.0. SQLite will still work with

1040 ** LinuxThreads provided that (1) there is no more than one connection

1041 ** per database file in the same process and (2) database connections

1042 ** do not move across threads.

1043 */

1044

/*
** An instance of the following structure serves as the key used
** to locate a particular unixInodeInfo object.
**
** The second field depends on the platform: a pointer to a
** vxworksFileId when OS_VXWORKS is true, otherwise the inode number.
*/
struct unixFileId {
  dev_t dev;                  /* Device number */
#if OS_VXWORKS
  struct vxworksFileId *pId;  /* Unique file ID for vxworks. */
#else
  ino_t ino;                  /* Inode number */
#endif
};

1057

1058 /*

1059 ** An instance of the following structure is allocated for each open

1060 ** inode. Or, on LinuxThreads, there is one of these structures for

1061 ** each inode opened by each thread.

1062 **

1063 ** A single inode can have multiple file descriptors, so each unixFile

1064 ** structure contains a pointer to an instance of this object and this

1065 ** object keeps a count of the number of unixFile pointing to it.

1066 */

1067 struct unixInodeInfo {

1068 struct unixFileId fileId; /* The lookup key */

1069 int nShared; /* Number of SHARED locks held */

1070 unsigned char eFileLock; /* One of SHARED_LOCK, RESERVED_LOCK etc. */

1071 unsigned char bProcessLock; /* An exclusive process lock is held */

1072 int nRef; /* Number of pointers to this structure */

1073 unixShmNode pShmNode; / Shared memory associated with this inode */

1074 int nLock; /* Number of outstanding file locks */

1075 UnixUnusedFd pUnused; / Unused file descriptors to close */

1076 unixInodeInfo pNext; / List of all unixInodeInfo objects */

1077 unixInodeInfo pPrev; / .... doubly linked */

1078 #if SQLITE_ENABLE_LOCKING_STYLE

1079 unsigned long long sharedByte; /* for AFP simulated shared lock */

1080 #endif

1081 #if OS_VXWORKS

1082 sem_t pSem; / Named POSIX semaphore */

1083 char aSemName[MAX_PATHNAME+2]; /* Name of that semaphore */

1084 #endif

1085 };

1086

1087 /*

1088 ** A lists of all unixInodeInfo objects.

1089 */

1090 static unixInodeInfo *inodeList = 0;

1091

/*
** This function - unixLogErrorAtLine() - is normally invoked through the
** macro unixLogError(), which supplies __LINE__ as the final argument.
** robust_close() calls it directly so that it can pass through the line
** number given by its own caller.
**
** It is invoked after an error occurs in an OS function and errno has been
** set. It logs a message using sqlite3_log() containing the current value of
** errno and, if possible, the human-readable equivalent from strerror() or
** strerror_r().
**
** The first argument passed to the macro should be the error code that
** will be returned to SQLite (e.g. SQLITE_IOERR_DELETE, SQLITE_CANTOPEN).
** The two subsequent arguments should be the name of the OS function that
** failed (e.g. "unlink", "open") and the associated file-system path,
** if any.  A NULL path is logged as an empty string.
**
** The return value is always the errcode argument, unchanged.
*/
#define unixLogError(a,b,c)     unixLogErrorAtLine(a,b,c,__LINE__)
static int unixLogErrorAtLine(
  int errcode,                    /* SQLite error code */
  const char *zFunc,              /* Name of OS function that failed */
  const char *zPath,              /* File path associated with error */
  int iLine                       /* Source line number where error occurred */
){
  char *zErr;                     /* Message from strerror() or equivalent */
  int iErrno = errno;             /* Saved syscall error number */

  /* If this is not a threadsafe build (SQLITE_THREADSAFE==0), then use
  ** the strerror() function to obtain the human-readable error message
  ** equivalent to errno. Otherwise, use strerror_r().
  */
#if SQLITE_THREADSAFE && defined(HAVE_STRERROR_R)
  char aErr[80];
  memset(aErr, 0, sizeof(aErr));
  zErr = aErr;

  /* If STRERROR_R_CHAR_P (set by autoconf scripts) or __USE_GNU is defined,
  ** assume that the system provides the GNU version of strerror_r() that
  ** returns a pointer to a buffer containing the error message. That pointer
  ** may point to aErr[], or it may point to some static storage somewhere.
  ** Otherwise, assume that the system provides the POSIX version of
  ** strerror_r(), which always writes an error message into aErr[].
  **
  ** If the code incorrectly assumes that it is the POSIX version that is
  ** available, the error message will often be an empty string. Not a
  ** huge problem. Incorrectly concluding that the GNU version is available
  ** could lead to a segfault though.
  **
  ** Note that the "zErr =" below and the strerror_r() call that follows
  ** the #endif form a single statement when the #if is taken.
  */
#if defined(STRERROR_R_CHAR_P) || defined(__USE_GNU)
  zErr =
# endif
  strerror_r(iErrno, aErr, sizeof(aErr)-1);

#elif SQLITE_THREADSAFE
  /* This is a threadsafe build, but strerror_r() is not available. */
  zErr = "";
#else
  /* Non-threadsafe build, use strerror().  */
  zErr = strerror(iErrno);
#endif

  if( zPath==0 ) zPath = "";
  sqlite3_log(errcode,
      "os_unix.c:%d: (%d) %s(%s) - %s",
      iLine, iErrno, zFunc, zPath, zErr
  );

  return errcode;
}

1160

1161 /*

1162 ** Close a file descriptor.

1163 **

1164 ** We assume that close() almost always works, since it is only in a

1165 ** very sick application or on a very sick platform that it might fail.

1166 ** If it does fail, simply leak the file descriptor, but do log the

1167 ** error.

1168 **

1169 ** Note that it is not safe to retry close() after EINTR since the

1170 ** file descriptor might have already been reused by another thread.

1171 ** So we don't even try to recover from an EINTR. Just log the error

1172 ** and move on.

1173 */

1174 static void robust_close(unixFile *pFile, int h, int lineno){

1175 if( osClose(h) ){

1176 unixLogErrorAtLine(SQLITE_IOERR_CLOSE, "close",

1177 pFile ? pFile->zPath : 0, lineno);

1178 }

1179 }

1180

1181 /*

1182 ** Close all file descriptors accumuated in the unixInodeInfo->pUnused list.

1183 */

1184 static void closePendingFds(unixFile *pFile){

1185 unixInodeInfo *pInode = pFile->pInode;

1186 UnixUnusedFd *p;

1187 UnixUnusedFd *pNext;

1188 for(p=pInode->pUnused; p; p=pNext){

1189 pNext = p->pNext;

1190 robust_close(pFile, p->fd, __LINE__);

1191 sqlite3_free(p);

1192 }

1193 pInode->pUnused = 0;

1194 }

1195

1196 /*

1197 ** Release a unixInodeInfo structure previously allocated by findInodeInfo().

1198 **

1199 ** The mutex entered using the unixEnterMutex() function must be held

1200 ** when this function is called.

1201 */

1202 static void releaseInodeInfo(unixFile *pFile){

1203 unixInodeInfo *pInode = pFile->pInode;

1204 assert( unixMutexHeld() );

1205 if( ALWAYS(pInode) ){

1206 pInode->nRef--;

1207 if( pInode->nRef==0 ){

1208 assert( pInode->pShmNode==0 );

1209 closePendingFds(pFile);

1210 if( pInode->pPrev ){

1211 assert( pInode->pPrev->pNext==pInode );

1212 pInode->pPrev->pNext = pInode->pNext;

1213 }else{

1214 assert( inodeList==pInode );

1215 inodeList = pInode->pNext;

1216 }

1217 if( pInode->pNext ){

1218 assert( pInode->pNext->pPrev==pInode );

1219 pInode->pNext->pPrev = pInode->pPrev;

1220 }

1221 sqlite3_free(pInode);

1222 }

1223 }

1224 }

1225

1226 /*

1227 ** Given a file descriptor, locate the unixInodeInfo object that

1228 ** describes that file descriptor. Create a new one if necessary. The

1229 ** return value might be uninitialized if an error occurs.

1230 **

1231 ** The mutex entered using the unixEnterMutex() function must be held

1232 ** when this function is called.

1233 **

1234 ** Return an appropriate error code.

1235 */

1236 static int findInodeInfo(

1237 unixFile pFile, / Unix file with file desc used in the key */

1238 unixInodeInfo *ppInode / Return the unixInodeInfo object here */

1239 ){

1240 int rc; /* System call return code */

1241 int fd; /* The file descriptor for pFile */

1242 struct unixFileId fileId; /* Lookup key for the unixInodeInfo */

1243 struct stat statbuf; /* Low-level file information */

1244 unixInodeInfo pInode = 0; / Candidate unixInodeInfo object */

1245

1246 assert( unixMutexHeld() );

1247

1248 /* Get low-level information about the file that we can used to

1249 ** create a unique name for the file.

1250 */

1251 fd = pFile->h;

1252 rc = osFstat(fd, &statbuf);

1253 if( rc!=0 ){

1254 pFile->lastErrno = errno;

1255 #ifdef EOVERFLOW

1256 if( pFile->lastErrno==EOVERFLOW ) return SQLITE_NOLFS;

1257 #endif

1258 return SQLITE_IOERR;

1259 }

1260

1261 #ifdef __APPLE__

1262 /* On OS X on an msdos filesystem, the inode number is reported

1263 ** incorrectly for zero-size files. See ticket #3260. To work

1264 ** around this problem (we consider it a bug in OS X, not SQLite)

1265 ** we always increase the file size to 1 by writing a single byte

1266 ** prior to accessing the inode number. The one byte written is

1267 ** an ASCII 'S' character which also happens to be the first byte

1268 ** in the header of every SQLite database. In this way, if there

1269 ** is a race condition such that another thread has already populated

1270 ** the first page of the database, no damage is done.

1271 */

1272 if( statbuf.st_size==0 && (pFile->fsFlags & SQLITE_FSFLAGS_IS_MSDOS)!=0 ){

1273 do{ rc = osWrite(fd, "S", 1); }while( rc<0 && errno==EINTR );

1274 if( rc!=1 ){

1275 pFile->lastErrno = errno;

1276 return SQLITE_IOERR;

1277 }

1278 rc = osFstat(fd, &statbuf);

1279 if( rc!=0 ){

1280 pFile->lastErrno = errno;

1281 return SQLITE_IOERR;

1282 }

1283 }

1284 #endif

1285

1286 memset(&fileId, 0, sizeof(fileId));

1287 fileId.dev = statbuf.st_dev;

1288 #if OS_VXWORKS

1289 fileId.pId = pFile->pId;

1290 #else

1291 fileId.ino = statbuf.st_ino;

1292 #endif

1293 pInode = inodeList;

1294 while( pInode && memcmp(&fileId, &pInode->fileId, sizeof(fileId)) ){

1295 pInode = pInode->pNext;

1296 }

1297 if( pInode==0 ){

1298 pInode = sqlite3_malloc( sizeof(*pInode) );

1299 if( pInode==0 ){

1300 return SQLITE_NOMEM;

1301 }

1302 memset(pInode, 0, sizeof(*pInode));

1303 memcpy(&pInode->fileId, &fileId, sizeof(fileId));

1304 pInode->nRef = 1;

1305 pInode->pNext = inodeList;

1306 pInode->pPrev = 0;

1307 if( inodeList ) inodeList->pPrev = pInode;

1308 inodeList = pInode;

1309 }else{

1310 pInode->nRef++;

1311 }

1312 *ppInode = pInode;

1313 return SQLITE_OK;

1314 }

1315

1316 /*

1317 ** Return TRUE if pFile has been renamed or unlinked since it was first opened.

1318 */

1319 static int fileHasMoved(unixFile *pFile){

1320 #if OS_VXWORKS

1321 return pFile->pInode!=0 && pFile->pId!=pFile->pInode->fileId.pId;

1322 #else

1323 struct stat buf;

1324 return pFile->pInode!=0 &&

1325 (osStat(pFile->zPath, &buf)!=0 \|\| buf.st_ino!=pFile->pInode->fileId.ino);

1326 #endif

1327 }

1328

1329

1330 /*

1331 ** Check a unixFile that is a database. Verify the following:

1332 **

1333 ** (1) There is exactly one hard link on the file

1334 ** (2) The file is not a symbolic link

1335 ** (3) The file has not been renamed or unlinked

1336 **

1337 ** Issue sqlite3_log(SQLITE_WARNING,...) messages if anything is not right.

1338 */

1339 static void verifyDbFile(unixFile *pFile){

1340 struct stat buf;

1341 int rc;

1342 if( pFile->ctrlFlags & UNIXFILE_WARNED ){

1343 /* One or more of the following warnings have already been issued. Do not

1344 ** repeat them so as not to clutter the error log */

1345 return;

1346 }

1347 rc = osFstat(pFile->h, &buf);

1348 if( rc!=0 ){

1349 sqlite3_log(SQLITE_WARNING, "cannot fstat db file %s", pFile->zPath);

1350 pFile->ctrlFlags \|= UNIXFILE_WARNED;

1351 return;

1352 }

1353 if( buf.st_nlink==0 && (pFile->ctrlFlags & UNIXFILE_DELETE)==0 ){

1354 sqlite3_log(SQLITE_WARNING, "file unlinked while open: %s", pFile->zPath);

1355 pFile->ctrlFlags \|= UNIXFILE_WARNED;

1356 return;

1357 }

1358 if( buf.st_nlink>1 ){

1359 sqlite3_log(SQLITE_WARNING, "multiple links to file: %s", pFile->zPath);

1360 pFile->ctrlFlags \|= UNIXFILE_WARNED;

1361 return;

1362 }

1363 if( fileHasMoved(pFile) ){

1364 sqlite3_log(SQLITE_WARNING, "file renamed while open: %s", pFile->zPath);

1365 pFile->ctrlFlags \|= UNIXFILE_WARNED;

1366 return;

1367 }

1368 }

1369

1370

1371 /*

1372 ** This routine checks if there is a RESERVED lock held on the specified

1373 ** file by this or any other process. If such a lock is held, set *pResOut

1374 ** to a non-zero value otherwise *pResOut is set to zero. The return value

1375 ** is set to SQLITE_OK unless an I/O error occurs during lock checking.

1376 */

1377 static int unixCheckReservedLock(sqlite3_file id, int pResOut){

1378 int rc = SQLITE_OK;

1379 int reserved = 0;

1380 unixFile pFile = (unixFile)id;

1381

1382 SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );

1383

1384 assert( pFile );

1385 unixEnterMutex(); /* Because pFile->pInode is shared across threads */

1386

1387 /* Check if a thread in this process holds such a lock */

1388 if( pFile->pInode->eFileLock>SHARED_LOCK ){

1389 reserved = 1;

1390 }

1391

1392 /* Otherwise see if some other process holds it.

1393 */

1394 #ifndef __DJGPP__

1395 if( !reserved && !pFile->pInode->bProcessLock ){

1396 struct flock lock;

1397 lock.l_whence = SEEK_SET;

1398 lock.l_start = RESERVED_BYTE;

1399 lock.l_len = 1;

1400 lock.l_type = F_WRLCK;

1401 if( osFcntl(pFile->h, F_GETLK, &lock) ){

1402 rc = SQLITE_IOERR_CHECKRESERVEDLOCK;

1403 pFile->lastErrno = errno;

1404 } else if( lock.l_type!=F_UNLCK ){

1405 reserved = 1;

1406 }

1407 }

1408 #endif

1409

1410 unixLeaveMutex();

1411 OSTRACE(("TEST WR-LOCK %d %d %d (unix)\n", pFile->h, rc, reserved));

1412

1413 *pResOut = reserved;

1414 return rc;

1415 }

1416

1417 /*

1418 ** Attempt to set a system-lock on the file pFile. The lock is

1419 ** described by pLock.

1420 **

1421 ** If the pFile was opened read/write from unix-excl, then the only lock

1422 ** ever obtained is an exclusive lock, and it is obtained exactly once

1423 ** the first time any lock is attempted. All subsequent system locking

1424 ** operations become no-ops. Locking operations still happen internally,

1425 ** in order to coordinate access between separate database connections

1426 ** within this process, but all of that is handled in memory and the

1427 ** operating system does not participate.

1428 **

1429 ** This function is a pass-through to fcntl(F_SETLK) if pFile is using

1430 ** any VFS other than "unix-excl" or if pFile is opened on "unix-excl"

1431 ** and is read-only.

1432 **

1433 ** Zero is returned if the call completes successfully, or -1 if a call

1434 ** to fcntl() fails. In this case, errno is set appropriately (by fcntl()).

1435 */

1436 static int unixFileLock(unixFile pFile, struct flock pLock){

1437 int rc;

1438 unixInodeInfo *pInode = pFile->pInode;

1439 assert( unixMutexHeld() );

1440 assert( pInode!=0 );

1441 if( ((pFile->ctrlFlags & UNIXFILE_EXCL)!=0 \|\| pInode->bProcessLock)

1442 && ((pFile->ctrlFlags & UNIXFILE_RDONLY)==0)

1443 ){

1444 if( pInode->bProcessLock==0 ){

1445 struct flock lock;

1446 assert( pInode->nLock==0 );

1447 lock.l_whence = SEEK_SET;

1448 lock.l_start = SHARED_FIRST;

1449 lock.l_len = SHARED_SIZE;

1450 lock.l_type = F_WRLCK;

1451 rc = osFcntl(pFile->h, F_SETLK, &lock);

1452 if( rc<0 ) return rc;

1453 pInode->bProcessLock = 1;

1454 pInode->nLock++;

1455 }else{

1456 rc = 0;

1457 }

1458 }else{

1459 rc = osFcntl(pFile->h, F_SETLK, pLock);

1460 }

1461 return rc;

1462 }

1463

1464 /*

1465 ** Lock the file with the lock specified by parameter eFileLock - one

1466 ** of the following:

1467 **

1468 ** (1) SHARED_LOCK

1469 ** (2) RESERVED_LOCK

1470 ** (3) PENDING_LOCK

1471 ** (4) EXCLUSIVE_LOCK

1472 **

1473 ** Sometimes when requesting one lock state, additional lock states

1474 ** are inserted in between. The locking might fail on one of the later

1475 ** transitions leaving the lock state different from what it started but

1476 ** still short of its goal. The following chart shows the allowed

1477 ** transitions and the inserted intermediate states:

1478 **

1479 ** UNLOCKED -> SHARED

1480 ** SHARED -> RESERVED

1481 ** SHARED -> (PENDING) -> EXCLUSIVE

1482 ** RESERVED -> (PENDING) -> EXCLUSIVE

1483 ** PENDING -> EXCLUSIVE

1484 **

1485 ** This routine will only increase a lock. Use the sqlite3OsUnlock()

1486 ** routine to lower a locking level.

1487 */

1488 static int unixLock(sqlite3_file *id, int eFileLock){

1489 /* The following describes the implementation of the various locks and

1490 ** lock transitions in terms of the POSIX advisory shared and exclusive

1491 ** lock primitives (called read-locks and write-locks below, to avoid

1492 ** confusion with SQLite lock names). The algorithms are complicated

1493 ** slightly in order to be compatible with windows systems simultaneously

1494 ** accessing the same database file, in case that is ever required.

1495 **

1496 ** Symbols defined in os.h indentify the 'pending byte' and the 'reserved

1497 ** byte', each single bytes at well known offsets, and the 'shared byte

1498 ** range', a range of 510 bytes at a well known offset.

1499 **

1500 ** To obtain a SHARED lock, a read-lock is obtained on the 'pending

1501 ** byte'. If this is successful, a random byte from the 'shared byte

1502 ** range' is read-locked and the lock on the 'pending byte' released.

1503 **

1504 ** A process may only obtain a RESERVED lock after it has a SHARED lock.

1505 ** A RESERVED lock is implemented by grabbing a write-lock on the

1506 ** 'reserved byte'.

1507 **

1508 ** A process may only obtain a PENDING lock after it has obtained a

1509 ** SHARED lock. A PENDING lock is implemented by obtaining a write-lock

1510 ** on the 'pending byte'. This ensures that no new SHARED locks can be

1511 ** obtained, but existing SHARED locks are allowed to persist. A process

1512 ** does not have to obtain a RESERVED lock on the way to a PENDING lock.

1513 ** This property is used by the algorithm for rolling back a journal file

1514 ** after a crash.

1515 **

1516 ** An EXCLUSIVE lock, obtained after a PENDING lock is held, is

1517 ** implemented by obtaining a write-lock on the entire 'shared byte

1518 ** range'. Since all other locks require a read-lock on one of the bytes

1519 ** within this range, this ensures that no other locks are held on the

1520 ** database.

1521 **

1522 ** The reason a single byte cannot be used instead of the 'shared byte

1523 ** range' is that some versions of windows do not support read-locks. By

1524 ** locking a random byte from a range, concurrent SHARED locks may exist

1525 ** even if the locking primitive used is always a write-lock.

1526 */

1527 int rc = SQLITE_OK;

1528 unixFile pFile = (unixFile)id;

1529 unixInodeInfo *pInode;

1530 struct flock lock;

1531 int tErrno = 0;

1532

1533 assert( pFile );

1534 OSTRACE(("LOCK %d %s was %s(%s,%d) pid=%d (unix)\n", pFile->h,

1535 azFileLock(eFileLock), azFileLock(pFile->eFileLock),

1536 azFileLock(pFile->pInode->eFileLock), pFile->pInode->nShared , getpid()));

1537

1538 /* If there is already a lock of this type or more restrictive on the

1539 ** unixFile, do nothing. Don't use the end_lock: exit path, as

1540 ** unixEnterMutex() hasn't been called yet.

1541 */

1542 if( pFile->eFileLock>=eFileLock ){

1543 OSTRACE(("LOCK %d %s ok (already held) (unix)\n", pFile->h,

1544 azFileLock(eFileLock)));

1545 return SQLITE_OK;

1546 }

1547

1548 /* Make sure the locking sequence is correct.

1549 ** (1) We never move from unlocked to anything higher than shared lock.

1550 ** (2) SQLite never explicitly requests a pendig lock.

1551 ** (3) A shared lock is always held when a reserve lock is requested.

1552 */

1553 assert( pFile->eFileLock!=NO_LOCK \|\| eFileLock==SHARED_LOCK );

1554 assert( eFileLock!=PENDING_LOCK );

1555 assert( eFileLock!=RESERVED_LOCK \|\| pFile->eFileLock==SHARED_LOCK );

1556

1557 /* This mutex is needed because pFile->pInode is shared across threads

1558 */

1559 unixEnterMutex();

1560 pInode = pFile->pInode;

1561

1562 /* If some thread using this PID has a lock via a different unixFile*

1563 ** handle that precludes the requested lock, return BUSY.

1564 */

1565 if( (pFile->eFileLock!=pInode->eFileLock &&

1566 (pInode->eFileLock>=PENDING_LOCK \|\| eFileLock>SHARED_LOCK))

1567 ){

1568 rc = SQLITE_BUSY;

1569 goto end_lock;

1570 }

1571

1572 /* If a SHARED lock is requested, and some thread using this PID already

1573 ** has a SHARED or RESERVED lock, then increment reference counts and

1574 ** return SQLITE_OK.

1575 */

1576 if( eFileLock==SHARED_LOCK &&

1577 (pInode->eFileLock==SHARED_LOCK \|\| pInode->eFileLock==RESERVED_LOCK) ){

1578 assert( eFileLock==SHARED_LOCK );

1579 assert( pFile->eFileLock==0 );

1580 assert( pInode->nShared>0 );

1581 pFile->eFileLock = SHARED_LOCK;

1582 pInode->nShared++;

1583 pInode->nLock++;

1584 goto end_lock;

1585 }

1586

1587

1588 /* A PENDING lock is needed before acquiring a SHARED lock and before

1589 ** acquiring an EXCLUSIVE lock. For the SHARED lock, the PENDING will

1590 ** be released.

1591 */

1592 lock.l_len = 1L;

1593 lock.l_whence = SEEK_SET;

1594 if( eFileLock==SHARED_LOCK

1595 \|\| (eFileLock==EXCLUSIVE_LOCK && pFile->eFileLock<PENDING_LOCK)

1596 ){

1597 lock.l_type = (eFileLock==SHARED_LOCK?F_RDLCK:F_WRLCK);

1598 lock.l_start = PENDING_BYTE;

1599 if( unixFileLock(pFile, &lock) ){

1600 tErrno = errno;

1601 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);

1602 if( rc!=SQLITE_BUSY ){

1603 pFile->lastErrno = tErrno;

1604 }

1605 goto end_lock;

1606 }

1607 }

1608

1609

1610 /* If control gets to this point, then actually go ahead and make

1611 ** operating system calls for the specified lock.

1612 */

1613 if( eFileLock==SHARED_LOCK ){

1614 assert( pInode->nShared==0 );

1615 assert( pInode->eFileLock==0 );

1616 assert( rc==SQLITE_OK );

1617

1618 /* Now get the read-lock */

1619 lock.l_start = SHARED_FIRST;

1620 lock.l_len = SHARED_SIZE;

1621 if( unixFileLock(pFile, &lock) ){

1622 tErrno = errno;

1623 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);

1624 }

1625

1626 /* Drop the temporary PENDING lock */

1627 lock.l_start = PENDING_BYTE;

1628 lock.l_len = 1L;

1629 lock.l_type = F_UNLCK;

1630 if( unixFileLock(pFile, &lock) && rc==SQLITE_OK ){

1631 /* This could happen with a network mount */

1632 tErrno = errno;

1633 rc = SQLITE_IOERR_UNLOCK;

1634 }

1635

1636 if( rc ){

1637 if( rc!=SQLITE_BUSY ){

1638 pFile->lastErrno = tErrno;

1639 }

1640 goto end_lock;

1641 }else{

1642 pFile->eFileLock = SHARED_LOCK;

1643 pInode->nLock++;

1644 pInode->nShared = 1;

1645 }

1646 }else if( eFileLock==EXCLUSIVE_LOCK && pInode->nShared>1 ){

1647 /* We are trying for an exclusive lock but another thread in this

1648 ** same process is still holding a shared lock. */

1649 rc = SQLITE_BUSY;

1650 }else{

1651 /* The request was for a RESERVED or EXCLUSIVE lock. It is

1652 ** assumed that there is a SHARED or greater lock on the file

1653 ** already.

1654 */

1655 assert( 0!=pFile->eFileLock );

1656 lock.l_type = F_WRLCK;

1657

1658 assert( eFileLock==RESERVED_LOCK \|\| eFileLock==EXCLUSIVE_LOCK );

1659 if( eFileLock==RESERVED_LOCK ){

1660 lock.l_start = RESERVED_BYTE;

1661 lock.l_len = 1L;

1662 }else{

1663 lock.l_start = SHARED_FIRST;

1664 lock.l_len = SHARED_SIZE;

1665 }

1666

1667 if( unixFileLock(pFile, &lock) ){

1668 tErrno = errno;

1669 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);

1670 if( rc!=SQLITE_BUSY ){

1671 pFile->lastErrno = tErrno;

1672 }

1673 }

1674 }

1675

1676

1677 #ifdef SQLITE_DEBUG

1678 /* Set up the transaction-counter change checking flags when

1679 ** transitioning from a SHARED to a RESERVED lock. The change

1680 ** from SHARED to RESERVED marks the beginning of a normal

1681 ** write operation (not a hot journal rollback).

1682 */

1683 if( rc==SQLITE_OK

1684 && pFile->eFileLock<=SHARED_LOCK

1685 && eFileLock==RESERVED_LOCK

1686 ){

1687 pFile->transCntrChng = 0;

1688 pFile->dbUpdate = 0;

1689 pFile->inNormalWrite = 1;

1690 }

1691 #endif

1692

1693

1694 if( rc==SQLITE_OK ){

1695 pFile->eFileLock = eFileLock;

1696 pInode->eFileLock = eFileLock;

1697 }else if( eFileLock==EXCLUSIVE_LOCK ){

1698 pFile->eFileLock = PENDING_LOCK;

1699 pInode->eFileLock = PENDING_LOCK;

1700 }

1701

1702 end_lock:

1703 unixLeaveMutex();

1704 OSTRACE(("LOCK %d %s %s (unix)\n", pFile->h, azFileLock(eFileLock),

1705 rc==SQLITE_OK ? "ok" : "failed"));

1706 return rc;

1707 }

1708

1709 /*

1710 ** Add the file descriptor used by file handle pFile to the corresponding

1711 ** pUnused list.

1712 */

1713 static void setPendingFd(unixFile *pFile){

1714 unixInodeInfo *pInode = pFile->pInode;

1715 UnixUnusedFd *p = pFile->pUnused;

1716 p->pNext = pInode->pUnused;

1717 pInode->pUnused = p;

1718 pFile->h = -1;

1719 pFile->pUnused = 0;

1720 }

1721

1722 /*

1723 ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock

1724 ** must be either NO_LOCK or SHARED_LOCK.

1725 **

1726 ** If the locking level of the file descriptor is already at or below

1727 ** the requested locking level, this routine is a no-op.

1728 **

1729 ** If handleNFSUnlock is true, then on downgrading an EXCLUSIVE_LOCK to SHARED

1730 ** the byte range is divided into 2 parts and the first part is unlocked then

1731 ** set to a read lock, then the other part is simply unlocked. This works

1732 ** around a bug in BSD NFS lockd (also seen on MacOSX 10.3+) that fails to

1733 ** remove the write lock on a region when a read lock is set.

1734 */

1735 static int posixUnlock(sqlite3_file *id, int eFileLock, int handleNFSUnlock){

1736 unixFile pFile = (unixFile)id;

1737 unixInodeInfo *pInode;

1738 struct flock lock;

1739 int rc = SQLITE_OK;

1740

1741 assert( pFile );

1742 OSTRACE(("UNLOCK %d %d was %d(%d,%d) pid=%d (unix)\n", pFile->h, eFileLock,

1743 pFile->eFileLock, pFile->pInode->eFileLock, pFile->pInode->nShared,

1744 getpid()));

1745

1746 assert( eFileLock<=SHARED_LOCK );

1747 if( pFile->eFileLock<=eFileLock ){

1748 return SQLITE_OK;

1749 }

1750 unixEnterMutex();

1751 pInode = pFile->pInode;

1752 assert( pInode->nShared!=0 );

1753 if( pFile->eFileLock>SHARED_LOCK ){

1754 assert( pInode->eFileLock==pFile->eFileLock );

1755

1756 #ifdef SQLITE_DEBUG

1757 /* When reducing a lock such that other processes can start

1758 ** reading the database file again, make sure that the

1759 ** transaction counter was updated if any part of the database

1760 ** file changed. If the transaction counter is not updated,

1761 ** other connections to the same file might not realize that

1762 ** the file has changed and hence might not know to flush their

1763 ** cache. The use of a stale cache can lead to database corruption.

1764 */

1765 pFile->inNormalWrite = 0;

1766 #endif

1767

1768 /* downgrading to a shared lock on NFS involves clearing the write lock

1769 ** before establishing the readlock - to avoid a race condition we downgrade

1770 ** the lock in 2 blocks, so that part of the range will be covered by a

1771 ** write lock until the rest is covered by a read lock:

1772 ** 1: [WWWWW]

1773 ** 2: [....W]

1774 ** 3: [RRRRW]

1775 ** 4: [RRRR.]

1776 */

1777 if( eFileLock==SHARED_LOCK ){

1778

1779 #if !defined(__APPLE__) \|\| !SQLITE_ENABLE_LOCKING_STYLE

1780 (void)handleNFSUnlock;

1781 assert( handleNFSUnlock==0 );

1782 #endif

1783 #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE

1784 if( handleNFSUnlock ){

1785 int tErrno; /* Error code from system call errors */

1786 off_t divSize = SHARED_SIZE - 1;

1787

1788 lock.l_type = F_UNLCK;

1789 lock.l_whence = SEEK_SET;

1790 lock.l_start = SHARED_FIRST;

1791 lock.l_len = divSize;

1792 if( unixFileLock(pFile, &lock)==(-1) ){

1793 tErrno = errno;

1794 rc = SQLITE_IOERR_UNLOCK;

1795 if( IS_LOCK_ERROR(rc) ){

1796 pFile->lastErrno = tErrno;

1797 }

1798 goto end_unlock;

1799 }

1800 lock.l_type = F_RDLCK;

1801 lock.l_whence = SEEK_SET;

1802 lock.l_start = SHARED_FIRST;

1803 lock.l_len = divSize;

1804 if( unixFileLock(pFile, &lock)==(-1) ){

1805 tErrno = errno;

1806 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_RDLOCK);

1807 if( IS_LOCK_ERROR(rc) ){

1808 pFile->lastErrno = tErrno;

1809 }

1810 goto end_unlock;

1811 }

1812 lock.l_type = F_UNLCK;

1813 lock.l_whence = SEEK_SET;

1814 lock.l_start = SHARED_FIRST+divSize;

1815 lock.l_len = SHARED_SIZE-divSize;

1816 if( unixFileLock(pFile, &lock)==(-1) ){

1817 tErrno = errno;

1818 rc = SQLITE_IOERR_UNLOCK;

1819 if( IS_LOCK_ERROR(rc) ){

1820 pFile->lastErrno = tErrno;

1821 }

1822 goto end_unlock;

1823 }

1824 }else

1825 #endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */

1826 {

1827 lock.l_type = F_RDLCK;

1828 lock.l_whence = SEEK_SET;

1829 lock.l_start = SHARED_FIRST;

1830 lock.l_len = SHARED_SIZE;

1831 if( unixFileLock(pFile, &lock) ){

1832 /* In theory, the call to unixFileLock() cannot fail because another

1833 ** process is holding an incompatible lock. If it does, this

1834 ** indicates that the other process is not following the locking

1835 ** protocol. If this happens, return SQLITE_IOERR_RDLOCK. Returning

1836 ** SQLITE_BUSY would confuse the upper layer (in practice it causes

1837 ** an assert to fail). */

1838 rc = SQLITE_IOERR_RDLOCK;

1839 pFile->lastErrno = errno;

1840 goto end_unlock;

1841 }

1842 }

1843 }

1844 lock.l_type = F_UNLCK;

1845 lock.l_whence = SEEK_SET;

1846 lock.l_start = PENDING_BYTE;

1847 lock.l_len = 2L; assert( PENDING_BYTE+1==RESERVED_BYTE );

1848 if( unixFileLock(pFile, &lock)==0 ){

1849 pInode->eFileLock = SHARED_LOCK;

1850 }else{

1851 rc = SQLITE_IOERR_UNLOCK;

1852 pFile->lastErrno = errno;

1853 goto end_unlock;

1854 }

1855 }

1856 if( eFileLock==NO_LOCK ){

1857 /* Decrement the shared lock counter. Release the lock using an

1858 ** OS call only when all threads in this same process have released

1859 ** the lock.

1860 */

1861 pInode->nShared--;

1862 if( pInode->nShared==0 ){

1863 lock.l_type = F_UNLCK;

1864 lock.l_whence = SEEK_SET;

1865 lock.l_start = lock.l_len = 0L;

1866 if( unixFileLock(pFile, &lock)==0 ){

1867 pInode->eFileLock = NO_LOCK;

1868 }else{

1869 rc = SQLITE_IOERR_UNLOCK;

1870 pFile->lastErrno = errno;

1871 pInode->eFileLock = NO_LOCK;

1872 pFile->eFileLock = NO_LOCK;

1873 }

1874 }

1875

1876 /* Decrement the count of locks against this same file. When the

1877 ** count reaches zero, close any other file descriptors whose close

1878 ** was deferred because of outstanding locks.

1879 */

1880 pInode->nLock--;

1881 assert( pInode->nLock>=0 );

1882 if( pInode->nLock==0 ){

1883 closePendingFds(pFile);

1884 }

1885 }

1886

1887 end_unlock:

1888 unixLeaveMutex();

1889 if( rc==SQLITE_OK ) pFile->eFileLock = eFileLock;

1890 return rc;

1891 }

1892

1893 /*

1894 ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock

1895 ** must be either NO_LOCK or SHARED_LOCK.

1896 **

1897 ** If the locking level of the file descriptor is already at or below

1898 ** the requested locking level, this routine is a no-op.

1899 */

1900 static int unixUnlock(sqlite3_file *id, int eFileLock){

1901 #if SQLITE_MAX_MMAP_SIZE>0

1902 assert( eFileLock==SHARED_LOCK \|\| ((unixFile *)id)->nFetchOut==0 );

1903 #endif

1904 return posixUnlock(id, eFileLock, 0);

1905 }

1906

1907 #if SQLITE_MAX_MMAP_SIZE>0

1908 static int unixMapfile(unixFile *pFd, i64 nByte);

1909 static void unixUnmapfile(unixFile *pFd);

1910 #endif

1911

1912 /*

1913 ** This function performs the parts of the "close file" operation

1914 ** common to all locking schemes. It closes the directory and file

1915 ** handles, if they are valid, and sets all fields of the unixFile

1916 ** structure to 0.

1917 **

1918 ** It is not necessary to hold the mutex when this routine is called,

1919 ** even on VxWorks. A mutex will be acquired on VxWorks by the

1920 ** vxworksReleaseFileId() routine.

1921 */

1922 static int closeUnixFile(sqlite3_file *id){

1923 unixFile pFile = (unixFile)id;

1924 #if SQLITE_MAX_MMAP_SIZE>0

1925 unixUnmapfile(pFile);

1926 #endif

1927 if( pFile->h>=0 ){

1928 robust_close(pFile, pFile->h, __LINE__);

1929 pFile->h = -1;

1930 }

1931 #if OS_VXWORKS

1932 if( pFile->pId ){

1933 if( pFile->ctrlFlags & UNIXFILE_DELETE ){

1934 osUnlink(pFile->pId->zCanonicalName);

1935 }

1936 vxworksReleaseFileId(pFile->pId);

1937 pFile->pId = 0;

1938 }

1939 #endif

1940 #ifdef SQLITE_UNLINK_AFTER_CLOSE

1941 if( pFile->ctrlFlags & UNIXFILE_DELETE ){

1942 osUnlink(pFile->zPath);

1943 sqlite3_free((char*)&pFile->zPath);

1944 pFile->zPath = 0;

1945 }

1946 #endif

1947 OSTRACE(("CLOSE %-3d\n", pFile->h));

1948 OpenCounter(-1);

1949 sqlite3_free(pFile->pUnused);

1950 memset(pFile, 0, sizeof(unixFile));

1951 return SQLITE_OK;

1952 }

1953

1954 /*

1955 ** Close a file.

1956 */

1957 static int unixClose(sqlite3_file *id){

1958 int rc = SQLITE_OK;

1959 unixFile pFile = (unixFile )id;

1960 verifyDbFile(pFile);

1961 unixUnlock(id, NO_LOCK);

1962 unixEnterMutex();

1963

1964 /* unixFile.pInode is always valid here. Otherwise, a different close

1965 ** routine (e.g. nolockClose()) would be called instead.

1966 */

1967 assert( pFile->pInode->nLock>0 \|\| pFile->pInode->bProcessLock==0 );

1968 if( ALWAYS(pFile->pInode) && pFile->pInode->nLock ){

1969 /* If there are outstanding locks, do not actually close the file just

1970 ** yet because that would clear those locks. Instead, add the file

1971 ** descriptor to pInode->pUnused list. It will be automatically closed

1972 ** when the last lock is cleared.

1973 */

1974 setPendingFd(pFile);

1975 }

1976 releaseInodeInfo(pFile);

1977 rc = closeUnixFile(id);

1978 unixLeaveMutex();

1979 return rc;

1980 }

1981

1982 /************ End of the posix advisory lock implementation ***************

1983 ******************************************************************************/

1984

1985 /******************************************************************************

1986 **************************** No-op Locking ********************************

1987 **

1988 ** Of the various locking implementations available, this is by far the

1989 ** simplest: locking is ignored. No attempt is made to lock the database

1990 ** file for reading or writing.

1991 **

1992 ** This locking mode is appropriate for use on read-only databases

1993 ** (for example, databases that are burned onto a CD-ROM). It can

1994 ** also be used if the application employs some external mechanism to

1995 ** prevent simultaneous access of the same database by two or more

1996 ** database connections. But there is a serious risk of database

1997 ** corruption if this locking mode is used in situations where multiple

1998 ** database connections are accessing the same database file at the same

1999 ** time and one or more of those connections are writing.

2000 */

2001

2002 static int nolockCheckReservedLock(sqlite3_file NotUsed, int pResOut){

2003 UNUSED_PARAMETER(NotUsed);

2004 *pResOut = 0;

2005 return SQLITE_OK;

2006 }

2007 static int nolockLock(sqlite3_file *NotUsed, int NotUsed2){

2008 UNUSED_PARAMETER2(NotUsed, NotUsed2);

2009 return SQLITE_OK;

2010 }

2011 static int nolockUnlock(sqlite3_file *NotUsed, int NotUsed2){

2012 UNUSED_PARAMETER2(NotUsed, NotUsed2);

2013 return SQLITE_OK;

2014 }

2015

2016 /*

2017 ** Close the file.

2018 */

2019 static int nolockClose(sqlite3_file *id) {

2020 return closeUnixFile(id);

2021 }

2022

2023 /***************** End of the no-op lock implementation *******************

2024 ******************************************************************************/

2025

2026 /******************************************************************************

2027 *********************** Begin dot-file Locking ****************************

2028 **

2029 ** The dotfile locking implementation uses the existence of separate lock

2030 ** files (really a directory) to control access to the database. This works

2031 ** on just about every filesystem imaginable. But there are serious downsides:

2032 **

2033 ** (1) There is zero concurrency. A single reader blocks all other

2034 ** connections from reading or writing the database.

2035 **

2036 ** (2) An application crash or power loss can leave stale lock files

2037 ** sitting around that need to be cleared manually.

2038 **

2039 ** Nevertheless, a dotlock is an appropriate locking mode for use if no

2040 ** other locking strategy is available.

2041 **

2042 ** Dotfile locking works by creating a subdirectory in the same directory as

2043 ** the database and with the same name but with a ".lock" extension added.

2044 ** The existence of a lock directory implies an EXCLUSIVE lock. All other

2045 ** lock types (SHARED, RESERVED, PENDING) are mapped into EXCLUSIVE.

2046 */

2047

2048 /*

2049 ** The file suffix added to the data base filename in order to create the

2050 ** lock directory.

2051 */

2052 #define DOTLOCK_SUFFIX ".lock"

2053

2054 /*

2055 ** This routine checks if there is a RESERVED lock held on the specified

2056 ** file by this or any other process. If such a lock is held, set *pResOut

2057 ** to a non-zero value otherwise *pResOut is set to zero. The return value

2058 ** is set to SQLITE_OK unless an I/O error occurs during lock checking.

2059 **

2060 ** In dotfile locking, either a lock exists or it does not. So in this

2061 ** variation of CheckReservedLock(), *pResOut is set to true if any lock

2062 ** is held on the file and false if the file is unlocked.

2063 */

2064 static int dotlockCheckReservedLock(sqlite3_file id, int pResOut) {

2065 int rc = SQLITE_OK;

2066 int reserved = 0;

2067 unixFile pFile = (unixFile)id;

2068

2069 SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );

2070

2071 assert( pFile );

2072

2073 /* Check if a thread in this process holds such a lock */

2074 if( pFile->eFileLock>SHARED_LOCK ){

2075 /* Either this connection or some other connection in the same process

2076 ** holds a lock on the file. No need to check further. */

2077 reserved = 1;

2078 }else{

2079 /* The lock is held if and only if the lockfile exists */

2080 const char zLockFile = (const char)pFile->lockingContext;

2081 reserved = osAccess(zLockFile, 0)==0;

2082 }

2083 OSTRACE(("TEST WR-LOCK %d %d %d (dotlock)\n", pFile->h, rc, reserved));

2084 *pResOut = reserved;

2085 return rc;

2086 }

2087

2088 /*

2089 ** Lock the file with the lock specified by parameter eFileLock - one

2090 ** of the following:

2091 **

2092 ** (1) SHARED_LOCK

2093 ** (2) RESERVED_LOCK

2094 ** (3) PENDING_LOCK

2095 ** (4) EXCLUSIVE_LOCK

2096 **

2097 ** Sometimes when requesting one lock state, additional lock states

2098 ** are inserted in between. The locking might fail on one of the later

2099 ** transitions leaving the lock state different from what it started but

2100 ** still short of its goal. The following chart shows the allowed

2101 ** transitions and the inserted intermediate states:

2102 **

2103 ** UNLOCKED -> SHARED

2104 ** SHARED -> RESERVED

2105 ** SHARED -> (PENDING) -> EXCLUSIVE

2106 ** RESERVED -> (PENDING) -> EXCLUSIVE

2107 ** PENDING -> EXCLUSIVE

2108 **

2109 ** This routine will only increase a lock. Use the sqlite3OsUnlock()

2110 ** routine to lower a locking level.

2111 **

2112 ** With dotfile locking, we really only support state (4): EXCLUSIVE.

2113 ** But we track the other locking levels internally.

2114 */

2115 static int dotlockLock(sqlite3_file *id, int eFileLock) {

2116 unixFile pFile = (unixFile)id;

2117 char zLockFile = (char )pFile->lockingContext;

2118 int rc = SQLITE_OK;

2119

2120

2121 /* If we have any lock, then the lock file already exists. All we have

2122 ** to do is adjust our internal record of the lock level.

2123 */

2124 if( pFile->eFileLock > NO_LOCK ){

2125 pFile->eFileLock = eFileLock;

2126 /* Always update the timestamp on the old file */

2127 #ifdef HAVE_UTIME

2128 utime(zLockFile, NULL);

2129 #else

2130 utimes(zLockFile, NULL);

2131 #endif

2132 return SQLITE_OK;

2133 }

2134

2135 /* grab an exclusive lock */

2136 rc = osMkdir(zLockFile, 0777);

2137 if( rc<0 ){

2138 /* failed to open/create the lock directory */

2139 int tErrno = errno;

2140 if( EEXIST == tErrno ){

2141 rc = SQLITE_BUSY;

2142 } else {

2143 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);

2144 if( IS_LOCK_ERROR(rc) ){

2145 pFile->lastErrno = tErrno;

2146 }

2147 }

2148 return rc;

2149 }

2150

2151 /* got it, set the type and return ok */

2152 pFile->eFileLock = eFileLock;

2153 return rc;

2154 }

2155

2156 /*

2157 ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock

2158 ** must be either NO_LOCK or SHARED_LOCK.

2159 **

2160 ** If the locking level of the file descriptor is already at or below

2161 ** the requested locking level, this routine is a no-op.

2162 **

2163 ** When the locking level reaches NO_LOCK, delete the lock file.

2164 */

2165 static int dotlockUnlock(sqlite3_file *id, int eFileLock) {

2166 unixFile pFile = (unixFile)id;

2167 char zLockFile = (char )pFile->lockingContext;

2168 int rc;

2169

2170 assert( pFile );

2171 OSTRACE(("UNLOCK %d %d was %d pid=%d (dotlock)\n", pFile->h, eFileLock,

2172 pFile->eFileLock, getpid()));

2173 assert( eFileLock<=SHARED_LOCK );

2174

2175 /* no-op if possible */

2176 if( pFile->eFileLock==eFileLock ){

2177 return SQLITE_OK;

2178 }

2179

2180 /* To downgrade to shared, simply update our internal notion of the

2181 ** lock state. No need to mess with the file on disk.

2182 */

2183 if( eFileLock==SHARED_LOCK ){

2184 pFile->eFileLock = SHARED_LOCK;

2185 return SQLITE_OK;

2186 }

2187

2188 /* To fully unlock the database, delete the lock file */

2189 assert( eFileLock==NO_LOCK );

2190 rc = osRmdir(zLockFile);

2191 if( rc<0 && errno==ENOTDIR ) rc = osUnlink(zLockFile);

2192 if( rc<0 ){

2193 int tErrno = errno;

2194 rc = 0;

2195 if( ENOENT != tErrno ){

2196 rc = SQLITE_IOERR_UNLOCK;

2197 }

2198 if( IS_LOCK_ERROR(rc) ){

2199 pFile->lastErrno = tErrno;

2200 }

2201 return rc;

2202 }

2203 pFile->eFileLock = NO_LOCK;

2204 return SQLITE_OK;

2205 }

2206

2207 /*

2208 ** Close a file. Make sure the lock has been released before closing.

2209 */

2210 static int dotlockClose(sqlite3_file *id) {

2211 int rc = SQLITE_OK;

2212 if( id ){

2213 unixFile pFile = (unixFile)id;

2214 dotlockUnlock(id, NO_LOCK);

2215 sqlite3_free(pFile->lockingContext);

2216 rc = closeUnixFile(id);

2217 }

2218 return rc;

2219 }

2220 /**************** End of the dot-file lock implementation *****************

2221 ******************************************************************************/

2222

2223 /******************************************************************************

2224 ************************ Begin flock Locking ******************************

2225 **

2226 ** Use the flock() system call to do file locking.

2227 **

2228 ** flock() locking is like dot-file locking in that the various

2229 ** fine-grain locking levels supported by SQLite are collapsed into

2230 ** a single exclusive lock. In other words, SHARED, RESERVED, and

2231 ** PENDING locks are the same thing as an EXCLUSIVE lock. SQLite

2232 ** still works when you do this, but concurrency is reduced since

2233 ** only a single process can be reading the database at a time.

2234 **

2235 ** Omit this section if SQLITE_ENABLE_LOCKING_STYLE is turned off or if

2236 ** compiling for VXWORKS.

2237 */

2238 #if SQLITE_ENABLE_LOCKING_STYLE && !OS_VXWORKS

2239

/*
** Wrapper around flock() that repeats the call for as long as it fails
** with errno set to EINTR.  The result of the last flock() call is
** returned.  On platforms that do not define EINTR the wrapper is a
** plain alias for flock().
*/
#ifdef EINTR
static int robust_flock(int fd, int op){
  int res;
  for(;;){
    res = flock(fd, op);
    /* Stop on success or on any failure other than an interruption */
    if( res>=0 || errno!=EINTR ) break;
  }
  return res;
}
#else
# define robust_flock(a,b) flock(a,b)
#endif

2252

2253

2254 /*

2255 ** This routine checks if there is a RESERVED lock held on the specified

2256 ** file by this or any other process. If such a lock is held, set *pResOut

2257 ** to a non-zero value otherwise *pResOut is set to zero. The return value

2258 ** is set to SQLITE_OK unless an I/O error occurs during lock checking.

2259 */

2260 static int flockCheckReservedLock(sqlite3_file id, int pResOut){

2261 int rc = SQLITE_OK;

2262 int reserved = 0;

2263 unixFile pFile = (unixFile)id;

2264

2265 SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );

2266

2267 assert( pFile );

2268

2269 /* Check if a thread in this process holds such a lock */

2270 if( pFile->eFileLock>SHARED_LOCK ){

2271 reserved = 1;

2272 }

2273

2274 /* Otherwise see if some other process holds it. */

2275 if( !reserved ){

2276 /* attempt to get the lock */

2277 int lrc = robust_flock(pFile->h, LOCK_EX \| LOCK_NB);

2278 if( !lrc ){

2279 /* got the lock, unlock it */

2280 lrc = robust_flock(pFile->h, LOCK_UN);

2281 if ( lrc ) {

2282 int tErrno = errno;

2283 /* unlock failed with an error */

2284 lrc = SQLITE_IOERR_UNLOCK;

2285 if( IS_LOCK_ERROR(lrc) ){

2286 pFile->lastErrno = tErrno;

2287 rc = lrc;

2288 }

2289 }

2290 } else {

2291 int tErrno = errno;

2292 reserved = 1;

2293 /* someone else might have it reserved */

2294 lrc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);

2295 if( IS_LOCK_ERROR(lrc) ){

2296 pFile->lastErrno = tErrno;

2297 rc = lrc;

2298 }

2299 }

2300 }

2301 OSTRACE(("TEST WR-LOCK %d %d %d (flock)\n", pFile->h, rc, reserved));

2302

2303 #ifdef SQLITE_IGNORE_FLOCK_LOCK_ERRORS

2304 if( (rc & SQLITE_IOERR) == SQLITE_IOERR ){

2305 rc = SQLITE_OK;

2306 reserved=1;

2307 }

2308 #endif /* SQLITE_IGNORE_FLOCK_LOCK_ERRORS */

2309 *pResOut = reserved;

2310 return rc;

2311 }

2312

2313 /*

2314 ** Lock the file with the lock specified by parameter eFileLock - one

2315 ** of the following:

2316 **

2317 ** (1) SHARED_LOCK

2318 ** (2) RESERVED_LOCK

2319 ** (3) PENDING_LOCK

2320 ** (4) EXCLUSIVE_LOCK

2321 **

2322 ** Sometimes when requesting one lock state, additional lock states

2323 ** are inserted in between. The locking might fail on one of the later

2324 ** transitions leaving the lock state different from what it started but

2325 ** still short of its goal. The following chart shows the allowed

2326 ** transitions and the inserted intermediate states:

2327 **

2328 ** UNLOCKED -> SHARED

2329 ** SHARED -> RESERVED

2330 ** SHARED -> (PENDING) -> EXCLUSIVE

2331 ** RESERVED -> (PENDING) -> EXCLUSIVE

2332 ** PENDING -> EXCLUSIVE

2333 **

2334 ** flock() only really support EXCLUSIVE locks. We track intermediate

2335 ** lock states in the sqlite3_file structure, but all locks SHARED or

2336 ** above are really EXCLUSIVE locks and exclude all other processes from

2337 ** access the file.

2338 **

2339 ** This routine will only increase a lock. Use the sqlite3OsUnlock()

2340 ** routine to lower a locking level.

2341 */

2342 static int flockLock(sqlite3_file *id, int eFileLock) {

2343 int rc = SQLITE_OK;

2344 unixFile pFile = (unixFile)id;

2345

2346 assert( pFile );

2347

2348 /* if we already have a lock, it is exclusive.

2349 ** Just adjust level and punt on outta here. */

2350 if (pFile->eFileLock > NO_LOCK) {

2351 pFile->eFileLock = eFileLock;

2352 return SQLITE_OK;

2353 }

2354

2355 /* grab an exclusive lock */

2356

2357 if (robust_flock(pFile->h, LOCK_EX \| LOCK_NB)) {

2358 int tErrno = errno;

2359 /* didn't get, must be busy */

2360 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);

2361 if( IS_LOCK_ERROR(rc) ){

2362 pFile->lastErrno = tErrno;

2363 }

2364 } else {

2365 /* got it, set the type and return ok */

2366 pFile->eFileLock = eFileLock;

2367 }

2368 OSTRACE(("LOCK %d %s %s (flock)\n", pFile->h, azFileLock(eFileLock),

2369 rc==SQLITE_OK ? "ok" : "failed"));

2370 #ifdef SQLITE_IGNORE_FLOCK_LOCK_ERRORS

2371 if( (rc & SQLITE_IOERR) == SQLITE_IOERR ){

2372 rc = SQLITE_BUSY;

2373 }

2374 #endif /* SQLITE_IGNORE_FLOCK_LOCK_ERRORS */

2375 return rc;

2376 }

2377

2378

2379 /*

2380 ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock

2381 ** must be either NO_LOCK or SHARED_LOCK.

2382 **

2383 ** If the locking level of the file descriptor is already at or below

2384 ** the requested locking level, this routine is a no-op.

2385 */

2386 static int flockUnlock(sqlite3_file *id, int eFileLock) {

2387 unixFile pFile = (unixFile)id;

2388

2389 assert( pFile );

2390 OSTRACE(("UNLOCK %d %d was %d pid=%d (flock)\n", pFile->h, eFileLock,

2391 pFile->eFileLock, getpid()));

2392 assert( eFileLock<=SHARED_LOCK );

2393

2394 /* no-op if possible */

2395 if( pFile->eFileLock==eFileLock ){

2396 return SQLITE_OK;

2397 }

2398

2399 /* shared can just be set because we always have an exclusive */

2400 if (eFileLock==SHARED_LOCK) {

2401 pFile->eFileLock = eFileLock;

2402 return SQLITE_OK;

2403 }

2404

2405 /* no, really, unlock. */

2406 if( robust_flock(pFile->h, LOCK_UN) ){

2407 #ifdef SQLITE_IGNORE_FLOCK_LOCK_ERRORS

2408 return SQLITE_OK;

2409 #endif /* SQLITE_IGNORE_FLOCK_LOCK_ERRORS */

2410 return SQLITE_IOERR_UNLOCK;

2411 }else{

2412 pFile->eFileLock = NO_LOCK;

2413 return SQLITE_OK;

2414 }

2415 }

2416

2417 /*

2418 ** Close a file.

2419 */

2420 static int flockClose(sqlite3_file *id) {

2421 int rc = SQLITE_OK;

2422 if( id ){

2423 flockUnlock(id, NO_LOCK);

2424 rc = closeUnixFile(id);

2425 }

2426 return rc;

2427 }

2428

2429 #endif /* SQLITE_ENABLE_LOCKING_STYLE && !OS_VXWORKS */

2430

2431 /***************** End of the flock lock implementation *******************

2432 ******************************************************************************/

2433

2434 /******************************************************************************

2435 ********************** Begin Named Semaphore Locking **********************

2436 **

2437 ** Named semaphore locking is only supported on VxWorks.

2438 **

2439 ** Semaphore locking is like dot-lock and flock in that it really only

2440 ** supports EXCLUSIVE locking. Only a single process can read or write

2441 ** the database file at a time. This reduces potential concurrency, but

2442 ** makes the lock implementation much easier.

2443 */

2444 #if OS_VXWORKS

2445

2446 /*

2447 ** This routine checks if there is a RESERVED lock held on the specified

2448 ** file by this or any other process. If such a lock is held, set *pResOut

2449 ** to a non-zero value otherwise *pResOut is set to zero. The return value

2450 ** is set to SQLITE_OK unless an I/O error occurs during lock checking.

2451 */

2452 static int semCheckReservedLock(sqlite3_file id, int pResOut) {

2453 int rc = SQLITE_OK;

2454 int reserved = 0;

2455 unixFile pFile = (unixFile)id;

2456

2457 SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );

2458

2459 assert( pFile );

2460

2461 /* Check if a thread in this process holds such a lock */

2462 if( pFile->eFileLock>SHARED_LOCK ){

2463 reserved = 1;

2464 }

2465

2466 /* Otherwise see if some other process holds it. */

2467 if( !reserved ){

2468 sem_t *pSem = pFile->pInode->pSem;

2469

2470 if( sem_trywait(pSem)==-1 ){

2471 int tErrno = errno;

2472 if( EAGAIN != tErrno ){

2473 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_CHECKRESERVEDLOCK);

2474 pFile->lastErrno = tErrno;

2475 } else {

2476 /* someone else has the lock when we are in NO_LOCK */

2477 reserved = (pFile->eFileLock < SHARED_LOCK);

2478 }

2479 }else{

2480 /* we could have it if we want it */

2481 sem_post(pSem);

2482 }

2483 }

2484 OSTRACE(("TEST WR-LOCK %d %d %d (sem)\n", pFile->h, rc, reserved));

2485

2486 *pResOut = reserved;

2487 return rc;

2488 }

2489

2490 /*

2491 ** Lock the file with the lock specified by parameter eFileLock - one

2492 ** of the following:

2493 **

2494 ** (1) SHARED_LOCK

2495 ** (2) RESERVED_LOCK

2496 ** (3) PENDING_LOCK

2497 ** (4) EXCLUSIVE_LOCK

2498 **

2499 ** Sometimes when requesting one lock state, additional lock states

2500 ** are inserted in between. The locking might fail on one of the later

2501 ** transitions leaving the lock state different from what it started but

2502 ** still short of its goal. The following chart shows the allowed

2503 ** transitions and the inserted intermediate states:

2504 **

2505 ** UNLOCKED -> SHARED

2506 ** SHARED -> RESERVED

2507 ** SHARED -> (PENDING) -> EXCLUSIVE

2508 ** RESERVED -> (PENDING) -> EXCLUSIVE

2509 ** PENDING -> EXCLUSIVE

2510 **

2511 ** Semaphore locks only really support EXCLUSIVE locks. We track intermediate

2512 ** lock states in the sqlite3_file structure, but all locks SHARED or

2513 ** above are really EXCLUSIVE locks and exclude all other processes from

2514 ** access the file.

2515 **

2516 ** This routine will only increase a lock. Use the sqlite3OsUnlock()

2517 ** routine to lower a locking level.

2518 */

2519 static int semLock(sqlite3_file *id, int eFileLock) {

2520 unixFile pFile = (unixFile)id;

2521 sem_t *pSem = pFile->pInode->pSem;

2522 int rc = SQLITE_OK;

2523

2524 /* if we already have a lock, it is exclusive.

2525 ** Just adjust level and punt on outta here. */

2526 if (pFile->eFileLock > NO_LOCK) {

2527 pFile->eFileLock = eFileLock;

2528 rc = SQLITE_OK;

2529 goto sem_end_lock;

2530 }

2531

2532 /* lock semaphore now but bail out when already locked. */

2533 if( sem_trywait(pSem)==-1 ){

2534 rc = SQLITE_BUSY;

2535 goto sem_end_lock;

2536 }

2537

2538 /* got it, set the type and return ok */

2539 pFile->eFileLock = eFileLock;

2540

2541 sem_end_lock:

2542 return rc;

2543 }

2544

2545 /*

2546 ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock

2547 ** must be either NO_LOCK or SHARED_LOCK.

2548 **

2549 ** If the locking level of the file descriptor is already at or below

2550 ** the requested locking level, this routine is a no-op.

2551 */

2552 static int semUnlock(sqlite3_file *id, int eFileLock) {

2553 unixFile pFile = (unixFile)id;

2554 sem_t *pSem = pFile->pInode->pSem;

2555

2556 assert( pFile );

2557 assert( pSem );

2558 OSTRACE(("UNLOCK %d %d was %d pid=%d (sem)\n", pFile->h, eFileLock,

2559 pFile->eFileLock, getpid()));

2560 assert( eFileLock<=SHARED_LOCK );

2561

2562 /* no-op if possible */

2563 if( pFile->eFileLock==eFileLock ){

2564 return SQLITE_OK;

2565 }

2566

2567 /* shared can just be set because we always have an exclusive */

2568 if (eFileLock==SHARED_LOCK) {

2569 pFile->eFileLock = eFileLock;

2570 return SQLITE_OK;

2571 }

2572

2573 /* no, really unlock. */

2574 if ( sem_post(pSem)==-1 ) {

2575 int rc, tErrno = errno;

2576 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK);

2577 if( IS_LOCK_ERROR(rc) ){

2578 pFile->lastErrno = tErrno;

2579 }

2580 return rc;

2581 }

2582 pFile->eFileLock = NO_LOCK;

2583 return SQLITE_OK;

2584 }

2585

2586 /*

2587 ** Close a file.

2588 */

2589 static int semClose(sqlite3_file *id) {

2590 if( id ){

2591 unixFile pFile = (unixFile)id;

2592 semUnlock(id, NO_LOCK);

2593 assert( pFile );

2594 unixEnterMutex();

2595 releaseInodeInfo(pFile);

2596 unixLeaveMutex();

2597 closeUnixFile(id);

2598 }

2599 return SQLITE_OK;

2600 }

2601

2602 #endif /* OS_VXWORKS */

2603 /*

2604 ** Named semaphore locking is only available on VxWorks.

2605 **

2606 ************* End of the named semaphore lock implementation **************

2607 ******************************************************************************/

2608

2609

2610 /******************************************************************************

2611 ************************* Begin AFP Locking *******************************

2612 **

2613 ** AFP is the Apple Filing Protocol. AFP is a network filesystem found

2614 ** on Apple Macintosh computers - both OS9 and OSX.

2615 **

2616 ** Third-party implementations of AFP are available. But this code here

2617 ** only works on OSX.

2618 */

2619

2620 #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE

/*
** The afpLockingContext structure contains all afp lock specific state
*/
typedef struct afpLockingContext afpLockingContext;
struct afpLockingContext {
  int reserved;            /* NOTE(review): meaning not visible here - confirm */
  const char *dbPath;      /* Name of the open file */
};

2629

/*
** Parameter block for the AFP byte-range lock request issued through
** afpfsByteRangeLock2FSCTL.
*/
struct ByteRangeLockPB2 {
  unsigned long long offset;         /* first byte of the range to lock */
  unsigned long long length;         /* how many bytes to lock */
  unsigned long long retRangeStart;  /* first byte locked, on success */
  unsigned char unLockFlag;          /* 0 = lock, 1 = unlock */
  unsigned char startEndFlag;        /* 0 = from start, 1 = from end of fork */
  int fd;                            /* descriptor the lock is tied to */
};

#define afpfsByteRangeLock2FSCTL        _IOWR('z', 23, struct ByteRangeLockPB2)

2641

2642 /*

2643 ** This is a utility for setting or clearing a bit-range lock on an

2644 ** AFP filesystem.

2645 **

2646 ** Return SQLITE_OK on success, SQLITE_BUSY on failure.

2647 */

2648 static int afpSetLock(

2649 const char path, / Name of the file to be locked or unlocked */

2650 unixFile pFile, / Open file descriptor on path */

2651 unsigned long long offset, /* First byte to be locked */

2652 unsigned long long length, /* Number of bytes to lock */

2653 int setLockFlag /* True to set lock. False to clear lock */

2654 ){

2655 struct ByteRangeLockPB2 pb;

2656 int err;

2657

2658 pb.unLockFlag = setLockFlag ? 0 : 1;

2659 pb.startEndFlag = 0;

2660 pb.offset = offset;

2661 pb.length = length;

2662 pb.fd = pFile->h;

2663

2664 OSTRACE(("AFPSETLOCK [%s] for %d%s in range %llx:%llx\n",

2665 (setLockFlag?"ON":"OFF"), pFile->h, (pb.fd==-1?"[testval-1]":""),

2666 offset, length));

2667 err = fsctl(path, afpfsByteRangeLock2FSCTL, &pb, 0);

2668 if ( err==-1 ) {

2669 int rc;

2670 int tErrno = errno;

2671 OSTRACE(("AFPSETLOCK failed to fsctl() '%s' %d %s\n",

2672 path, tErrno, strerror(tErrno)));

2673 #ifdef SQLITE_IGNORE_AFP_LOCK_ERRORS

2674 rc = SQLITE_BUSY;

2675 #else

2676 rc = sqliteErrorFromPosixError(tErrno,

2677 setLockFlag ? SQLITE_IOERR_LOCK : SQLITE_IOERR_UNLOCK);

2678 #endif /* SQLITE_IGNORE_AFP_LOCK_ERRORS */

2679 if( IS_LOCK_ERROR(rc) ){

2680 pFile->lastErrno = tErrno;

2681 }

2682 return rc;

2683 } else {

2684 return SQLITE_OK;

2685 }

2686 }

2687

2688 /*

2689 ** This routine checks if there is a RESERVED lock held on the specified

2690 ** file by this or any other process. If such a lock is held, set *pResOut

2691 ** to a non-zero value otherwise *pResOut is set to zero. The return value

2692 ** is set to SQLITE_OK unless an I/O error occurs during lock checking.

2693 */

2694 static int afpCheckReservedLock(sqlite3_file id, int pResOut){

2695 int rc = SQLITE_OK;

2696 int reserved = 0;

2697 unixFile pFile = (unixFile)id;

2698 afpLockingContext *context;

2699

2700 SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );

2701

2702 assert( pFile );

2703 context = (afpLockingContext *) pFile->lockingContext;

2704 if( context->reserved ){

2705 *pResOut = 1;

2706 return SQLITE_OK;

2707 }

2708 unixEnterMutex(); /* Because pFile->pInode is shared across threads */

2709

2710 /* Check if a thread in this process holds such a lock */

2711 if( pFile->pInode->eFileLock>SHARED_LOCK ){

2712 reserved = 1;

2713 }

2714

2715 /* Otherwise see if some other process holds it.

2716 */

2717 if( !reserved ){

2718 /* lock the RESERVED byte */

2719 int lrc = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1,1);

2720 if( SQLITE_OK==lrc ){

2721 /* if we succeeded in taking the reserved lock, unlock it to restore

2722 ** the original state */

2723 lrc = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1, 0);

2724 } else {

2725 /* if we failed to get the lock then someone else must have it */

2726 reserved = 1;

2727 }

2728 if( IS_LOCK_ERROR(lrc) ){

2729 rc=lrc;

2730 }

2731 }

2732

2733 unixLeaveMutex();

2734 OSTRACE(("TEST WR-LOCK %d %d %d (afp)\n", pFile->h, rc, reserved));

2735

2736 *pResOut = reserved;

2737 return rc;

2738 }

2739

2740 /*

2741 ** Lock the file with the lock specified by parameter eFileLock - one

2742 ** of the following:

2743 **

2744 ** (1) SHARED_LOCK

2745 ** (2) RESERVED_LOCK

2746 ** (3) PENDING_LOCK

2747 ** (4) EXCLUSIVE_LOCK

2748 **

2749 ** Sometimes when requesting one lock state, additional lock states

2750 ** are inserted in between. The locking might fail on one of the later

2751 ** transitions leaving the lock state different from what it started but

2752 ** still short of its goal. The following chart shows the allowed

2753 ** transitions and the inserted intermediate states:

2754 **

2755 ** UNLOCKED -> SHARED

2756 ** SHARED -> RESERVED

2757 ** SHARED -> (PENDING) -> EXCLUSIVE

2758 ** RESERVED -> (PENDING) -> EXCLUSIVE

2759 ** PENDING -> EXCLUSIVE

2760 **

2761 ** This routine will only increase a lock. Use the sqlite3OsUnlock()

2762 ** routine to lower a locking level.

2763 */

2764 static int afpLock(sqlite3_file *id, int eFileLock){

2765 int rc = SQLITE_OK;

2766 unixFile pFile = (unixFile)id;

2767 unixInodeInfo *pInode = pFile->pInode;

2768 afpLockingContext context = (afpLockingContext ) pFile->lockingContext;

2769

2770 assert( pFile );

2771 OSTRACE(("LOCK %d %s was %s(%s,%d) pid=%d (afp)\n", pFile->h,

2772 azFileLock(eFileLock), azFileLock(pFile->eFileLock),

2773 azFileLock(pInode->eFileLock), pInode->nShared , getpid()));

2774

2775 /* If there is already a lock of this type or more restrictive on the

2776 ** unixFile, do nothing. Don't use the afp_end_lock: exit path, as

2777 ** unixEnterMutex() hasn't been called yet.

2778 */

2779 if( pFile->eFileLock>=eFileLock ){

2780 OSTRACE(("LOCK %d %s ok (already held) (afp)\n", pFile->h,

2781 azFileLock(eFileLock)));

2782 return SQLITE_OK;

2783 }

2784

2785 /* Make sure the locking sequence is correct

2786 ** (1) We never move from unlocked to anything higher than shared lock.

2787 ** (2) SQLite never explicitly requests a pendig lock.

2788 ** (3) A shared lock is always held when a reserve lock is requested.

2789 */

2790 assert( pFile->eFileLock!=NO_LOCK \|\| eFileLock==SHARED_LOCK );

2791 assert( eFileLock!=PENDING_LOCK );

2792 assert( eFileLock!=RESERVED_LOCK \|\| pFile->eFileLock==SHARED_LOCK );

2793

2794 /* This mutex is needed because pFile->pInode is shared across threads

2795 */

2796 unixEnterMutex();

2797 pInode = pFile->pInode;

2798

2799 /* If some thread using this PID has a lock via a different unixFile*

2800 ** handle that precludes the requested lock, return BUSY.

2801 */

2802 if( (pFile->eFileLock!=pInode->eFileLock &&

2803 (pInode->eFileLock>=PENDING_LOCK \|\| eFileLock>SHARED_LOCK))

2804 ){

2805 rc = SQLITE_BUSY;

2806 goto afp_end_lock;

2807 }

2808

2809 /* If a SHARED lock is requested, and some thread using this PID already

2810 ** has a SHARED or RESERVED lock, then increment reference counts and

2811 ** return SQLITE_OK.

2812 */

2813 if( eFileLock==SHARED_LOCK &&

2814 (pInode->eFileLock==SHARED_LOCK \|\| pInode->eFileLock==RESERVED_LOCK) ){

2815 assert( eFileLock==SHARED_LOCK );

2816 assert( pFile->eFileLock==0 );

2817 assert( pInode->nShared>0 );

2818 pFile->eFileLock = SHARED_LOCK;

2819 pInode->nShared++;

2820 pInode->nLock++;

2821 goto afp_end_lock;

2822 }

2823

2824 /* A PENDING lock is needed before acquiring a SHARED lock and before

2825 ** acquiring an EXCLUSIVE lock. For the SHARED lock, the PENDING will

2826 ** be released.

2827 */

2828 if( eFileLock==SHARED_LOCK

2829 \|\| (eFileLock==EXCLUSIVE_LOCK && pFile->eFileLock<PENDING_LOCK)

2830 ){

2831 int failed;

2832 failed = afpSetLock(context->dbPath, pFile, PENDING_BYTE, 1, 1);

2833 if (failed) {

2834 rc = failed;

2835 goto afp_end_lock;

2836 }

2837 }

2838

2839 /* If control gets to this point, then actually go ahead and make

2840 ** operating system calls for the specified lock.

2841 */

2842 if( eFileLock==SHARED_LOCK ){

2843 int lrc1, lrc2, lrc1Errno = 0;

2844 long lk, mask;

2845

2846 assert( pInode->nShared==0 );

2847 assert( pInode->eFileLock==0 );

2848

2849 mask = (sizeof(long)==8) ? LARGEST_INT64 : 0x7fffffff;

2850 /* Now get the read-lock SHARED_LOCK */

2851 /* note that the quality of the randomness doesn't matter that much */

2852 lk = random();

2853 pInode->sharedByte = (lk & mask)%(SHARED_SIZE - 1);

2854 lrc1 = afpSetLock(context->dbPath, pFile,

2855 SHARED_FIRST+pInode->sharedByte, 1, 1);

2856 if( IS_LOCK_ERROR(lrc1) ){

2857 lrc1Errno = pFile->lastErrno;

2858 }

2859 /* Drop the temporary PENDING lock */

2860 lrc2 = afpSetLock(context->dbPath, pFile, PENDING_BYTE, 1, 0);

2861

2862 if( IS_LOCK_ERROR(lrc1) ) {

2863 pFile->lastErrno = lrc1Errno;

2864 rc = lrc1;

2865 goto afp_end_lock;

2866 } else if( IS_LOCK_ERROR(lrc2) ){

2867 rc = lrc2;

2868 goto afp_end_lock;

2869 } else if( lrc1 != SQLITE_OK ) {

2870 rc = lrc1;

2871 } else {

2872 pFile->eFileLock = SHARED_LOCK;

2873 pInode->nLock++;

2874 pInode->nShared = 1;

2875 }

2876 }else if( eFileLock==EXCLUSIVE_LOCK && pInode->nShared>1 ){

2877 /* We are trying for an exclusive lock but another thread in this

2878 ** same process is still holding a shared lock. */

2879 rc = SQLITE_BUSY;

2880 }else{

2881 /* The request was for a RESERVED or EXCLUSIVE lock. It is

2882 ** assumed that there is a SHARED or greater lock on the file

2883 ** already.

2884 */

2885 int failed = 0;

2886 assert( 0!=pFile->eFileLock );

2887 if (eFileLock >= RESERVED_LOCK && pFile->eFileLock < RESERVED_LOCK) {

2888 /* Acquire a RESERVED lock */

2889 failed = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1,1);

2890 if( !failed ){

2891 context->reserved = 1;

2892 }

2893 }

2894 if (!failed && eFileLock == EXCLUSIVE_LOCK) {

2895 /* Acquire an EXCLUSIVE lock */

2896

2897 /* Remove the shared lock before trying the range. we'll need to

2898 ** reestablish the shared lock if we can't get the afpUnlock

2899 */

2900 if( !(failed = afpSetLock(context->dbPath, pFile, SHARED_FIRST +

2901 pInode->sharedByte, 1, 0)) ){

2902 int failed2 = SQLITE_OK;

2903 /* now attemmpt to get the exclusive lock range */

2904 failed = afpSetLock(context->dbPath, pFile, SHARED_FIRST,

2905 SHARED_SIZE, 1);

2906 if( failed && (failed2 = afpSetLock(context->dbPath, pFile,

2907 SHARED_FIRST + pInode->sharedByte, 1, 1)) ){

2908 /* Can't reestablish the shared lock. Sqlite can't deal, this is

2909 ** a critical I/O error

2910 */

2911 rc = ((failed & SQLITE_IOERR) == SQLITE_IOERR) ? failed2 :

2912 SQLITE_IOERR_LOCK;

2913 goto afp_end_lock;

2914 }

2915 }else{

2916 rc = failed;

2917 }

2918 }

2919 if( failed ){

2920 rc = failed;

2921 }

2922 }

2923

2924 if( rc==SQLITE_OK ){

2925 pFile->eFileLock = eFileLock;

2926 pInode->eFileLock = eFileLock;

2927 }else if( eFileLock==EXCLUSIVE_LOCK ){

2928 pFile->eFileLock = PENDING_LOCK;

2929 pInode->eFileLock = PENDING_LOCK;

2930 }

2931

2932 afp_end_lock:

2933 unixLeaveMutex();

2934 OSTRACE(("LOCK %d %s %s (afp)\n", pFile->h, azFileLock(eFileLock),

2935 rc==SQLITE_OK ? "ok" : "failed"));

2936 return rc;

2937 }

2938

2939 /*

2940 ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock

2941 ** must be either NO_LOCK or SHARED_LOCK.

2942 **

2943 ** If the locking level of the file descriptor is already at or below

2944 ** the requested locking level, this routine is a no-op.

2945 */

2946 static int afpUnlock(sqlite3_file *id, int eFileLock) {

2947 int rc = SQLITE_OK;

2948 unixFile pFile = (unixFile)id;

2949 unixInodeInfo *pInode;

2950 afpLockingContext context = (afpLockingContext ) pFile->lockingContext;

2951 int skipShared = 0;

2952 #ifdef SQLITE_TEST

2953 int h = pFile->h;

2954 #endif

2955

2956 assert( pFile );

2957 OSTRACE(("UNLOCK %d %d was %d(%d,%d) pid=%d (afp)\n", pFile->h, eFileLock,

2958 pFile->eFileLock, pFile->pInode->eFileLock, pFile->pInode->nShared,

2959 getpid()));

2960

2961 assert( eFileLock<=SHARED_LOCK );

2962 if( pFile->eFileLock<=eFileLock ){

2963 return SQLITE_OK;

2964 }

2965 unixEnterMutex();

2966 pInode = pFile->pInode;

2967 assert( pInode->nShared!=0 );

2968 if( pFile->eFileLock>SHARED_LOCK ){

2969 assert( pInode->eFileLock==pFile->eFileLock );

2970 SimulateIOErrorBenign(1);

2971 SimulateIOError( h=(-1) )

2972 SimulateIOErrorBenign(0);

2973

2974 #ifdef SQLITE_DEBUG

2975 /* When reducing a lock such that other processes can start

2976 ** reading the database file again, make sure that the

2977 ** transaction counter was updated if any part of the database

2978 ** file changed. If the transaction counter is not updated,

2979 ** other connections to the same file might not realize that

2980 ** the file has changed and hence might not know to flush their

2981 ** cache. The use of a stale cache can lead to database corruption.

2982 */

2983 assert( pFile->inNormalWrite==0

2984 \|\| pFile->dbUpdate==0

2985 \|\| pFile->transCntrChng==1 );

2986 pFile->inNormalWrite = 0;

2987 #endif

2988

2989 if( pFile->eFileLock==EXCLUSIVE_LOCK ){

2990 rc = afpSetLock(context->dbPath, pFile, SHARED_FIRST, SHARED_SIZE, 0);

2991 if( rc==SQLITE_OK && (eFileLock==SHARED_LOCK \|\| pInode->nShared>1) ){

2992 /* only re-establish the shared lock if necessary */

2993 int sharedLockByte = SHARED_FIRST+pInode->sharedByte;

2994 rc = afpSetLock(context->dbPath, pFile, sharedLockByte, 1, 1);

2995 } else {

2996 skipShared = 1;

2997 }

2998 }

2999 if( rc==SQLITE_OK && pFile->eFileLock>=PENDING_LOCK ){

3000 rc = afpSetLock(context->dbPath, pFile, PENDING_BYTE, 1, 0);

3001 }

3002 if( rc==SQLITE_OK && pFile->eFileLock>=RESERVED_LOCK && context->reserved ){

3003 rc = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1, 0);

3004 if( !rc ){

3005 context->reserved = 0;

3006 }

3007 }

3008 if( rc==SQLITE_OK && (eFileLock==SHARED_LOCK \|\| pInode->nShared>1)){

3009 pInode->eFileLock = SHARED_LOCK;

3010 }

3011 }

3012 if( rc==SQLITE_OK && eFileLock==NO_LOCK ){

3013

3014 /* Decrement the shared lock counter. Release the lock using an

3015 ** OS call only when all threads in this same process have released

3016 ** the lock.

3017 */

3018 unsigned long long sharedLockByte = SHARED_FIRST+pInode->sharedByte;

3019 pInode->nShared--;

3020 if( pInode->nShared==0 ){

3021 SimulateIOErrorBenign(1);

3022 SimulateIOError( h=(-1) )

3023 SimulateIOErrorBenign(0);

3024 if( !skipShared ){

3025 rc = afpSetLock(context->dbPath, pFile, sharedLockByte, 1, 0);

3026 }

3027 if( !rc ){

3028 pInode->eFileLock = NO_LOCK;

3029 pFile->eFileLock = NO_LOCK;

3030 }

3031 }

3032 if( rc==SQLITE_OK ){

3033 pInode->nLock--;

3034 assert( pInode->nLock>=0 );

3035 if( pInode->nLock==0 ){

3036 closePendingFds(pFile);

3037 }

3038 }

3039 }

3040

3041 unixLeaveMutex();

3042 if( rc==SQLITE_OK ) pFile->eFileLock = eFileLock;

3043 return rc;

3044 }

3045

3046 /*

3047 ** Close a file & cleanup AFP specific locking context

3048 */

3049 static int afpClose(sqlite3_file *id) {

3050 int rc = SQLITE_OK;

3051 if( id ){

3052 unixFile pFile = (unixFile)id;

3053 afpUnlock(id, NO_LOCK);

3054 unixEnterMutex();

3055 if( pFile->pInode && pFile->pInode->nLock ){

3056 /* If there are outstanding locks, do not actually close the file just

3057 ** yet because that would clear those locks. Instead, add the file

3058 ** descriptor to pInode->aPending. It will be automatically closed when

3059 ** the last lock is cleared.

3060 */

3061 setPendingFd(pFile);

3062 }

3063 releaseInodeInfo(pFile);

3064 sqlite3_free(pFile->lockingContext);

3065 rc = closeUnixFile(id);

3066 unixLeaveMutex();

3067 }

3068 return rc;

3069 }

3070

3071 #endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */

3072 /*

3073 ** The code above is the AFP lock implementation. The code is specific

3074 ** to MacOSX and does not work on other unix platforms. No alternative

3075 ** is available. If you don't compile for a mac, then the "unix-afp"

3076 ** VFS is not available.

3077 **

3078 ******************* End of the AFP lock implementation ********************

3079 ******************************************************************************/

3080

3081 /******************************************************************************

3082 ************************* Begin NFS Locking ******************************/

3083

3084 #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE

3085 /*

3086 ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock

3087 ** must be either NO_LOCK or SHARED_LOCK.

3088 **

3089 ** If the locking level of the file descriptor is already at or below

3090 ** the requested locking level, this routine is a no-op.

3091 */

3092 static int nfsUnlock(sqlite3_file *id, int eFileLock){

3093 return posixUnlock(id, eFileLock, 1);

3094 }

3095

3096 #endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */

3097 /*

3098 ** The code above is the NFS lock implementation. The code is specific

3099 ** to MacOSX and does not work on other unix platforms. No alternative

3100 ** is available.

3101 **

3102 ******************* End of the NFS lock implementation ********************

3103 ******************************************************************************/

3104

3105 /******************************************************************************

3106 ************** Non-locking sqlite3_file methods ***************************

3107 **

3108 ** The next division contains implementations for all methods of the

3109 ** sqlite3_file object other than the locking methods. The locking

3110 ** methods were defined in divisions above (one locking method per

3111 ** division). Those methods that are common to all locking modes

3112 ** are gathered together into this division.

3113 */

3114

3115 /*

3116 ** Seek to the offset passed as the second argument, then read cnt

3117 ** bytes into pBuf. Return the number of bytes actually read.

3118 **

3119 ** NB: If you define USE_PREAD or USE_PREAD64, then it might also

3120 ** be necessary to define _XOPEN_SOURCE to be 500. This varies from

3121 ** one system to another. Since SQLite does not define USE_PREAD

3122 ** in any form by default, we will not attempt to define _XOPEN_SOURCE.

3123 ** See tickets #2741 and #2681.

3124 **

3125 ** To avoid stomping the errno value on a failed read the lastErrno value

3126 ** is set before returning.

3127 */

3128 static int seekAndRead(unixFile id, sqlite3_int64 offset, void pBuf, int cnt){

3129 int got;

3130 int prior = 0;

3131 #if (!defined(USE_PREAD) && !defined(USE_PREAD64))

3132 i64 newOffset;

3133 #endif

3134 TIMER_START;

3135 assert( cnt==(cnt&0x1ffff) );

3136 assert( id->h>2 );

3137 cnt &= 0x1ffff;

3138 do{

3139 #if defined(USE_PREAD)

3140 got = osPread(id->h, pBuf, cnt, offset);

3141 SimulateIOError( got = -1 );

3142 #elif defined(USE_PREAD64)

3143 got = osPread64(id->h, pBuf, cnt, offset);

3144 SimulateIOError( got = -1 );

3145 #else

3146 newOffset = lseek(id->h, offset, SEEK_SET);

3147 SimulateIOError( newOffset-- );

3148 if( newOffset!=offset ){

3149 if( newOffset == -1 ){

3150 ((unixFile*)id)->lastErrno = errno;

3151 }else{

3152 ((unixFile*)id)->lastErrno = 0;

3153 }

3154 return -1;

3155 }

3156 got = osRead(id->h, pBuf, cnt);

3157 #endif

3158 if( got==cnt ) break;

3159 if( got<0 ){

3160 if( errno==EINTR ){ got = 1; continue; }

3161 prior = 0;

3162 ((unixFile*)id)->lastErrno = errno;

3163 break;

3164 }else if( got>0 ){

3165 cnt -= got;

3166 offset += got;

3167 prior += got;

3168 pBuf = (void)(got + (char)pBuf);

3169 }

3170 }while( got>0 );

3171 TIMER_END;

3172 OSTRACE(("READ %-3d %5d %7lld %llu\n",

3173 id->h, got+prior, offset-prior, TIMER_ELAPSED));

3174 return got+prior;

3175 }

3176

3177 /*

3178 ** Read data from a file into a buffer. Return SQLITE_OK if all

3179 ** bytes were read successfully and SQLITE_IOERR if anything goes

3180 ** wrong.

3181 */

3182 static int unixRead(

3183 sqlite3_file *id,

3184 void *pBuf,

3185 int amt,

3186 sqlite3_int64 offset

3187 ){

3188 unixFile pFile = (unixFile )id;

3189 int got;

3190 assert( id );

3191 assert( offset>=0 );

3192 assert( amt>0 );

3193

3194 /* If this is a database file (not a journal, master-journal or temp

3195 ** file), the bytes in the locking range should never be read or written. */

3196 #if 0

3197 assert( pFile->pUnused==0

3198 \|\| offset>=PENDING_BYTE+512

3199 \|\| offset+amt<=PENDING_BYTE

3200 );

3201 #endif

3202

3203 #if SQLITE_MAX_MMAP_SIZE>0

3204 /* Deal with as much of this read request as possible by transfering

3205 ** data from the memory mapping using memcpy(). */

3206 if( offset<pFile->mmapSize ){

3207 if( offset+amt <= pFile->mmapSize ){

3208 memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], amt);

3209 return SQLITE_OK;

3210 }else{

3211 int nCopy = pFile->mmapSize - offset;

3212 memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], nCopy);

3213 pBuf = &((u8 *)pBuf)[nCopy];

3214 amt -= nCopy;

3215 offset += nCopy;

3216 }

3217 }

3218 #endif

3219

3220 got = seekAndRead(pFile, offset, pBuf, amt);

3221 if( got==amt ){

3222 return SQLITE_OK;

3223 }else if( got<0 ){

3224 /* lastErrno set by seekAndRead */

3225 return SQLITE_IOERR_READ;

3226 }else{

3227 pFile->lastErrno = 0; /* not a system error */

3228 /* Unread parts of the buffer must be zero-filled */

3229 memset(&((char*)pBuf)[got], 0, amt-got);

3230 return SQLITE_IOERR_SHORT_READ;

3231 }

3232 }

3233

3234 /*

3235 ** Attempt to seek the file-descriptor passed as the first argument to

3236 ** absolute offset iOff, then attempt to write nBuf bytes of data from

3237 ** pBuf to it. If an error occurs, return -1 and set *piErrno. Otherwise,

3238 ** return the actual number of bytes written (which may be less than

3239 ** nBuf).

3240 */

3241 static int seekAndWriteFd(

3242 int fd, /* File descriptor to write to */

3243 i64 iOff, /* File offset to begin writing at */

3244 const void pBuf, / Copy data from this buffer to the file */

3245 int nBuf, /* Size of buffer pBuf in bytes */

3246 int piErrno / OUT: Error number if error occurs */

3247 ){

3248 int rc = 0; /* Value returned by system call */

3249

3250 assert( nBuf==(nBuf&0x1ffff) );

3251 assert( fd>2 );

3252 nBuf &= 0x1ffff;

3253 TIMER_START;

3254

3255 #if defined(USE_PREAD)

3256 do{ rc = osPwrite(fd, pBuf, nBuf, iOff); }while( rc<0 && errno==EINTR );

3257 #elif defined(USE_PREAD64)

3258 do{ rc = osPwrite64(fd, pBuf, nBuf, iOff);}while( rc<0 && errno==EINTR);

3259 #else

3260 do{

3261 i64 iSeek = lseek(fd, iOff, SEEK_SET);

3262 SimulateIOError( iSeek-- );

3263

3264 if( iSeek!=iOff ){

3265 if( piErrno ) *piErrno = (iSeek==-1 ? errno : 0);

3266 return -1;

3267 }

3268 rc = osWrite(fd, pBuf, nBuf);

3269 }while( rc<0 && errno==EINTR );

3270 #endif

3271

3272 TIMER_END;

3273 OSTRACE(("WRITE %-3d %5d %7lld %llu\n", fd, rc, iOff, TIMER_ELAPSED));

3274

3275 if( rc<0 && piErrno ) *piErrno = errno;

3276 return rc;

3277 }

3278

3279

3280 /*

3281 ** Seek to the offset in id->offset then read cnt bytes into pBuf.

3282 ** Return the number of bytes actually read. Update the offset.

3283 **

3284 ** To avoid stomping the errno value on a failed write the lastErrno value

3285 ** is set before returning.

3286 */

3287 static int seekAndWrite(unixFile id, i64 offset, const void pBuf, int cnt){

3288 return seekAndWriteFd(id->h, offset, pBuf, cnt, &id->lastErrno);

3289 }

3290

3291

3292 /*

3293 ** Write data from a buffer into a file. Return SQLITE_OK on success

3294 ** or some other error code on failure.

3295 */

3296 static int unixWrite(

3297 sqlite3_file *id,

3298 const void *pBuf,

3299 int amt,

3300 sqlite3_int64 offset

3301 ){

3302 unixFile pFile = (unixFile)id;

3303 int wrote = 0;

3304 assert( id );

3305 assert( amt>0 );

3306

3307 /* If this is a database file (not a journal, master-journal or temp

3308 ** file), the bytes in the locking range should never be read or written. */

3309 #if 0

3310 assert( pFile->pUnused==0

3311 \|\| offset>=PENDING_BYTE+512

3312 \|\| offset+amt<=PENDING_BYTE

3313 );

3314 #endif

3315

3316 #ifdef SQLITE_DEBUG

3317 /* If we are doing a normal write to a database file (as opposed to

3318 ** doing a hot-journal rollback or a write to some file other than a

3319 ** normal database file) then record the fact that the database

3320 ** has changed. If the transaction counter is modified, record that

3321 ** fact too.

3322 */

3323 if( pFile->inNormalWrite ){

3324 pFile->dbUpdate = 1; /* The database has been modified */

3325 if( offset<=24 && offset+amt>=27 ){

3326 int rc;

3327 char oldCntr[4];

3328 SimulateIOErrorBenign(1);

3329 rc = seekAndRead(pFile, 24, oldCntr, 4);

3330 SimulateIOErrorBenign(0);

3331 if( rc!=4 \|\| memcmp(oldCntr, &((char*)pBuf)[24-offset], 4)!=0 ){

3332 pFile->transCntrChng = 1; /* The transaction counter has changed */

3333 }

3334 }

3335 }

3336 #endif

3337

3338 #if SQLITE_MAX_MMAP_SIZE>0

3339 /* Deal with as much of this write request as possible by transfering

3340 ** data from the memory mapping using memcpy(). */

3341 if( offset<pFile->mmapSize ){

3342 if( offset+amt <= pFile->mmapSize ){

3343 memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, amt);

3344 return SQLITE_OK;

3345 }else{

3346 int nCopy = pFile->mmapSize - offset;

3347 memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, nCopy);

3348 pBuf = &((u8 *)pBuf)[nCopy];

3349 amt -= nCopy;

3350 offset += nCopy;

3351 }

3352 }

3353 #endif

3354

3355 while( amt>0 && (wrote = seekAndWrite(pFile, offset, pBuf, amt))>0 ){

3356 amt -= wrote;

3357 offset += wrote;

3358 pBuf = &((char*)pBuf)[wrote];

3359 }

3360 SimulateIOError(( wrote=(-1), amt=1 ));

3361 SimulateDiskfullError(( wrote=0, amt=1 ));

3362

3363 if( amt>0 ){

3364 if( wrote<0 && pFile->lastErrno!=ENOSPC ){

3365 /* lastErrno set by seekAndWrite */

3366 return SQLITE_IOERR_WRITE;

3367 }else{

3368 pFile->lastErrno = 0; /* not a system error */

3369 return SQLITE_FULL;

3370 }

3371 }

3372

3373 return SQLITE_OK;

3374 }

3375

#ifdef SQLITE_TEST
/*
** Count the number of fullsyncs and normal syncs.  This is used to test
** that syncs and fullsyncs are occurring at the right times.
*/
int sqlite3_sync_count = 0;
int sqlite3_fullsync_count = 0;
#endif

3384

/*
** We do not trust systems to provide a working fdatasync().  Some do.
** Others do not.  To be safe, we will stick with the (slightly slower)
** fsync(). If you know that your system does support fdatasync() correctly,
** then simply compile with -Dfdatasync=fdatasync
*/
#if !defined(fdatasync)
# define fdatasync fsync
#endif

3394

/*
** Define HAVE_FULLFSYNC to 0 or 1 depending on whether or not
** the F_FULLFSYNC macro is defined.  F_FULLFSYNC is currently
** only available on Mac OS X.  But that could change.
*/
#ifdef F_FULLFSYNC
# define HAVE_FULLFSYNC 1
#else
# define HAVE_FULLFSYNC 0
#endif

3405

3406

3407 /*

3408 ** The fsync() system call does not work as advertised on many

3409 ** unix systems. The following procedure is an attempt to make

3410 ** it work better.

3411 **

3412 ** The SQLITE_NO_SYNC macro disables all fsync()s. This is useful

3413 ** for testing when we want to run through the test suite quickly.

3414 ** You are strongly advised not to deploy with SQLITE_NO_SYNC

3415 ** enabled, however, since with SQLITE_NO_SYNC enabled, an OS crash

3416 ** or power failure will likely corrupt the database file.

3417 **

3418 ** SQLite sets the dataOnly flag if the size of the file is unchanged.

3419 ** The idea behind dataOnly is that it should only write the file content

3420 ** to disk, not the inode. We only set dataOnly if the file size is

3421 ** unchanged since the file size is part of the inode. However,

3422 ** Ted Ts'o tells us that fdatasync() will also write the inode if the

3423 ** file size has changed. The only real difference between fdatasync()

3424 ** and fsync(), Ted tells us, is that fdatasync() will not flush the

3425 ** inode if the mtime or owner or other inode attributes have changed.

3426 ** We only care about the file size, not the other file attributes, so

3427 ** as far as SQLite is concerned, an fdatasync() is always adequate.

3428 ** So, we always use fdatasync() if it is available, regardless of

3429 ** the value of the dataOnly flag.

3430 */

3431 static int full_fsync(int fd, int fullSync, int dataOnly){

3432 int rc;

3433

3434 /* The following "ifdef/elif/else/" block has the same structure as

3435 ** the one below. It is replicated here solely to avoid cluttering

3436 ** up the real code with the UNUSED_PARAMETER() macros.

3437 */

3438 #ifdef SQLITE_NO_SYNC

3439 UNUSED_PARAMETER(fd);

3440 UNUSED_PARAMETER(fullSync);

3441 UNUSED_PARAMETER(dataOnly);

3442 #elif HAVE_FULLFSYNC

3443 UNUSED_PARAMETER(dataOnly);

3444 #else

3445 UNUSED_PARAMETER(fullSync);

3446 UNUSED_PARAMETER(dataOnly);

3447 #endif

3448

3449 /* Record the number of times that we do a normal fsync() and

3450 ** FULLSYNC. This is used during testing to verify that this procedure

3451 ** gets called with the correct arguments.

3452 */

3453 #ifdef SQLITE_TEST

3454 if( fullSync ) sqlite3_fullsync_count++;

3455 sqlite3_sync_count++;

3456 #endif

3457

3458 /* If we compiled with the SQLITE_NO_SYNC flag, then syncing is a

3459 ** no-op

3460 */

3461 #ifdef SQLITE_NO_SYNC

3462 rc = SQLITE_OK;

3463 #elif HAVE_FULLFSYNC

3464 if( fullSync ){

3465 rc = osFcntl(fd, F_FULLFSYNC, 0);

3466 }else{

3467 rc = 1;

3468 }

3469 /* If the FULLFSYNC failed, fall back to attempting an fsync().

3470 ** It shouldn't be possible for fullfsync to fail on the local

3471 ** file system (on OSX), so failure indicates that FULLFSYNC

3472 ** isn't supported for this file system. So, attempt an fsync

3473 ** and (for now) ignore the overhead of a superfluous fcntl call.

3474 ** It'd be better to detect fullfsync support once and avoid

3475 ** the fcntl call every time sync is called.

3476 */

3477 if( rc ) rc = fsync(fd);

3478

3479 #elif defined(__APPLE__)

3480 /* fdatasync() on HFS+ doesn't yet flush the file size if it changed correctly

3481 ** so currently we default to the macro that redefines fdatasync to fsync

3482 */

3483 rc = fsync(fd);

3484 #else

3485 rc = fdatasync(fd);

3486 #if OS_VXWORKS

3487 if( rc==-1 && errno==ENOTSUP ){

3488 rc = fsync(fd);

3489 }

3490 #endif /* OS_VXWORKS */

3491 #endif /* ifdef SQLITE_NO_SYNC elif HAVE_FULLFSYNC */

3492

3493 if( OS_VXWORKS && rc!= -1 ){

3494 rc = 0;

3495 }

3496 return rc;

3497 }

3498

3499 /*

3500 ** Open a file descriptor to the directory containing file zFilename.

3501 ** If successful, *pFd is set to the opened file descriptor and

3502 ** SQLITE_OK is returned. If an error occurs, either SQLITE_NOMEM

3503 ** or SQLITE_CANTOPEN is returned and *pFd is set to an undefined

3504 ** value.

3505 **

3506 ** The directory file descriptor is used for only one thing - to

3507 ** fsync() a directory to make sure file creation and deletion events

3508 ** are flushed to disk. Such fsyncs are not needed on newer

3509 ** journaling filesystems, but are required on older filesystems.

3510 **

3511 ** This routine can be overridden using the xSetSysCall interface.

3512 ** The ability to override this routine was added in support of the

3513 ** chromium sandbox. Opening a directory is a security risk (we are

3514 ** told) so making it overrideable allows the chromium sandbox to

3515 ** replace this routine with a harmless no-op. To make this routine

3516 ** a no-op, replace it with a stub that returns SQLITE_OK but leaves

3517 ** *pFd set to a negative number.

3518 **

3519 ** If SQLITE_OK is returned, the caller is responsible for closing

3520 ** the file descriptor *pFd using close().

3521 */

3522 static int openDirectory(const char zFilename, int pFd){

3523 int ii;

3524 int fd = -1;

3525 char zDirname[MAX_PATHNAME+1];

3526

3527 sqlite3_snprintf(MAX_PATHNAME, zDirname, "%s", zFilename);

3528 for(ii=(int)strlen(zDirname); ii>1 && zDirname[ii]!='/'; ii--);

3529 if( ii>0 ){

3530 zDirname[ii] = '\0';

3531 fd = robust_open(zDirname, O_RDONLY\|O_BINARY, 0);

3532 if( fd>=0 ){

3533 OSTRACE(("OPENDIR %-3d %s\n", fd, zDirname));

3534 }

3535 }

3536 *pFd = fd;

3537 return (fd>=0?SQLITE_OK:unixLogError(SQLITE_CANTOPEN_BKPT, "open", zDirname));

3538 }

3539

3540 /*

3541 ** Make sure all writes to a particular file are committed to disk.

3542 **

3543 ** If dataOnly==0 then both the file itself and its metadata (file

3544 ** size, access time, etc) are synced. If dataOnly!=0 then only the

3545 ** file data is synced.

3546 **

3547 ** Under Unix, also make sure that the directory entry for the file

3548 ** has been created by fsync-ing the directory that contains the file.

3549 ** If we do not do this and we encounter a power failure, the directory

3550 ** entry for the journal might not exist after we reboot. The next

3551 ** SQLite to access the file will not know that the journal exists (because

3552 ** the directory entry for the journal was never created) and the transaction

3553 ** will not roll back - possibly leading to database corruption.

3554 */

3555 static int unixSync(sqlite3_file *id, int flags){

3556 int rc;

3557 unixFile pFile = (unixFile)id;

3558

3559 int isDataOnly = (flags&SQLITE_SYNC_DATAONLY);

3560 int isFullsync = (flags&0x0F)==SQLITE_SYNC_FULL;

3561

3562 /* Check that one of SQLITE_SYNC_NORMAL or FULL was passed */

3563 assert((flags&0x0F)==SQLITE_SYNC_NORMAL

3564 \|\| (flags&0x0F)==SQLITE_SYNC_FULL

3565 );

3566

3567 /* Unix cannot, but some systems may return SQLITE_FULL from here. This

3568 ** line is to test that doing so does not cause any problems.

3569 */

3570 SimulateDiskfullError( return SQLITE_FULL );

3571

3572 assert( pFile );

3573 OSTRACE(("SYNC %-3d\n", pFile->h));

3574 rc = full_fsync(pFile->h, isFullsync, isDataOnly);

3575 SimulateIOError( rc=1 );

3576 if( rc ){

3577 pFile->lastErrno = errno;

3578 return unixLogError(SQLITE_IOERR_FSYNC, "full_fsync", pFile->zPath);

3579 }

3580

3581 /* Also fsync the directory containing the file if the DIRSYNC flag

3582 ** is set. This is a one-time occurrence. Many systems (examples: AIX)

3583 ** are unable to fsync a directory, so ignore errors on the fsync.

3584 */

3585 if( pFile->ctrlFlags & UNIXFILE_DIRSYNC ){

3586 int dirfd;

3587 OSTRACE(("DIRSYNC %s (have_fullfsync=%d fullsync=%d)\n", pFile->zPath,

3588 HAVE_FULLFSYNC, isFullsync));

3589 rc = osOpenDirectory(pFile->zPath, &dirfd);

3590 if( rc==SQLITE_OK && dirfd>=0 ){

3591 full_fsync(dirfd, 0, 0);

3592 robust_close(pFile, dirfd, __LINE__);

3593 }else if( rc==SQLITE_CANTOPEN ){

3594 rc = SQLITE_OK;

3595 }

3596 pFile->ctrlFlags &= ~UNIXFILE_DIRSYNC;

3597 }

3598 return rc;

3599 }

3600

3601 /*

3602 ** Truncate an open file to a specified size

3603 */

3604 static int unixTruncate(sqlite3_file *id, i64 nByte){

3605 unixFile pFile = (unixFile )id;

3606 int rc;

3607 assert( pFile );

3608 SimulateIOError( return SQLITE_IOERR_TRUNCATE );

3609

3610 /* If the user has configured a chunk-size for this file, truncate the

3611 ** file so that it consists of an integer number of chunks (i.e. the

3612 ** actual file size after the operation may be larger than the requested

3613 ** size).

3614 */

3615 if( pFile->szChunk>0 ){

3616 nByte = ((nByte + pFile->szChunk - 1)/pFile->szChunk) * pFile->szChunk;

3617 }

3618

3619 rc = robust_ftruncate(pFile->h, nByte);

3620 if( rc ){

3621 pFile->lastErrno = errno;

3622 return unixLogError(SQLITE_IOERR_TRUNCATE, "ftruncate", pFile->zPath);

3623 }else{

3624 #ifdef SQLITE_DEBUG

3625 /* If we are doing a normal write to a database file (as opposed to

3626 ** doing a hot-journal rollback or a write to some file other than a

3627 ** normal database file) and we truncate the file to zero length,

3628 ** that effectively updates the change counter. This might happen

3629 ** when restoring a database using the backup API from a zero-length

3630 ** source.

3631 */

3632 if( pFile->inNormalWrite && nByte==0 ){

3633 pFile->transCntrChng = 1;

3634 }

3635 #endif

3636

3637 #if SQLITE_MAX_MMAP_SIZE>0

3638 /* If the file was just truncated to a size smaller than the currently

3639 ** mapped region, reduce the effective mapping size as well. SQLite will

3640 ** use read() and write() to access data beyond this point from now on.

3641 */

3642 if( nByte<pFile->mmapSize ){

3643 pFile->mmapSize = nByte;

3644 }

3645 #endif

3646

3647 return SQLITE_OK;

3648 }

3649 }

3650

3651 /*

3652 ** Determine the current size of a file in bytes

3653 */

3654 static int unixFileSize(sqlite3_file id, i64 pSize){

3655 int rc;

3656 struct stat buf;

3657 assert( id );

3658 rc = osFstat(((unixFile*)id)->h, &buf);

3659 SimulateIOError( rc=1 );

3660 if( rc!=0 ){

3661 ((unixFile*)id)->lastErrno = errno;

3662 return SQLITE_IOERR_FSTAT;

3663 }

3664 *pSize = buf.st_size;

3665

3666 /* When opening a zero-size database, the findInodeInfo() procedure

3667 ** writes a single byte into that file in order to work around a bug

3668 ** in the OS-X msdos filesystem. In order to avoid problems with upper

3669 ** layers, we need to report this file size as zero even though it is

3670 ** really 1. Ticket #3260.

3671 */

3672 if( pSize==1 ) pSize = 0;

3673

3674

3675 return SQLITE_OK;

3676 }

3677

#if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__)
/*
** Handler for proxy-locking file-control verbs.  Defined below in the
** proxying locking division.  Its arguments are passed straight through
** from unixFileControl(): the file handle, the opcode and the opcode
** argument.
*/
static int proxyFileControl(sqlite3_file*,int,void*);
#endif

3685

3686 /*

3687 ** This function is called to handle the SQLITE_FCNTL_SIZE_HINT

3688 ** file-control operation. Enlarge the database to nBytes in size

3689 ** (rounded up to the next chunk-size). If the database is already

3690 ** nBytes or larger, this routine is a no-op.

3691 */

3692 static int fcntlSizeHint(unixFile *pFile, i64 nByte){

3693 if( pFile->szChunk>0 ){

3694 i64 nSize; /* Required file size */

3695 struct stat buf; /* Used to hold return values of fstat() */

3696

3697 if( osFstat(pFile->h, &buf) ) return SQLITE_IOERR_FSTAT;

3698

3699 nSize = ((nByte+pFile->szChunk-1) / pFile->szChunk) * pFile->szChunk;

3700 if( nSize>(i64)buf.st_size ){

3701

3702 #if defined(HAVE_POSIX_FALLOCATE) && HAVE_POSIX_FALLOCATE

3703 /* The code below is handling the return value of osFallocate()

3704 ** correctly. posix_fallocate() is defined to "returns zero on success,

3705 ** or an error number on failure". See the manpage for details. */

3706 int err;

3707 do{

3708 err = osFallocate(pFile->h, buf.st_size, nSize-buf.st_size);

3709 }while( err==EINTR );

3710 if( err ) return SQLITE_IOERR_WRITE;

3711 #else

3712 /* If the OS does not have posix_fallocate(), fake it. First use

3713 ** ftruncate() to set the file size, then write a single byte to

3714 ** the last byte in each block within the extended region. This

3715 ** is the same technique used by glibc to implement posix_fallocate()

3716 ** on systems that do not have a real fallocate() system call.

3717 */

3718 int nBlk = buf.st_blksize; /* File-system block size */

3719 i64 iWrite; /* Next offset to write to */

3720

3721 if( robust_ftruncate(pFile->h, nSize) ){

3722 pFile->lastErrno = errno;

3723 return unixLogError(SQLITE_IOERR_TRUNCATE, "ftruncate", pFile->zPath);

3724 }

3725 iWrite = ((buf.st_size + 2nBlk - 1)/nBlk)nBlk-1;

3726 while( iWrite<nSize ){

3727 int nWrite = seekAndWrite(pFile, iWrite, "", 1);

3728 if( nWrite!=1 ) return SQLITE_IOERR_WRITE;

3729 iWrite += nBlk;

3730 }

3731 #endif

3732 }

3733 }

3734

3735 #if SQLITE_MAX_MMAP_SIZE>0

3736 if( pFile->mmapSizeMax>0 && nByte>pFile->mmapSize ){

3737 int rc;

3738 if( pFile->szChunk<=0 ){

3739 if( robust_ftruncate(pFile->h, nByte) ){

3740 pFile->lastErrno = errno;

3741 return unixLogError(SQLITE_IOERR_TRUNCATE, "ftruncate", pFile->zPath);

3742 }

3743 }

3744

3745 rc = unixMapfile(pFile, nByte);

3746 return rc;

3747 }

3748 #endif

3749

3750 return SQLITE_OK;

3751 }

3752

3753 /*

3754 ** If pArg is initially negative then this is a query. Set pArg to

3755 ** 1 or 0 depending on whether or not bit mask of pFile->ctrlFlags is set.

3756 **

3757 ** If *pArg is 0 or 1, then clear or set the mask bit of pFile->ctrlFlags.

3758 */

3759 static void unixModeBit(unixFile pFile, unsigned char mask, int pArg){

3760 if( *pArg<0 ){

3761 *pArg = (pFile->ctrlFlags & mask)!=0;

3762 }else if( (*pArg)==0 ){

3763 pFile->ctrlFlags &= ~mask;

3764 }else{

3765 pFile->ctrlFlags \|= mask;

3766 }

3767 }

3768

3769 /* Forward declaration */

3770 static int unixGetTempname(int nBuf, char *zBuf);

3771

3772 /*

3773 ** Information and control of an open file handle.

3774 */

3775 static int unixFileControl(sqlite3_file id, int op, void pArg){

3776 unixFile pFile = (unixFile)id;

3777 switch( op ){

3778 case SQLITE_FCNTL_LOCKSTATE: {

3779 (int)pArg = pFile->eFileLock;

3780 return SQLITE_OK;

3781 }

3782 case SQLITE_LAST_ERRNO: {

3783 (int)pArg = pFile->lastErrno;

3784 return SQLITE_OK;

3785 }

3786 case SQLITE_FCNTL_CHUNK_SIZE: {

3787 pFile->szChunk = (int )pArg;

3788 return SQLITE_OK;

3789 }

3790 case SQLITE_FCNTL_SIZE_HINT: {

3791 int rc;

3792 SimulateIOErrorBenign(1);

3793 rc = fcntlSizeHint(pFile, (i64 )pArg);

3794 SimulateIOErrorBenign(0);

3795 return rc;

3796 }

3797 case SQLITE_FCNTL_PERSIST_WAL: {

3798 unixModeBit(pFile, UNIXFILE_PERSIST_WAL, (int*)pArg);

3799 return SQLITE_OK;

3800 }

3801 case SQLITE_FCNTL_POWERSAFE_OVERWRITE: {

3802 unixModeBit(pFile, UNIXFILE_PSOW, (int*)pArg);

3803 return SQLITE_OK;

3804 }

3805 case SQLITE_FCNTL_VFSNAME: {

3806 (char*)pArg = sqlite3_mprintf("%s", pFile->pVfs->zName);

3807 return SQLITE_OK;

3808 }

3809 case SQLITE_FCNTL_TEMPFILENAME: {

3810 char *zTFile = sqlite3_malloc( pFile->pVfs->mxPathname );

3811 if( zTFile ){

3812 unixGetTempname(pFile->pVfs->mxPathname, zTFile);

3813 (char*)pArg = zTFile;

3814 }

3815 return SQLITE_OK;

3816 }

3817 case SQLITE_FCNTL_HAS_MOVED: {

3818 (int)pArg = fileHasMoved(pFile);

3819 return SQLITE_OK;

3820 }

3821 #if SQLITE_MAX_MMAP_SIZE>0

3822 case SQLITE_FCNTL_MMAP_SIZE: {

3823 i64 newLimit = (i64)pArg;

3824 int rc = SQLITE_OK;

3825 if( newLimit>sqlite3GlobalConfig.mxMmap ){

3826 newLimit = sqlite3GlobalConfig.mxMmap;

3827 }

3828 (i64)pArg = pFile->mmapSizeMax;

3829 if( newLimit>=0 && newLimit!=pFile->mmapSizeMax && pFile->nFetchOut==0 ){

3830 pFile->mmapSizeMax = newLimit;

3831 if( pFile->mmapSize>0 ){

3832 unixUnmapfile(pFile);

3833 rc = unixMapfile(pFile, -1);

3834 }

3835 }

3836 return rc;

3837 }

3838 #endif

3839 #ifdef SQLITE_DEBUG

3840 /* The pager calls this method to signal that it has done

3841 ** a rollback and that the database is therefore unchanged and

3842 ** it hence it is OK for the transaction change counter to be

3843 ** unchanged.

3844 */

3845 case SQLITE_FCNTL_DB_UNCHANGED: {

3846 ((unixFile*)id)->dbUpdate = 0;

3847 return SQLITE_OK;

3848 }

3849 #endif

3850 #if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__)

3851 case SQLITE_SET_LOCKPROXYFILE:

3852 case SQLITE_GET_LOCKPROXYFILE: {

3853 return proxyFileControl(id,op,pArg);

3854 }

3855 #endif /* SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) */

3856 }

3857 return SQLITE_NOTFOUND;

3858 }

3859

3860 /*

3861 ** Return the sector size in bytes of the underlying block device for

3862 ** the specified file. This is almost always 512 bytes, but may be

3863 ** larger for some devices.

3864 **

3865 ** SQLite code assumes this function cannot fail. It also assumes that

3866 ** if two files are created in the same file-system directory (i.e.

3867 ** a database and its journal file) that the sector size will be the

3868 ** same for both.

3869 */

3870 #ifndef __QNXNTO__

3871 static int unixSectorSize(sqlite3_file *NotUsed){

3872 UNUSED_PARAMETER(NotUsed);

3873 return SQLITE_DEFAULT_SECTOR_SIZE;

3874 }

3875 #endif

3876

/*
** The following version of unixSectorSize() is optimized for QNX.
**
** On the first call for a file (sectorSize==0) it queries the file system
** with fstatvfs() and caches both the sector size and the matching device
** characteristics in the unixFile, keyed on the file system's base type.
** If fstatvfs() fails, or the reported size is not a multiple of 512, the
** defaults (SQLITE_DEFAULT_SECTOR_SIZE, no characteristics) are used.
*/
#ifdef __QNXNTO__
#include <sys/dcmd_blk.h>
#include <sys/statvfs.h>
static int unixSectorSize(sqlite3_file *id){
  unixFile *pFile = (unixFile*)id;
  if( pFile->sectorSize == 0 ){
    struct statvfs fsInfo;

    /* Set defaults for non-supported filesystems */
    pFile->sectorSize = SQLITE_DEFAULT_SECTOR_SIZE;
    pFile->deviceCharacteristics = 0;
    if( fstatvfs(pFile->h, &fsInfo) == -1 ) {
      return pFile->sectorSize;
    }

    if( !strcmp(fsInfo.f_basetype, "tmp") ) {
      pFile->sectorSize = fsInfo.f_bsize;
      pFile->deviceCharacteristics =
        SQLITE_IOCAP_ATOMIC4K |       /* All ram filesystem writes are atomic */
        SQLITE_IOCAP_SAFE_APPEND |    /* growing the file does not occur until
                                      ** the write succeeds */
        SQLITE_IOCAP_SEQUENTIAL |     /* The ram filesystem has no write behind
                                      ** so it is ordered */
        0;
    }else if( strstr(fsInfo.f_basetype, "etfs") ){
      pFile->sectorSize = fsInfo.f_bsize;
      pFile->deviceCharacteristics =
        /* etfs cluster size writes are atomic */
        (pFile->sectorSize / 512 * SQLITE_IOCAP_ATOMIC512) |
        SQLITE_IOCAP_SAFE_APPEND |    /* growing the file does not occur until
                                      ** the write succeeds */
        SQLITE_IOCAP_SEQUENTIAL |     /* The ram filesystem has no write behind
                                      ** so it is ordered */
        0;
    }else if( !strcmp(fsInfo.f_basetype, "qnx6") ){
      pFile->sectorSize = fsInfo.f_bsize;
      pFile->deviceCharacteristics =
        SQLITE_IOCAP_ATOMIC |         /* All filesystem writes are atomic */
        SQLITE_IOCAP_SAFE_APPEND |    /* growing the file does not occur until
                                      ** the write succeeds */
        SQLITE_IOCAP_SEQUENTIAL |     /* The ram filesystem has no write behind
                                      ** so it is ordered */
        0;
    }else if( !strcmp(fsInfo.f_basetype, "qnx4") ){
      pFile->sectorSize = fsInfo.f_bsize;
      pFile->deviceCharacteristics =
        /* full bitset of atomics from max sector size and smaller */
        ((pFile->sectorSize / 512 * SQLITE_IOCAP_ATOMIC512) << 1) - 2 |
        SQLITE_IOCAP_SEQUENTIAL |     /* The ram filesystem has no write behind
                                      ** so it is ordered */
        0;
    }else if( strstr(fsInfo.f_basetype, "dos") ){
      pFile->sectorSize = fsInfo.f_bsize;
      pFile->deviceCharacteristics =
        /* full bitset of atomics from max sector size and smaller */
        ((pFile->sectorSize / 512 * SQLITE_IOCAP_ATOMIC512) << 1) - 2 |
        SQLITE_IOCAP_SEQUENTIAL |     /* The ram filesystem has no write behind
                                      ** so it is ordered */
        0;
    }else{
      /* Unrecognized file system: keep the default sector size. */
      pFile->deviceCharacteristics =
        SQLITE_IOCAP_ATOMIC512 |      /* blocks are atomic */
        SQLITE_IOCAP_SAFE_APPEND |    /* growing the file does not occur until
                                      ** the write succeeds */
        0;
    }
  }
  /* Last chance verification.  If the sector size isn't a multiple of 512
  ** then it isn't valid.*/
  if( pFile->sectorSize % 512 != 0 ){
    pFile->deviceCharacteristics = 0;
    pFile->sectorSize = SQLITE_DEFAULT_SECTOR_SIZE;
  }
  return pFile->sectorSize;
}
#endif /* __QNXNTO__ */

3956

3957 /*

3958 ** Return the device characteristics for the file.

3959 **

3960 ** This VFS is set up to return SQLITE_IOCAP_POWERSAFE_OVERWRITE by default.

3961 ** However, that choice is controversial since technically the underlying

3962 ** file system does not always provide powersafe overwrites. (In other

3963 ** words, after a power-loss event, parts of the file that were never

3964 ** written might end up being altered.) However, non-PSOW behavior is very,

3965 ** very rare. And asserting PSOW makes a large reduction in the amount

3966 ** of required I/O for journaling, since a lot of padding is eliminated.

3967 ** Hence, while POWERSAFE_OVERWRITE is on by default, there is a file-control

3968 ** available to turn it off and URI query parameter available to turn it off.

3969 */

3970 static int unixDeviceCharacteristics(sqlite3_file *id){

3971 unixFile p = (unixFile)id;

3972 int rc = 0;

3973 #ifdef __QNXNTO__

3974 if( p->sectorSize==0 ) unixSectorSize(id);

3975 rc = p->deviceCharacteristics;

3976 #endif

3977 if( p->ctrlFlags & UNIXFILE_PSOW ){

3978 rc \|= SQLITE_IOCAP_POWERSAFE_OVERWRITE;

3979 }

3980 return rc;

3981 }

3982

#if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0

/*
** Return the system page size.
**
** This function should not be called directly by other code in this file.
** Instead, it should be called via macro osGetpagesize().
**
** Uses getpagesize() when _BSD_SOURCE is defined and
** sysconf(_SC_PAGESIZE) otherwise.
*/
static int unixGetpagesize(void){
#if defined(_BSD_SOURCE)
  return getpagesize();
#else
  return (int)sysconf(_SC_PAGESIZE);
#endif
}

#endif /* !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 */

4000

4001 #ifndef SQLITE_OMIT_WAL

4002

4003 /*

4004 ** Object used to represent an shared memory buffer.

4005 **

4006 ** When multiple threads all reference the same wal-index, each thread

4007 ** has its own unixShm object, but they all point to a single instance

4008 ** of this unixShmNode object. In other words, each wal-index is opened

4009 ** only once per process.

4010 **

4011 ** Each unixShmNode object is connected to a single unixInodeInfo object.

4012 ** We could coalesce this object into unixInodeInfo, but that would mean

4013 ** every open file that does not use shared memory (in other words, most

4014 ** open files) would have to carry around this extra information. So

4015 ** the unixInodeInfo object contains a pointer to this unixShmNode object

4016 ** and the unixShmNode object is created only when needed.

4017 **

4018 ** unixMutexHeld() must be true when creating or destroying

4019 ** this object or while reading or writing the following fields:

4020 **

4021 ** nRef

4022 **

4023 ** The following fields are read-only after the object is created:

4024 **

4025 ** fid

4026 ** zFilename

4027 **

4028 ** Either unixShmNode.mutex must be held or unixShmNode.nRef==0 and

4029 ** unixMutexHeld() is true when reading or writing any other field

4030 ** in this structure.

4031 */

4032 struct unixShmNode {

4033 unixInodeInfo pInode; / unixInodeInfo that owns this SHM node */

4034 sqlite3_mutex mutex; / Mutex to access this object */

4035 char zFilename; / Name of the mmapped file */

4036 int h; /* Open file descriptor */

4037 int szRegion; /* Size of shared-memory regions */

4038 u16 nRegion; /* Size of array apRegion */

4039 u8 isReadonly; /* True if read-only */

4040 char *apRegion; / Array of mapped shared-memory regions */

4041 int nRef; /* Number of unixShm objects pointing to this */

4042 unixShm pFirst; / All unixShm objects pointing to this */

4043 #ifdef SQLITE_DEBUG

4044 u8 exclMask; /* Mask of exclusive locks held */

4045 u8 sharedMask; /* Mask of shared locks held */

4046 u8 nextShmId; /* Next available unixShm.id value */

4047 #endif

4048 };

4049

4050 /*

4051 ** Structure used internally by this VFS to record the state of an

4052 ** open shared memory connection.

4053 **

4054 ** The following fields are initialized when this object is created and

4055 ** are read-only thereafter:

4056 **

4057 ** unixShm.pFile

4058 ** unixShm.id

4059 **

4060 ** All other fields are read/write. The unixShm.pFile->mutex must be held

4061 ** while accessing any read/write fields.

4062 */

4063 struct unixShm {

4064 unixShmNode pShmNode; / The underlying unixShmNode object */

4065 unixShm pNext; / Next unixShm with the same unixShmNode */

4066 u8 hasMutex; /* True if holding the unixShmNode mutex */

4067 u8 id; /* Id of this connection within its unixShmNode */

4068 u16 sharedMask; /* Mask of shared locks held */

4069 u16 exclMask; /* Mask of exclusive locks held */

4070 };

4071

/*
** Constants used for locking.
**
** UNIX_SHM_BASE is the file offset of the first lock byte and
** UNIX_SHM_DMS is the byte immediately after the SQLITE_SHM_NLOCK lock
** bytes, used as the "dead-man switch".
*/
#define UNIX_SHM_BASE   ((22+SQLITE_SHM_NLOCK)*4)         /* first lock byte */
#define UNIX_SHM_DMS    (UNIX_SHM_BASE+SQLITE_SHM_NLOCK)  /* deadman switch */

4077

4078 /*

4079 ** Apply posix advisory locks for all bytes from ofst through ofst+n-1.

4080 **

4081 ** Locks block if the mask is exactly UNIX_SHM_C and are non-blocking

4082 ** otherwise.

4083 */

4084 static int unixShmSystemLock(

4085 unixShmNode pShmNode, / Apply locks to this open shared-memory segment */

4086 int lockType, /* F_UNLCK, F_RDLCK, or F_WRLCK */

4087 int ofst, /* First byte of the locking range */

4088 int n /* Number of bytes to lock */

4089 ){

4090 struct flock f; /* The posix advisory locking structure */

4091 int rc = SQLITE_OK; /* Result code form fcntl() */

4092

4093 /* Access to the unixShmNode object is serialized by the caller */

4094 assert( sqlite3_mutex_held(pShmNode->mutex) \|\| pShmNode->nRef==0 );

4095

4096 /* Shared locks never span more than one byte */

4097 assert( n==1 \|\| lockType!=F_RDLCK );

4098

4099 /* Locks are within range */

4100 assert( n>=1 && n<SQLITE_SHM_NLOCK );

4101

4102 if( pShmNode->h>=0 ){

4103 /* Initialize the locking parameters */

4104 memset(&f, 0, sizeof(f));

4105 f.l_type = lockType;

4106 f.l_whence = SEEK_SET;

4107 f.l_start = ofst;

4108 f.l_len = n;

4109

4110 rc = osFcntl(pShmNode->h, F_SETLK, &f);

4111 rc = (rc!=(-1)) ? SQLITE_OK : SQLITE_BUSY;

4112 }

4113

4114 /* Update the global lock state and do debug tracing */

4115 #ifdef SQLITE_DEBUG

4116 { u16 mask;

4117 OSTRACE(("SHM-LOCK "));

4118 mask = ofst>31 ? 0xffff : (1<<(ofst+n)) - (1<<ofst);

4119 if( rc==SQLITE_OK ){

4120 if( lockType==F_UNLCK ){

4121 OSTRACE(("unlock %d ok", ofst));

4122 pShmNode->exclMask &= ~mask;

4123 pShmNode->sharedMask &= ~mask;

4124 }else if( lockType==F_RDLCK ){

4125 OSTRACE(("read-lock %d ok", ofst));

4126 pShmNode->exclMask &= ~mask;

4127 pShmNode->sharedMask \|= mask;

4128 }else{

4129 assert( lockType==F_WRLCK );

4130 OSTRACE(("write-lock %d ok", ofst));

4131 pShmNode->exclMask \|= mask;

4132 pShmNode->sharedMask &= ~mask;

4133 }

4134 }else{

4135 if( lockType==F_UNLCK ){

4136 OSTRACE(("unlock %d failed", ofst));

4137 }else if( lockType==F_RDLCK ){

4138 OSTRACE(("read-lock failed"));

4139 }else{

4140 assert( lockType==F_WRLCK );

4141 OSTRACE(("write-lock %d failed", ofst));

4142 }

4143 }

4144 OSTRACE((" - afterwards %03x,%03x\n",

4145 pShmNode->sharedMask, pShmNode->exclMask));

4146 }

4147 #endif

4148

4149 return rc;

4150 }

4151

/*
** Return the minimum number of 32KB shm regions that should be mapped at
** a time, assuming that each mapping must be an integer multiple of the
** current system page-size.
**
** Usually, this is 1.  The exception seems to be systems that are
** configured to use 64KB pages - in this case each mapping must cover at
** least two shm regions.
*/
static int unixShmRegionPerMap(void){
  int shmsz = 32*1024;            /* SHM region size */
  int pgsz = osGetpagesize();     /* System page size */
  assert( ((pgsz-1)&pgsz)==0 );   /* Page size must be a power of 2 */
  if( pgsz<shmsz ) return 1;
  return pgsz/shmsz;
}

4168

4169 /*

4170 ** Purge the unixShmNodeList list of all entries with unixShmNode.nRef==0.

4171 **

4172 ** This is not a VFS shared-memory method; it is a utility function called

4173 ** by VFS shared-memory methods.

4174 */

4175 static void unixShmPurge(unixFile *pFd){

4176 unixShmNode *p = pFd->pInode->pShmNode;

4177 assert( unixMutexHeld() );

4178 if( p && p->nRef==0 ){

4179 int nShmPerMap = unixShmRegionPerMap();

4180 int i;

4181 assert( p->pInode==pFd->pInode );

4182 sqlite3_mutex_free(p->mutex);

4183 for(i=0; i<p->nRegion; i+=nShmPerMap){

4184 if( p->h>=0 ){

4185 osMunmap(p->apRegion[i], p->szRegion);

4186 }else{

4187 sqlite3_free(p->apRegion[i]);

4188 }

4189 }

4190 sqlite3_free(p->apRegion);

4191 if( p->h>=0 ){

4192 robust_close(pFd, p->h, __LINE__);

4193 p->h = -1;

4194 }

4195 p->pInode->pShmNode = 0;

4196 sqlite3_free(p);

4197 }

4198 }

4199

4200 /*

4201 ** Open a shared-memory area associated with open database file pDbFd.

4202 ** This particular implementation uses mmapped files.

4203 **

4204 ** The file used to implement shared-memory is in the same directory

4205 ** as the open database file and has the same name as the open database

4206 ** file with the "-shm" suffix added. For example, if the database file

4207 ** is "/home/user1/config.db" then the file that is created and mmapped

4208 ** for shared memory will be called "/home/user1/config.db-shm".

4209 **

4210 ** Another approach to is to use files in /dev/shm or /dev/tmp or an

4211 ** some other tmpfs mount. But if a file in a different directory

4212 ** from the database file is used, then differing access permissions

4213 ** or a chroot() might cause two different processes on the same

4214 ** database to end up using different files for shared memory -

4215 ** meaning that their memory would not really be shared - resulting

4216 ** in database corruption. Nevertheless, this tmpfs file usage

4217 ** can be enabled at compile-time using -DSQLITE_SHM_DIRECTORY="/dev/shm"

4218 ** or the equivalent. The use of the SQLITE_SHM_DIRECTORY compile-time

4219 ** option results in an incompatible build of SQLite; builds of SQLite

4220 ** that with differing SQLITE_SHM_DIRECTORY settings attempt to use the

4221 ** same database file at the same time, database corruption will likely

4222 ** result. The SQLITE_SHM_DIRECTORY compile-time option is considered

4223 ** "unsupported" and may go away in a future SQLite release.

4224 **

4225 ** When opening a new shared-memory file, if no other instances of that

4226 ** file are currently open, in this process or in other processes, then

4227 ** the file must be truncated to zero length or have its header cleared.

4228 **

4229 ** If the original database file (pDbFd) is using the "unix-excl" VFS

4230 ** that means that an exclusive lock is held on the database file and

4231 ** that no other processes are able to read or write the database. In

4232 ** that case, we do not really need shared memory. No shared memory

4233 ** file is created. The shared memory will be simulated with heap memory.

4234 */

4235 static int unixOpenSharedMemory(unixFile *pDbFd){

4236 struct unixShm p = 0; / The connection to be opened */

4237 struct unixShmNode pShmNode; / The underlying mmapped file */

4238 int rc; /* Result code */

4239 unixInodeInfo pInode; / The inode of fd */

4240 char zShmFilename; / Name of the file used for SHM */

4241 int nShmFilename; /* Size of the SHM filename in bytes */

4242

4243 /* Allocate space for the new unixShm object. */

4244 p = sqlite3_malloc( sizeof(*p) );

4245 if( p==0 ) return SQLITE_NOMEM;

4246 memset(p, 0, sizeof(*p));

4247 assert( pDbFd->pShm==0 );

4248

4249 /* Check to see if a unixShmNode object already exists. Reuse an existing

4250 ** one if present. Create a new one if necessary.

4251 */

4252 unixEnterMutex();

4253 pInode = pDbFd->pInode;

4254 pShmNode = pInode->pShmNode;

4255 if( pShmNode==0 ){

4256 struct stat sStat; /* fstat() info for database file */

4257

4258 /* Call fstat() to figure out the permissions on the database file. If

4259 ** a new *-shm file is created, an attempt will be made to create it

4260 ** with the same permissions.

4261 */

4262 if( osFstat(pDbFd->h, &sStat) && pInode->bProcessLock==0 ){

4263 rc = SQLITE_IOERR_FSTAT;

4264 goto shm_open_err;

4265 }

4266

4267 #ifdef SQLITE_SHM_DIRECTORY

4268 nShmFilename = sizeof(SQLITE_SHM_DIRECTORY) + 31;

4269 #else

4270 nShmFilename = 6 + (int)strlen(pDbFd->zPath);

4271 #endif

4272 pShmNode = sqlite3_malloc( sizeof(*pShmNode) + nShmFilename );

4273 if( pShmNode==0 ){

4274 rc = SQLITE_NOMEM;

4275 goto shm_open_err;

4276 }

4277 memset(pShmNode, 0, sizeof(*pShmNode)+nShmFilename);

4278 zShmFilename = pShmNode->zFilename = (char*)&pShmNode[1];

4279 #ifdef SQLITE_SHM_DIRECTORY

4280 sqlite3_snprintf(nShmFilename, zShmFilename,

4281 SQLITE_SHM_DIRECTORY "/sqlite-shm-%x-%x",

4282 (u32)sStat.st_ino, (u32)sStat.st_dev);

4283 #else

4284 sqlite3_snprintf(nShmFilename, zShmFilename, "%s-shm", pDbFd->zPath);

4285 sqlite3FileSuffix3(pDbFd->zPath, zShmFilename);

4286 #endif

4287 pShmNode->h = -1;

4288 pDbFd->pInode->pShmNode = pShmNode;

4289 pShmNode->pInode = pDbFd->pInode;

4290 pShmNode->mutex = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST);

4291 if( pShmNode->mutex==0 ){

4292 rc = SQLITE_NOMEM;

4293 goto shm_open_err;

4294 }

4295

4296 if( pInode->bProcessLock==0 ){

4297 int openFlags = O_RDWR \| O_CREAT;

4298 if( sqlite3_uri_boolean(pDbFd->zPath, "readonly_shm", 0) ){

4299 openFlags = O_RDONLY;

4300 pShmNode->isReadonly = 1;

4301 }

4302 pShmNode->h = robust_open(zShmFilename, openFlags, (sStat.st_mode&0777));

4303 if( pShmNode->h<0 ){

4304 rc = unixLogError(SQLITE_CANTOPEN_BKPT, "open", zShmFilename);

4305 goto shm_open_err;

4306 }

4307

4308 /* If this process is running as root, make sure that the SHM file

4309 ** is owned by the same user that owns the original database. Otherwise,

4310 ** the original owner will not be able to connect.

4311 */

4312 osFchown(pShmNode->h, sStat.st_uid, sStat.st_gid);

4313

4314 /* Check to see if another process is holding the dead-man switch.

4315 ** If not, truncate the file to zero length.

4316 */

4317 rc = SQLITE_OK;

4318 if( unixShmSystemLock(pShmNode, F_WRLCK, UNIX_SHM_DMS, 1)==SQLITE_OK ){

4319 if( robust_ftruncate(pShmNode->h, 0) ){

4320 rc = unixLogError(SQLITE_IOERR_SHMOPEN, "ftruncate", zShmFilename);

4321 }

4322 }

4323 if( rc==SQLITE_OK ){

4324 rc = unixShmSystemLock(pShmNode, F_RDLCK, UNIX_SHM_DMS, 1);

4325 }

4326 if( rc ) goto shm_open_err;

4327 }

4328 }

4329

4330 /* Make the new connection a child of the unixShmNode */

4331 p->pShmNode = pShmNode;

4332 #ifdef SQLITE_DEBUG

4333 p->id = pShmNode->nextShmId++;

4334 #endif

4335 pShmNode->nRef++;

4336 pDbFd->pShm = p;

4337 unixLeaveMutex();

4338

4339 /* The reference count on pShmNode has already been incremented under

4340 ** the cover of the unixEnterMutex() mutex and the pointer from the

4341 ** new (struct unixShm) object to the pShmNode has been set. All that is

4342 ** left to do is to link the new object into the linked list starting

4343 ** at pShmNode->pFirst. This must be done while holding the pShmNode->mutex

4344 ** mutex.

4345 */

4346 sqlite3_mutex_enter(pShmNode->mutex);

4347 p->pNext = pShmNode->pFirst;

4348 pShmNode->pFirst = p;

4349 sqlite3_mutex_leave(pShmNode->mutex);

4350 return SQLITE_OK;

4351

4352 /* Jump here on any error */

4353 shm_open_err:

4354 unixShmPurge(pDbFd); /* This call frees pShmNode if required */

4355 sqlite3_free(p);

4356 unixLeaveMutex();

4357 return rc;

4358 }

4359

4360 /*

4361 ** This function is called to obtain a pointer to region iRegion of the

4362 ** shared-memory associated with the database file fd. Shared-memory regions

4363 ** are numbered starting from zero. Each shared-memory region is szRegion

4364 ** bytes in size.

4365 **

4366 ** If an error occurs, an error code is returned and *pp is set to NULL.

4367 **

4368 ** Otherwise, if the bExtend parameter is 0 and the requested shared-memory

4369 ** region has not been allocated (by any client, including one running in a

4370 ** separate process), then *pp is set to NULL and SQLITE_OK returned. If

4371 ** bExtend is non-zero and the requested shared-memory region has not yet

4372 ** been allocated, it is allocated by this function.

4373 **

4374 ** If the shared-memory region has already been allocated or is allocated by

4375 ** this call as described above, then it is mapped into this process's

4376 ** address space (if it is not already), *pp is set to point to the mapped

4377 ** memory and SQLITE_OK returned.

4378 */

4379 static int unixShmMap(

4380 sqlite3_file fd, / Handle open on database file */

4381 int iRegion, /* Region to retrieve */

4382 int szRegion, /* Size of regions */

4383 int bExtend, /* True to extend file if necessary */

4384 void volatile *pp / OUT: Mapped memory */

4385 ){

4386 unixFile pDbFd = (unixFile)fd;

4387 unixShm *p;

4388 unixShmNode *pShmNode;

4389 int rc = SQLITE_OK;

4390 int nShmPerMap = unixShmRegionPerMap();

4391 int nReqRegion;

4392

4393 /* If the shared-memory file has not yet been opened, open it now. */

4394 if( pDbFd->pShm==0 ){

4395 rc = unixOpenSharedMemory(pDbFd);

4396 if( rc!=SQLITE_OK ) return rc;

4397 }

4398

4399 p = pDbFd->pShm;

4400 pShmNode = p->pShmNode;

4401 sqlite3_mutex_enter(pShmNode->mutex);

4402 assert( szRegion==pShmNode->szRegion \|\| pShmNode->nRegion==0 );

4403 assert( pShmNode->pInode==pDbFd->pInode );

4404 assert( pShmNode->h>=0 \|\| pDbFd->pInode->bProcessLock==1 );

4405 assert( pShmNode->h<0 \|\| pDbFd->pInode->bProcessLock==0 );

4406

4407 /* Minimum number of regions required to be mapped. */

4408 nReqRegion = ((iRegion+nShmPerMap) / nShmPerMap) * nShmPerMap;

4409

4410 if( pShmNode->nRegion<nReqRegion ){

4411 char *apNew; / New apRegion[] array */

4412 int nByte = nReqRegionszRegion; / Minimum required file size */

4413 struct stat sStat; /* Used by fstat() */

4414

4415 pShmNode->szRegion = szRegion;

4416

4417 if( pShmNode->h>=0 ){

4418 /* The requested region is not mapped into this processes address space.

4419 ** Check to see if it has been allocated (i.e. if the wal-index file is

4420 ** large enough to contain the requested region).

4421 */

4422 if( osFstat(pShmNode->h, &sStat) ){

4423 rc = SQLITE_IOERR_SHMSIZE;

4424 goto shmpage_out;

4425 }

4426

4427 if( sStat.st_size<nByte ){

4428 /* The requested memory region does not exist. If bExtend is set to

4429 ** false, exit early. *pp will be set to NULL and SQLITE_OK returned.

4430 */

4431 if( !bExtend ){

4432 goto shmpage_out;

4433 }

4434

4435 /* Alternatively, if bExtend is true, extend the file. Do this by

4436 ** writing a single byte to the end of each (OS) page being

4437 ** allocated or extended. Technically, we need only write to the

4438 ** last page in order to extend the file. But writing to all new

4439 ** pages forces the OS to allocate them immediately, which reduces

4440 ** the chances of SIGBUS while accessing the mapped region later on.

4441 */

4442 else{

4443 static const int pgsz = 4096;

4444 int iPg;

4445

4446 /* Write to the last byte of each newly allocated or extended page */

4447 assert( (nByte % pgsz)==0 );

4448 for(iPg=(sStat.st_size/pgsz); iPg<(nByte/pgsz); iPg++){

4449 if( seekAndWriteFd(pShmNode->h, iPg*pgsz + pgsz-1, "", 1, 0)!=1 ){

4450 const char *zFile = pShmNode->zFilename;

4451 rc = unixLogError(SQLITE_IOERR_SHMSIZE, "write", zFile);

4452 goto shmpage_out;

4453 }

4454 }

4455 }

4456 }

4457 }

4458

4459 /* Map the requested memory region into this processes address space. */

4460 apNew = (char **)sqlite3_realloc(

4461 pShmNode->apRegion, nReqRegionsizeof(char )

4462 );

4463 if( !apNew ){

4464 rc = SQLITE_IOERR_NOMEM;

4465 goto shmpage_out;

4466 }

4467 pShmNode->apRegion = apNew;

4468 while( pShmNode->nRegion<nReqRegion ){

4469 int nMap = szRegion*nShmPerMap;

4470 int i;

4471 void *pMem;

4472 if( pShmNode->h>=0 ){

4473 pMem = osMmap(0, nMap,

4474 pShmNode->isReadonly ? PROT_READ : PROT_READ\|PROT_WRITE,

4475 MAP_SHARED, pShmNode->h, szRegion*(i64)pShmNode->nRegion

4476 );

4477 if( pMem==MAP_FAILED ){

4478 rc = unixLogError(SQLITE_IOERR_SHMMAP, "mmap", pShmNode->zFilename);

4479 goto shmpage_out;

4480 }

4481 }else{

4482 pMem = sqlite3_malloc(szRegion);

4483 if( pMem==0 ){

4484 rc = SQLITE_NOMEM;

4485 goto shmpage_out;

4486 }

4487 memset(pMem, 0, szRegion);

4488 }

4489

4490 for(i=0; i<nShmPerMap; i++){

4491 pShmNode->apRegion[pShmNode->nRegion+i] = &((char)pMem)[szRegioni];

4492 }

4493 pShmNode->nRegion += nShmPerMap;

4494 }

4495 }

4496

4497 shmpage_out:

4498 if( pShmNode->nRegion>iRegion ){

4499 *pp = pShmNode->apRegion[iRegion];

4500 }else{

4501 *pp = 0;

4502 }

4503 if( pShmNode->isReadonly && rc==SQLITE_OK ) rc = SQLITE_READONLY;

4504 sqlite3_mutex_leave(pShmNode->mutex);

4505 return rc;

4506 }

4507

4508 /*

4509 ** Change the lock state for a shared-memory segment.

4510 **

4511 ** Note that the relationship between SHAREd and EXCLUSIVE locks is a little

4512 ** different here than in posix. In xShmLock(), one can go from unlocked

4513 ** to shared and back or from unlocked to exclusive and back. But one may

4514 ** not go from shared to exclusive or from exclusive to shared.

4515 */

4516 static int unixShmLock(

4517 sqlite3_file fd, / Database file holding the shared memory */

4518 int ofst, /* First lock to acquire or release */

4519 int n, /* Number of locks to acquire or release */

4520 int flags /* What to do with the lock */

4521 ){

4522 unixFile pDbFd = (unixFile)fd; /* Connection holding shared memory */

4523 unixShm p = pDbFd->pShm; / The shared memory being locked */

4524 unixShm pX; / For looping over all siblings */

4525 unixShmNode pShmNode = p->pShmNode; / The underlying file iNode */

4526 int rc = SQLITE_OK; /* Result code */

4527 u16 mask; /* Mask of locks to take or release */

4528

4529 assert( pShmNode==pDbFd->pInode->pShmNode );

4530 assert( pShmNode->pInode==pDbFd->pInode );

4531 assert( ofst>=0 && ofst+n<=SQLITE_SHM_NLOCK );

4532 assert( n>=1 );

4533 assert( flags==(SQLITE_SHM_LOCK \| SQLITE_SHM_SHARED)

4534 \|\| flags==(SQLITE_SHM_LOCK \| SQLITE_SHM_EXCLUSIVE)

4535 \|\| flags==(SQLITE_SHM_UNLOCK \| SQLITE_SHM_SHARED)

4536 \|\| flags==(SQLITE_SHM_UNLOCK \| SQLITE_SHM_EXCLUSIVE) );

4537 assert( n==1 \|\| (flags & SQLITE_SHM_EXCLUSIVE)!=0 );

4538 assert( pShmNode->h>=0 \|\| pDbFd->pInode->bProcessLock==1 );

4539 assert( pShmNode->h<0 \|\| pDbFd->pInode->bProcessLock==0 );

4540

4541 mask = (1<<(ofst+n)) - (1<<ofst);

4542 assert( n>1 \|\| mask==(1<<ofst) );

4543 sqlite3_mutex_enter(pShmNode->mutex);

4544 if( flags & SQLITE_SHM_UNLOCK ){

4545 u16 allMask = 0; /* Mask of locks held by siblings */

4546

4547 /* See if any siblings hold this same lock */

4548 for(pX=pShmNode->pFirst; pX; pX=pX->pNext){

4549 if( pX==p ) continue;

4550 assert( (pX->exclMask & (p->exclMask\|p->sharedMask))==0 );

4551 allMask \|= pX->sharedMask;

4552 }

4553

4554 /* Unlock the system-level locks */

4555 if( (mask & allMask)==0 ){

4556 rc = unixShmSystemLock(pShmNode, F_UNLCK, ofst+UNIX_SHM_BASE, n);

4557 }else{

4558 rc = SQLITE_OK;

4559 }

4560

4561 /* Undo the local locks */

4562 if( rc==SQLITE_OK ){

4563 p->exclMask &= ~mask;

4564 p->sharedMask &= ~mask;

4565 }

4566 }else if( flags & SQLITE_SHM_SHARED ){

4567 u16 allShared = 0; /* Union of locks held by connections other than "p" */

4568

4569 /* Find out which shared locks are already held by sibling connections.

4570 ** If any sibling already holds an exclusive lock, go ahead and return

4571 ** SQLITE_BUSY.

4572 */

4573 for(pX=pShmNode->pFirst; pX; pX=pX->pNext){

4574 if( (pX->exclMask & mask)!=0 ){

4575 rc = SQLITE_BUSY;

4576 break;

4577 }

4578 allShared \|= pX->sharedMask;

4579 }

4580

4581 /* Get shared locks at the system level, if necessary */

4582 if( rc==SQLITE_OK ){

4583 if( (allShared & mask)==0 ){

4584 rc = unixShmSystemLock(pShmNode, F_RDLCK, ofst+UNIX_SHM_BASE, n);

4585 }else{

4586 rc = SQLITE_OK;

4587 }

4588 }

4589

4590 /* Get the local shared locks */

4591 if( rc==SQLITE_OK ){

4592 p->sharedMask \|= mask;

4593 }

4594 }else{

4595 /* Make sure no sibling connections hold locks that will block this

4596 ** lock. If any do, return SQLITE_BUSY right away.

4597 */

4598 for(pX=pShmNode->pFirst; pX; pX=pX->pNext){

4599 if( (pX->exclMask & mask)!=0 \|\| (pX->sharedMask & mask)!=0 ){

4600 rc = SQLITE_BUSY;

4601 break;

4602 }

4603 }

4604

4605 /* Get the exclusive locks at the system level. Then if successful

4606 ** also mark the local connection as being locked.

4607 */

4608 if( rc==SQLITE_OK ){

4609 rc = unixShmSystemLock(pShmNode, F_WRLCK, ofst+UNIX_SHM_BASE, n);

4610 if( rc==SQLITE_OK ){

4611 assert( (p->sharedMask & mask)==0 );

4612 p->exclMask \|= mask;

4613 }

4614 }

4615 }

4616 sqlite3_mutex_leave(pShmNode->mutex);

4617 OSTRACE(("SHM-LOCK shmid-%d, pid-%d got %03x,%03x\n",

4618 p->id, getpid(), p->sharedMask, p->exclMask));

4619 return rc;

4620 }

4621

4622 /*

4623 ** Implement a memory barrier or memory fence on shared memory.

4624 **

4625 ** All loads and stores begun before the barrier must complete before

4626 ** any load or store begun after the barrier.

4627 */

4628 static void unixShmBarrier(

4629 sqlite3_file fd / Database file holding the shared memory */

4630 ){

4631 UNUSED_PARAMETER(fd);

4632 unixEnterMutex();

4633 unixLeaveMutex();

4634 }

4635

4636 /*

4637 ** Close a connection to shared-memory. Delete the underlying

4638 ** storage if deleteFlag is true.

4639 **

4640 ** If there is no shared memory associated with the connection then this

4641 ** routine is a harmless no-op.

4642 */

4643 static int unixShmUnmap(

4644 sqlite3_file fd, / The underlying database file */

4645 int deleteFlag /* Delete shared-memory if true */

4646 ){

4647 unixShm p; / The connection to be closed */

4648 unixShmNode pShmNode; / The underlying shared-memory file */

4649 unixShm *pp; / For looping over sibling connections */

4650 unixFile pDbFd; / The underlying database file */

4651

4652 pDbFd = (unixFile*)fd;

4653 p = pDbFd->pShm;

4654 if( p==0 ) return SQLITE_OK;

4655 pShmNode = p->pShmNode;

4656

4657 assert( pShmNode==pDbFd->pInode->pShmNode );

4658 assert( pShmNode->pInode==pDbFd->pInode );

4659

4660 /* Remove connection p from the set of connections associated

4661 ** with pShmNode */

4662 sqlite3_mutex_enter(pShmNode->mutex);

4663 for(pp=&pShmNode->pFirst; (pp)!=p; pp = &(pp)->pNext){}

4664 *pp = p->pNext;

4665

4666 /* Free the connection p */

4667 sqlite3_free(p);

4668 pDbFd->pShm = 0;

4669 sqlite3_mutex_leave(pShmNode->mutex);

4670

4671 /* If pShmNode->nRef has reached 0, then close the underlying

4672 ** shared-memory file, too */

4673 unixEnterMutex();

4674 assert( pShmNode->nRef>0 );

4675 pShmNode->nRef--;

4676 if( pShmNode->nRef==0 ){

4677 if( deleteFlag && pShmNode->h>=0 ) osUnlink(pShmNode->zFilename);

4678 unixShmPurge(pDbFd);

4679 }

4680 unixLeaveMutex();

4681

4682 return SQLITE_OK;

4683 }

4684

4685

4686 #else

4687 # define unixShmMap 0

4688 # define unixShmLock 0

4689 # define unixShmBarrier 0

4690 # define unixShmUnmap 0

4691 #endif /* #ifndef SQLITE_OMIT_WAL */

4692

4693 #if SQLITE_MAX_MMAP_SIZE>0

4694 /*

4695 ** If it is currently memory mapped, unmap file pFd.

4696 */

4697 static void unixUnmapfile(unixFile *pFd){

4698 assert( pFd->nFetchOut==0 );

4699 if( pFd->pMapRegion ){

4700 osMunmap(pFd->pMapRegion, pFd->mmapSizeActual);

4701 pFd->pMapRegion = 0;

4702 pFd->mmapSize = 0;

4703 pFd->mmapSizeActual = 0;

4704 }

4705 }

4706

4707 /*

4708 ** Attempt to set the size of the memory mapping maintained by file

4709 ** descriptor pFd to nNew bytes. Any existing mapping is discarded.

4710 **

4711 ** If successful, this function sets the following variables:

4712 **

4713 ** unixFile.pMapRegion

4714 ** unixFile.mmapSize

4715 ** unixFile.mmapSizeActual

4716 **

4717 ** If unsuccessful, an error message is logged via sqlite3_log() and

4718 ** the three variables above are zeroed. In this case SQLite should

4719 ** continue accessing the database using the xRead() and xWrite()

4720 ** methods.

4721 */

4722 static void unixRemapfile(

4723 unixFile pFd, / File descriptor object */

4724 i64 nNew /* Required mapping size */

4725 ){

4726 const char *zErr = "mmap";

4727 int h = pFd->h; /* File descriptor open on db file */

4728 u8 pOrig = (u8 )pFd->pMapRegion; /* Pointer to current file mapping */

4729 i64 nOrig = pFd->mmapSizeActual; /* Size of pOrig region in bytes */

4730 u8 pNew = 0; / Location of new mapping */

4731 int flags = PROT_READ; /* Flags to pass to mmap() */

4732

4733 assert( pFd->nFetchOut==0 );

4734 assert( nNew>pFd->mmapSize );

4735 assert( nNew<=pFd->mmapSizeMax );

4736 assert( nNew>0 );

4737 assert( pFd->mmapSizeActual>=pFd->mmapSize );

4738 assert( MAP_FAILED!=0 );

4739

4740 if( (pFd->ctrlFlags & UNIXFILE_RDONLY)==0 ) flags \|= PROT_WRITE;

4741

4742 if( pOrig ){

4743 #if HAVE_MREMAP

4744 i64 nReuse = pFd->mmapSize;

4745 #else

4746 const int szSyspage = osGetpagesize();

4747 i64 nReuse = (pFd->mmapSize & ~(szSyspage-1));

4748 #endif

4749 u8 *pReq = &pOrig[nReuse];

4750

4751 /* Unmap any pages of the existing mapping that cannot be reused. */

4752 if( nReuse!=nOrig ){

4753 osMunmap(pReq, nOrig-nReuse);

4754 }

4755

4756 #if HAVE_MREMAP

4757 pNew = osMremap(pOrig, nReuse, nNew, MREMAP_MAYMOVE);

4758 zErr = "mremap";

4759 #else

4760 pNew = osMmap(pReq, nNew-nReuse, flags, MAP_SHARED, h, nReuse);

4761 if( pNew!=MAP_FAILED ){

4762 if( pNew!=pReq ){

4763 osMunmap(pNew, nNew - nReuse);

4764 pNew = 0;

4765 }else{

4766 pNew = pOrig;

4767 }

4768 }

4769 #endif

4770

4771 /* The attempt to extend the existing mapping failed. Free it. */

4772 if( pNew==MAP_FAILED \|\| pNew==0 ){

4773 osMunmap(pOrig, nReuse);

4774 }

4775 }

4776

4777 /* If pNew is still NULL, try to create an entirely new mapping. */

4778 if( pNew==0 ){

4779 pNew = osMmap(0, nNew, flags, MAP_SHARED, h, 0);

4780 }

4781

4782 if( pNew==MAP_FAILED ){

4783 pNew = 0;

4784 nNew = 0;

4785 unixLogError(SQLITE_OK, zErr, pFd->zPath);

4786

4787 /* If the mmap() above failed, assume that all subsequent mmap() calls

4788 ** will probably fail too. Fall back to using xRead/xWrite exclusively

4789 ** in this case. */

4790 pFd->mmapSizeMax = 0;

4791 }

4792 pFd->pMapRegion = (void *)pNew;

4793 pFd->mmapSize = pFd->mmapSizeActual = nNew;

4794 }

4795

4796 /*

4797 ** Memory map or remap the file opened by file-descriptor pFd (if the file

4798 ** is already mapped, the existing mapping is replaced by the new). Or, if

4799 ** there already exists a mapping for this file, and there are still

4800 ** outstanding xFetch() references to it, this function is a no-op.

4801 **

4802 ** If parameter nByte is non-negative, then it is the requested size of

4803 ** the mapping to create. Otherwise, if nByte is less than zero, then the

4804 ** requested size is the size of the file on disk. The actual size of the

4805 ** created mapping is either the requested size or the value configured

4806 ** using SQLITE_FCNTL_MMAP_LIMIT, whichever is smaller.

4807 **

4808 ** SQLITE_OK is returned if no error occurs (even if the mapping is not

4809 ** recreated as a result of outstanding references) or an SQLite error

4810 ** code otherwise.

4811 */

4812 static int unixMapfile(unixFile *pFd, i64 nByte){

4813 i64 nMap = nByte;

4814 int rc;

4815

4816 assert( nMap>=0 \|\| pFd->nFetchOut==0 );

4817 if( pFd->nFetchOut>0 ) return SQLITE_OK;

4818

4819 if( nMap<0 ){

4820 struct stat statbuf; /* Low-level file information */

4821 rc = osFstat(pFd->h, &statbuf);

4822 if( rc!=SQLITE_OK ){

4823 return SQLITE_IOERR_FSTAT;

4824 }

4825 nMap = statbuf.st_size;

4826 }

4827 if( nMap>pFd->mmapSizeMax ){

4828 nMap = pFd->mmapSizeMax;

4829 }

4830

4831 if( nMap!=pFd->mmapSize ){

4832 if( nMap>0 ){

4833 unixRemapfile(pFd, nMap);

4834 }else{

4835 unixUnmapfile(pFd);

4836 }

4837 }

4838

4839 return SQLITE_OK;

4840 }

4841 #endif /* SQLITE_MAX_MMAP_SIZE>0 */

4842

4843 /*

4844 ** If possible, return a pointer to a mapping of file fd starting at offset

4845 ** iOff. The mapping must be valid for at least nAmt bytes.

4846 **

4847 ** If such a pointer can be obtained, store it in *pp and return SQLITE_OK.

4848 ** Or, if one cannot but no error occurs, set *pp to 0 and return SQLITE_OK.

4849 ** Finally, if an error does occur, return an SQLite error code. The final

4850 ** value of *pp is undefined in this case.

4851 **

4852 ** If this function does return a pointer, the caller must eventually

4853 ** release the reference by calling unixUnfetch().

4854 */

4855 static int unixFetch(sqlite3_file fd, i64 iOff, int nAmt, void *pp){

4856 #if SQLITE_MAX_MMAP_SIZE>0

4857 unixFile pFd = (unixFile )fd; /* The underlying database file */

4858 #endif

4859 *pp = 0;

4860

4861 #if SQLITE_MAX_MMAP_SIZE>0

4862 if( pFd->mmapSizeMax>0 ){

4863 if( pFd->pMapRegion==0 ){

4864 int rc = unixMapfile(pFd, -1);

4865 if( rc!=SQLITE_OK ) return rc;

4866 }

4867 if( pFd->mmapSize >= iOff+nAmt ){

4868 pp = &((u8 )pFd->pMapRegion)[iOff];

4869 pFd->nFetchOut++;

4870 }

4871 }

4872 #endif

4873 return SQLITE_OK;

4874 }

4875

4876 /*

4877 ** If the third argument is non-NULL, then this function releases a

4878 ** reference obtained by an earlier call to unixFetch(). The second

4879 ** argument passed to this function must be the same as the corresponding

4880 ** argument that was passed to the unixFetch() invocation.

4881 **

4882 ** Or, if the third argument is NULL, then this function is being called

4883 ** to inform the VFS layer that, according to POSIX, any existing mapping

4884 ** may now be invalid and should be unmapped.

4885 */

4886 static int unixUnfetch(sqlite3_file fd, i64 iOff, void p){

4887 #if SQLITE_MAX_MMAP_SIZE>0

4888 unixFile pFd = (unixFile )fd; /* The underlying database file */

4889 UNUSED_PARAMETER(iOff);

4890

4891 /* If p==0 (unmap the entire file) then there must be no outstanding

4892 ** xFetch references. Or, if p!=0 (meaning it is an xFetch reference),

4893 ** then there must be at least one outstanding. */

4894 assert( (p==0)==(pFd->nFetchOut==0) );

4895

4896 /* If p!=0, it must match the iOff value. */

4897 assert( p==0 \|\| p==&((u8 *)pFd->pMapRegion)[iOff] );

4898

4899 if( p ){

4900 pFd->nFetchOut--;

4901 }else{

4902 unixUnmapfile(pFd);

4903 }

4904

4905 assert( pFd->nFetchOut>=0 );

4906 #else

4907 UNUSED_PARAMETER(fd);

4908 UNUSED_PARAMETER(p);

4909 UNUSED_PARAMETER(iOff);

4910 #endif

4911 return SQLITE_OK;

4912 }

4913

4914 /*

4915 ** Here ends the implementation of all sqlite3_file methods.

4916 **

4917 ******************** End sqlite3_file Methods *****************************

4918 ******************************************************************************/

4919

/*
** This division contains definitions of sqlite3_io_methods objects that
** implement various file locking strategies. It also contains definitions
** of "finder" functions. A finder-function is used to locate the
** appropriate sqlite3_io_methods object for a particular database file.
** The pAppData field of each sqlite3_vfs object is initialized to point to
** the correct finder-function for that VFS.
**
** Most finder functions return a pointer to a fixed sqlite3_io_methods
** object. The only interesting finder-function is autolockIoFinder, which
** looks at the filesystem type and tries to guess the best locking
** strategy from that.
**
** For finder-function F, two objects are created:
**
**    (1) The real finder-function named "FImpl()".
**
**    (2) A constant pointer to this function named just "F".
**
** A pointer to the F pointer is used as the pAppData value for VFS
** objects. We have to do this instead of letting pAppData point directly
** at the finder-function since C90 rules prevent a void* from being cast
** into a function pointer.
**
** Each instance of the IOMETHODS macro generates:
**
**   *  A constant sqlite3_io_methods object called METHOD with interface
**      version VERSION, locking methods CLOSE, LOCK, UNLOCK and CKLOCK,
**      and xShmMap method SHMMAP. All other methods are the shared unix
**      implementations listed in the initializer.
**
**   *  An I/O method finder function called FINDER##Impl, and a constant
**      function pointer FINDER to it, that returns a pointer to the METHOD
**      object in the previous bullet.
*/
#define IOMETHODS(FINDER, METHOD, VERSION, CLOSE, LOCK, UNLOCK, CKLOCK, SHMMAP) \
static const sqlite3_io_methods METHOD = {                                   \
   VERSION,                    /* iVersion */                                \
   CLOSE,                      /* xClose */                                  \
   unixRead,                   /* xRead */                                   \
   unixWrite,                  /* xWrite */                                  \
   unixTruncate,               /* xTruncate */                               \
   unixSync,                   /* xSync */                                   \
   unixFileSize,               /* xFileSize */                               \
   LOCK,                       /* xLock */                                   \
   UNLOCK,                     /* xUnlock */                                 \
   CKLOCK,                     /* xCheckReservedLock */                      \
   unixFileControl,            /* xFileControl */                            \
   unixSectorSize,             /* xSectorSize */                             \
   unixDeviceCharacteristics,  /* xDeviceCapabilities */                     \
   SHMMAP,                     /* xShmMap */                                 \
   unixShmLock,                /* xShmLock */                                \
   unixShmBarrier,             /* xShmBarrier */                             \
   unixShmUnmap,               /* xShmUnmap */                               \
   unixFetch,                  /* xFetch */                                  \
   unixUnfetch,                /* xUnfetch */                                \
};                                                                           \
static const sqlite3_io_methods *FINDER##Impl(const char *z, unixFile *p){   \
  UNUSED_PARAMETER(z);                                                       \
  UNUSED_PARAMETER(p);                                                       \
  return &METHOD;                                                            \
}                                                                            \
static const sqlite3_io_methods *(*const FINDER)(const char*,unixFile *p)    \
    = FINDER##Impl;

4982

4983 /*

4984 ** Here are all of the sqlite3_io_methods objects for each of the

4985 ** locking strategies. Functions that return pointers to these methods

4986 ** are also created.

4987 */

4988 IOMETHODS(

4989 posixIoFinder, /* Finder function name */

4990 posixIoMethods, /* sqlite3_io_methods object name */

4991 3, /* shared memory and mmap are enabled */

4992 unixClose, /* xClose method */

4993 unixLock, /* xLock method */

4994 unixUnlock, /* xUnlock method */

4995 unixCheckReservedLock, /* xCheckReservedLock method */

4996 unixShmMap /* xShmMap method */

4997 )

4998 IOMETHODS(

4999 nolockIoFinder, /* Finder function name */

5000 nolockIoMethods, /* sqlite3_io_methods object name */

5001 3, /* shared memory is disabled */

5002 nolockClose, /* xClose method */

5003 nolockLock, /* xLock method */

5004 nolockUnlock, /* xUnlock method */

5005 nolockCheckReservedLock, /* xCheckReservedLock method */

5006 0 /* xShmMap method */

5007 )

5008 IOMETHODS(

5009 dotlockIoFinder, /* Finder function name */

5010 dotlockIoMethods, /* sqlite3_io_methods object name */

5011 1, /* shared memory is disabled */

5012 dotlockClose, /* xClose method */

5013 dotlockLock, /* xLock method */

5014 dotlockUnlock, /* xUnlock method */

5015 dotlockCheckReservedLock, /* xCheckReservedLock method */

5016 0 /* xShmMap method */

5017 )

5018

5019 #if SQLITE_ENABLE_LOCKING_STYLE && !OS_VXWORKS

5020 IOMETHODS(

5021 flockIoFinder, /* Finder function name */

5022 flockIoMethods, /* sqlite3_io_methods object name */

5023 1, /* shared memory is disabled */

5024 flockClose, /* xClose method */

5025 flockLock, /* xLock method */

5026 flockUnlock, /* xUnlock method */

5027 flockCheckReservedLock, /* xCheckReservedLock method */

5028 0 /* xShmMap method */

5029 )

5030 #endif

5031

5032 #if OS_VXWORKS

5033 IOMETHODS(

5034 semIoFinder, /* Finder function name */

5035 semIoMethods, /* sqlite3_io_methods object name */

5036 1, /* shared memory is disabled */

5037 semClose, /* xClose method */

5038 semLock, /* xLock method */

5039 semUnlock, /* xUnlock method */

5040 semCheckReservedLock, /* xCheckReservedLock method */

5041 0 /* xShmMap method */

5042 )

5043 #endif

5044

5045 #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE

5046 IOMETHODS(

5047 afpIoFinder, /* Finder function name */

5048 afpIoMethods, /* sqlite3_io_methods object name */

5049 1, /* shared memory is disabled */

5050 afpClose, /* xClose method */

5051 afpLock, /* xLock method */

5052 afpUnlock, /* xUnlock method */

5053 afpCheckReservedLock, /* xCheckReservedLock method */

5054 0 /* xShmMap method */

5055 )

5056 #endif

5057

5058 /*

5059 ** The proxy locking method is a "super-method" in the sense that it

5060 ** opens secondary file descriptors for the conch and lock files and

5061 ** it uses proxy, dot-file, AFP, and flock() locking methods on those

5062 ** secondary files. For this reason, the division that implements

5063 ** proxy locking is located much further down in the file. But we need

5064 ** to go ahead and define the sqlite3_io_methods and finder function

5065 ** for proxy locking here. So we forward declare the I/O methods.

5066 */

5067 #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE

5068 static int proxyClose(sqlite3_file*);

5069 static int proxyLock(sqlite3_file*, int);

5070 static int proxyUnlock(sqlite3_file*, int);

5071 static int proxyCheckReservedLock(sqlite3_file, int);

5072 IOMETHODS(

5073 proxyIoFinder, /* Finder function name */

5074 proxyIoMethods, /* sqlite3_io_methods object name */

5075 1, /* shared memory is disabled */

5076 proxyClose, /* xClose method */

5077 proxyLock, /* xLock method */

5078 proxyUnlock, /* xUnlock method */

5079 proxyCheckReservedLock, /* xCheckReservedLock method */

5080 0 /* xShmMap method */

5081 )

5082 #endif

5083

5084 /* nfs lockd on OSX 10.3+ doesn't clear write locks when a read lock is set */

5085 #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE

5086 IOMETHODS(

5087 nfsIoFinder, /* Finder function name */

5088 nfsIoMethods, /* sqlite3_io_methods object name */

5089 1, /* shared memory is disabled */

5090 unixClose, /* xClose method */

5091 unixLock, /* xLock method */

5092 nfsUnlock, /* xUnlock method */

5093 unixCheckReservedLock, /* xCheckReservedLock method */

5094 0 /* xShmMap method */

5095 )

5096 #endif

5097

5098 #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE

5099 /*

5100 ** This "finder" function attempts to determine the best locking strategy

5101 ** for the database file "filePath". It then returns the sqlite3_io_methods

5102 ** object that implements that strategy.

5103 **

5104 ** This is for MacOSX only.

5105 */

5106 static const sqlite3_io_methods *autolockIoFinderImpl(

5107 const char filePath, / name of the database file */

5108 unixFile pNew / open file object for the database file */

5109 ){

5110 static const struct Mapping {

5111 const char zFilesystem; / Filesystem type name */

5112 const sqlite3_io_methods pMethods; / Appropriate locking method */

5113 } aMap[] = {

5114 { "hfs", &posixIoMethods },

5115 { "ufs", &posixIoMethods },

5116 { "afpfs", &afpIoMethods },

5117 { "smbfs", &afpIoMethods },

5118 { "webdav", &nolockIoMethods },

5119 { 0, 0 }

5120 };

5121 int i;

5122 struct statfs fsInfo;

5123 struct flock lockInfo;

5124

5125 if( !filePath ){

5126 /* If filePath==NULL that means we are dealing with a transient file

5127 ** that does not need to be locked. */

5128 return &nolockIoMethods;

5129 }

5130 if( statfs(filePath, &fsInfo) != -1 ){

5131 if( fsInfo.f_flags & MNT_RDONLY ){

5132 return &nolockIoMethods;

5133 }

5134 for(i=0; aMap[i].zFilesystem; i++){

5135 if( strcmp(fsInfo.f_fstypename, aMap[i].zFilesystem)==0 ){

5136 return aMap[i].pMethods;

5137 }

5138 }

5139 }

5140

5141 /* Default case. Handles, amongst others, "nfs".

5142 ** Test byte-range lock using fcntl(). If the call succeeds,

5143 ** assume that the file-system supports POSIX style locks.

5144 */

5145 lockInfo.l_len = 1;

5146 lockInfo.l_start = 0;

5147 lockInfo.l_whence = SEEK_SET;

5148 lockInfo.l_type = F_RDLCK;

5149 if( osFcntl(pNew->h, F_GETLK, &lockInfo)!=-1 ) {

5150 if( strcmp(fsInfo.f_fstypename, "nfs")==0 ){

5151 return &nfsIoMethods;

5152 } else {

5153 return &posixIoMethods;

5154 }

5155 }else{

5156 return &dotlockIoMethods;

5157 }

5158 }

5159 static const sqlite3_io_methods

5160 (const autolockIoFinder)(const char,unixFile) = autolockIoFinderImpl;

5161

5162 #endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */

5163

5164 #if OS_VXWORKS && SQLITE_ENABLE_LOCKING_STYLE

5165 /*

5166 ** This "finder" function attempts to determine the best locking strategy

5167 ** for the database file "filePath". It then returns the sqlite3_io_methods

5168 ** object that implements that strategy.

5169 **

5170 ** This is for VXWorks only.

5171 */

5172 static const sqlite3_io_methods *autolockIoFinderImpl(

5173 const char filePath, / name of the database file */

5174 unixFile pNew / the open file object */

5175 ){

5176 struct flock lockInfo;

5177

5178 if( !filePath ){

5179 /* If filePath==NULL that means we are dealing with a transient file

5180 ** that does not need to be locked. */

5181 return &nolockIoMethods;

5182 }

5183

5184 /* Test if fcntl() is supported and use POSIX style locks.

5185 ** Otherwise fall back to the named semaphore method.

5186 */

5187 lockInfo.l_len = 1;

5188 lockInfo.l_start = 0;

5189 lockInfo.l_whence = SEEK_SET;

5190 lockInfo.l_type = F_RDLCK;

5191 if( osFcntl(pNew->h, F_GETLK, &lockInfo)!=-1 ) {

5192 return &posixIoMethods;

5193 }else{

5194 return &semIoMethods;

5195 }

5196 }

5197 static const sqlite3_io_methods

5198 (const autolockIoFinder)(const char,unixFile) = autolockIoFinderImpl;

5199

5200 #endif /* OS_VXWORKS && SQLITE_ENABLE_LOCKING_STYLE */

5201

5202 /*

5203 ** An abstract type for a pointer to an IO method finder function:

5204 */

5205 typedef const sqlite3_io_methods (finder_type)(const char,unixFile);

5206

5207

5208 /****************************************************************************

5209 ************************** sqlite3_vfs methods **************************

5210 **

5211 ** This division contains the implementation of methods on the

5212 ** sqlite3_vfs object.

5213 */

5214

5215 /*

5216 ** Initialize the contents of the unixFile structure pointed to by pId.

5217 */

5218 static int fillInUnixFile(

5219 sqlite3_vfs pVfs, / Pointer to vfs object */

5220 int h, /* Open file descriptor of file being opened */

5221 sqlite3_file pId, / Write to the unixFile structure here */

5222 const char zFilename, / Name of the file being opened */

5223 int ctrlFlags /* Zero or more UNIXFILE_* values */

5224 ){

5225 const sqlite3_io_methods *pLockingStyle;

5226 unixFile pNew = (unixFile )pId;

5227 int rc = SQLITE_OK;

5228

5229 assert( pNew->pInode==NULL );

5230

5231 /* Usually the path zFilename should not be a relative pathname. The

5232 ** exception is when opening the proxy "conch" file in builds that

5233 ** include the special Apple locking styles.

5234 */

5235 #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE

5236 assert( zFilename==0 \|\| zFilename[0]=='/'

5237 \|\| pVfs->pAppData==(void*)&autolockIoFinder );

5238 #else

5239 assert( zFilename==0 \|\| zFilename[0]=='/' );

5240 #endif

5241

5242 /* No locking occurs in temporary files */

5243 assert( zFilename!=0 \|\| (ctrlFlags & UNIXFILE_NOLOCK)!=0 );

5244

5245 OSTRACE(("OPEN %-3d %s\n", h, zFilename));

5246 pNew->h = h;

5247 pNew->pVfs = pVfs;

5248 pNew->zPath = zFilename;

5249 pNew->ctrlFlags = (u8)ctrlFlags;

5250 #if SQLITE_MAX_MMAP_SIZE>0

5251 pNew->mmapSizeMax = sqlite3GlobalConfig.szMmap;

5252 #endif

5253 if( sqlite3_uri_boolean(((ctrlFlags & UNIXFILE_URI) ? zFilename : 0),

5254 "psow", SQLITE_POWERSAFE_OVERWRITE) ){

5255 pNew->ctrlFlags \|= UNIXFILE_PSOW;

5256 }

5257 if( strcmp(pVfs->zName,"unix-excl")==0 ){

5258 pNew->ctrlFlags \|= UNIXFILE_EXCL;

5259 }

5260

5261 #if OS_VXWORKS

5262 pNew->pId = vxworksFindFileId(zFilename);

5263 if( pNew->pId==0 ){

5264 ctrlFlags \|= UNIXFILE_NOLOCK;

5265 rc = SQLITE_NOMEM;

5266 }

5267 #endif

5268

5269 if( ctrlFlags & UNIXFILE_NOLOCK ){

5270 pLockingStyle = &nolockIoMethods;

5271 }else{

5272 pLockingStyle = (*(finder_type)pVfs->pAppData)(zFilename, pNew);

5273 #if SQLITE_ENABLE_LOCKING_STYLE

5274 /* Cache zFilename in the locking context (AFP and dotlock override) for

5275 ** proxyLock activation is possible (remote proxy is based on db name)

5276 ** zFilename remains valid until file is closed, to support */

5277 pNew->lockingContext = (void*)zFilename;

5278 #endif

5279 }

5280

5281 if( pLockingStyle == &posixIoMethods

5282 #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE

5283 \|\| pLockingStyle == &nfsIoMethods

5284 #endif

5285 ){

5286 unixEnterMutex();

5287 rc = findInodeInfo(pNew, &pNew->pInode);

5288 if( rc!=SQLITE_OK ){

5289 /* If an error occurred in findInodeInfo(), close the file descriptor

5290 ** immediately, before releasing the mutex. findInodeInfo() may fail

5291 ** in two scenarios:

5292 **

5293 ** (a) A call to fstat() failed.

5294 ** (b) A malloc failed.

5295 **

5296 ** Scenario (b) may only occur if the process is holding no other

5297 ** file descriptors open on the same file. If there were other file

5298 ** descriptors on this file, then no malloc would be required by

5299 ** findInodeInfo(). If this is the case, it is quite safe to close

5300 ** handle h - as it is guaranteed that no posix locks will be released

5301 ** by doing so.

5302 **

5303 ** If scenario (a) caused the error then things are not so safe. The

5304 ** implicit assumption here is that if fstat() fails, things are in

5305 ** such bad shape that dropping a lock or two doesn't matter much.

5306 */

5307 robust_close(pNew, h, __LINE__);

5308 h = -1;

5309 }

5310 unixLeaveMutex();

5311 }

5312

5313 #if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__)

5314 else if( pLockingStyle == &afpIoMethods ){

5315 /* AFP locking uses the file path so it needs to be included in

5316 ** the afpLockingContext.

5317 */

5318 afpLockingContext *pCtx;

5319 pNew->lockingContext = pCtx = sqlite3_malloc( sizeof(*pCtx) );

5320 if( pCtx==0 ){

5321 rc = SQLITE_NOMEM;

5322 }else{

5323 /* NB: zFilename exists and remains valid until the file is closed

5324 ** according to requirement F11141. So we do not need to make a

5325 ** copy of the filename. */

5326 pCtx->dbPath = zFilename;

5327 pCtx->reserved = 0;

5328 srandomdev();

5329 unixEnterMutex();

5330 rc = findInodeInfo(pNew, &pNew->pInode);

5331 if( rc!=SQLITE_OK ){

5332 sqlite3_free(pNew->lockingContext);

5333 robust_close(pNew, h, __LINE__);

5334 h = -1;

5335 }

5336 unixLeaveMutex();

5337 }

5338 }

5339 #endif

5340

5341 else if( pLockingStyle == &dotlockIoMethods ){

5342 /* Dotfile locking uses the file path so it needs to be included in

5343 ** the dotlockLockingContext

5344 */

5345 char *zLockFile;

5346 int nFilename;

5347 assert( zFilename!=0 );

5348 nFilename = (int)strlen(zFilename) + 6;

5349 zLockFile = (char *)sqlite3_malloc(nFilename);

5350 if( zLockFile==0 ){

5351 rc = SQLITE_NOMEM;

5352 }else{

5353 sqlite3_snprintf(nFilename, zLockFile, "%s" DOTLOCK_SUFFIX, zFilename);

5354 }

5355 pNew->lockingContext = zLockFile;

5356 }

5357

5358 #if OS_VXWORKS

5359 else if( pLockingStyle == &semIoMethods ){

5360 /* Named semaphore locking uses the file path so it needs to be

5361 ** included in the semLockingContext

5362 */

5363 unixEnterMutex();

5364 rc = findInodeInfo(pNew, &pNew->pInode);

5365 if( (rc==SQLITE_OK) && (pNew->pInode->pSem==NULL) ){

5366 char *zSemName = pNew->pInode->aSemName;

5367 int n;

5368 sqlite3_snprintf(MAX_PATHNAME, zSemName, "/%s.sem",

5369 pNew->pId->zCanonicalName);

5370 for( n=1; zSemName[n]; n++ )

5371 if( zSemName[n]=='/' ) zSemName[n] = '_';

5372 pNew->pInode->pSem = sem_open(zSemName, O_CREAT, 0666, 1);

5373 if( pNew->pInode->pSem == SEM_FAILED ){

5374 rc = SQLITE_NOMEM;

5375 pNew->pInode->aSemName[0] = '\0';

5376 }

5377 }

5378 unixLeaveMutex();

5379 }

5380 #endif

5381

5382 pNew->lastErrno = 0;

5383 #if OS_VXWORKS

5384 if( rc!=SQLITE_OK ){

5385 if( h>=0 ) robust_close(pNew, h, __LINE__);

5386 h = -1;

5387 osUnlink(zFilename);

5388 pNew->ctrlFlags \|= UNIXFILE_DELETE;

5389 }

5390 #endif

5391 if( rc!=SQLITE_OK ){

5392 if( h>=0 ) robust_close(pNew, h, __LINE__);

5393 }else{

5394 pNew->pMethod = pLockingStyle;

5395 OpenCounter(+1);

5396 verifyDbFile(pNew);

5397 }

5398 return rc;

5399 }

5400

5401 /*

5402 ** Return the name of a directory in which to put temporary files.

5403 ** If no suitable temporary file directory can be found, return NULL.

5404 */

5405 static const char *unixTempFileDir(void){

5406 static const char *azDirs[] = {

5407 0,

5408 0,

5409 0,

5410 "/var/tmp",

5411 "/usr/tmp",

5412 "/tmp",

5413 0 /* List terminator */

5414 };

5415 unsigned int i;

5416 struct stat buf;

5417 const char *zDir = 0;

5418

5419 azDirs[0] = sqlite3_temp_directory;

5420 if( !azDirs[1] ) azDirs[1] = getenv("SQLITE_TMPDIR");

5421 if( !azDirs[2] ) azDirs[2] = getenv("TMPDIR");

5422 for(i=0; i<sizeof(azDirs)/sizeof(azDirs[0]); zDir=azDirs[i++]){

5423 if( zDir==0 ) continue;

5424 if( osStat(zDir, &buf) ) continue;

5425 if( !S_ISDIR(buf.st_mode) ) continue;

5426 if( osAccess(zDir, 07) ) continue;

5427 break;

5428 }

5429 return zDir;

5430 }

5431

5432 /*

5433 ** Create a temporary file name in zBuf. zBuf must be allocated

5434 ** by the calling process and must be big enough to hold at least

5435 ** pVfs->mxPathname bytes.

5436 */

5437 static int unixGetTempname(int nBuf, char *zBuf){

5438 static const unsigned char zChars[] =

5439 "abcdefghijklmnopqrstuvwxyz"

5440 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"

5441 "0123456789";

5442 unsigned int i, j;

5443 const char *zDir;

5444

5445 /* It's odd to simulate an io-error here, but really this is just

5446 ** using the io-error infrastructure to test that SQLite handles this

5447 ** function failing.

5448 */

5449 SimulateIOError( return SQLITE_IOERR );

5450

5451 zDir = unixTempFileDir();

5452 if( zDir==0 ) zDir = ".";

5453

5454 /* Check that the output buffer is large enough for the temporary file

5455 ** name. If it is not, return SQLITE_ERROR.

5456 */

5457 if( (strlen(zDir) + strlen(SQLITE_TEMP_FILE_PREFIX) + 18) >= (size_t)nBuf ){

5458 return SQLITE_ERROR;

5459 }

5460

5461 do{

5462 sqlite3_snprintf(nBuf-18, zBuf, "%s/"SQLITE_TEMP_FILE_PREFIX, zDir);

5463 j = (int)strlen(zBuf);

5464 sqlite3_randomness(15, &zBuf[j]);

5465 for(i=0; i<15; i++, j++){

5466 zBuf[j] = (char)zChars[ ((unsigned char)zBuf[j])%(sizeof(zChars)-1) ];

5467 }

5468 zBuf[j] = 0;

5469 zBuf[j+1] = 0;

5470 }while( osAccess(zBuf,0)==0 );

5471 return SQLITE_OK;

5472 }

5473

#if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__)
/*
** Routine to transform a unixFile into a proxy-locking unixFile.
** Implementation in the proxy-lock division, but used by unixOpen()
** if SQLITE_PREFER_PROXY_LOCKING is defined.
**
** The first argument is the unixFile to transform; the second is a
** string (unixOpen() passes ":auto:").
*/
static int proxyTransformUnixFile(unixFile*, const char*);
#endif

5482

5483 /*

5484 ** Search for an unused file descriptor that was opened on the database

5485 ** file (not a journal or master-journal file) identified by pathname

5486 ** zPath with SQLITE_OPEN_XXX flags matching those passed as the second

5487 ** argument to this function.

5488 **

5489 ** Such a file descriptor may exist if a database connection was closed

5490 ** but the associated file descriptor could not be closed because some

5491 ** other file descriptor open on the same file is holding a file-lock.

5492 ** Refer to comments in the unixClose() function and the lengthy comment

5493 ** describing "Posix Advisory Locking" at the start of this file for

5494 ** further details. Also, ticket #4018.

5495 **

5496 ** If a suitable file descriptor is found, then it is returned. If no

5497 ** such file descriptor is located, -1 is returned.

5498 */

5499 static UnixUnusedFd findReusableFd(const char zPath, int flags){

5500 UnixUnusedFd *pUnused = 0;

5501

5502 /* Do not search for an unused file descriptor on vxworks. Not because

5503 ** vxworks would not benefit from the change (it might, we're not sure),

5504 ** but because no way to test it is currently available. It is better

5505 ** not to risk breaking vxworks support for the sake of such an obscure

5506 ** feature. */

5507 #if !OS_VXWORKS

5508 struct stat sStat; /* Results of stat() call */

5509

5510 /* A stat() call may fail for various reasons. If this happens, it is

5511 ** almost certain that an open() call on the same path will also fail.

5512 ** For this reason, if an error occurs in the stat() call here, it is

5513 ** ignored and -1 is returned. The caller will try to open a new file

5514 ** descriptor on the same path, fail, and return an error to SQLite.

5515 **

5516 ** Even if a subsequent open() call does succeed, the consequences of

5517 ** not searching for a reusable file descriptor are not dire. */

5518 if( 0==osStat(zPath, &sStat) ){

5519 unixInodeInfo *pInode;

5520

5521 unixEnterMutex();

5522 pInode = inodeList;

5523 while( pInode && (pInode->fileId.dev!=sStat.st_dev

5524 \|\| pInode->fileId.ino!=sStat.st_ino) ){

5525 pInode = pInode->pNext;

5526 }

5527 if( pInode ){

5528 UnixUnusedFd **pp;

5529 for(pp=&pInode->pUnused; pp && (pp)->flags!=flags; pp=&((*pp)->pNext));

5530 pUnused = *pp;

5531 if( pUnused ){

5532 *pp = pUnused->pNext;

5533 }

5534 }

5535 unixLeaveMutex();

5536 }

5537 #endif /* if !OS_VXWORKS */

5538 return pUnused;

5539 }

5540

5541 /*

5542 ** This function is called by unixOpen() to determine the unix permissions

5543 ** to create new files with. If no error occurs, then SQLITE_OK is returned

5544 ** and a value suitable for passing as the third argument to open(2) is

5545 ** written to *pMode. If an IO error occurs, an SQLite error code is

5546 ** returned and the value of *pMode is not modified.

5547 **

5548 ** In most cases, this routine sets *pMode to 0, which will become

5549 ** an indication to robust_open() to create the file using

5550 ** SQLITE_DEFAULT_FILE_PERMISSIONS adjusted by the umask.

5551 ** But if the file being opened is a WAL or regular journal file, then

5552 ** this function queries the file-system for the permissions on the

5553 ** corresponding database file and sets *pMode to this value. Whenever

5554 ** possible, WAL and journal files are created using the same permissions

5555 ** as the associated database file.

5556 **

5557 ** If the SQLITE_ENABLE_8_3_NAMES option is enabled, then the

5558 ** original filename is unavailable. But 8_3_NAMES is only used for

5559 ** FAT filesystems and permissions do not matter there, so just use

5560 ** the default permissions.

5561 */

5562 static int findCreateFileMode(

5563 const char zPath, / Path of file (possibly) being created */

5564 int flags, /* Flags passed as 4th argument to xOpen() */

5565 mode_t pMode, / OUT: Permissions to open file with */

5566 uid_t pUid, / OUT: uid to set on the file */

5567 gid_t pGid / OUT: gid to set on the file */

5568 ){

5569 int rc = SQLITE_OK; /* Return Code */

5570 *pMode = 0;

5571 *pUid = 0;

5572 *pGid = 0;

5573 if( flags & (SQLITE_OPEN_WAL\|SQLITE_OPEN_MAIN_JOURNAL) ){

5574 char zDb[MAX_PATHNAME+1]; /* Database file path */

5575 int nDb; /* Number of valid bytes in zDb */

5576 struct stat sStat; /* Output of stat() on database file */

5577

5578 /* zPath is a path to a WAL or journal file. The following block derives

5579 ** the path to the associated database file from zPath. This block handles

5580 ** the following naming conventions:

5581 **

5582 ** "<path to db>-journal"

5583 ** "<path to db>-wal"

5584 ** "<path to db>-journalNN"

5585 ** "<path to db>-walNN"

5586 **

5587 ** where NN is a decimal number. The NN naming schemes are

5588 ** used by the test_multiplex.c module.

5589 */

5590 nDb = sqlite3Strlen30(zPath) - 1;

5591 #ifdef SQLITE_ENABLE_8_3_NAMES

5592 while( nDb>0 && sqlite3Isalnum(zPath[nDb]) ) nDb--;

5593 if( nDb==0 \|\| zPath[nDb]!='-' ) return SQLITE_OK;

5594 #else

5595 while( zPath[nDb]!='-' ){

5596 assert( nDb>0 );

5597 assert( zPath[nDb]!='\n' );

5598 nDb--;

5599 }

5600 #endif

5601 memcpy(zDb, zPath, nDb);

5602 zDb[nDb] = '\0';

5603

5604 if( 0==osStat(zDb, &sStat) ){

5605 *pMode = sStat.st_mode & 0777;

5606 *pUid = sStat.st_uid;

5607 *pGid = sStat.st_gid;

5608 }else{

5609 rc = SQLITE_IOERR_FSTAT;

5610 }

5611 }else if( flags & SQLITE_OPEN_DELETEONCLOSE ){

5612 *pMode = 0600;

5613 }

5614 return rc;

5615 }

5616

5617 /*

5618 ** Open the file zPath.

5619 **

5620 ** Previously, the SQLite OS layer used three functions in place of this

5621 ** one:

5622 **

5623 ** sqlite3OsOpenReadWrite();

5624 ** sqlite3OsOpenReadOnly();

5625 ** sqlite3OsOpenExclusive();

5626 **

5627 ** These calls correspond to the following combinations of flags:

5628 **

5629 ** ReadWrite() -> (READWRITE \| CREATE)

5630 ** ReadOnly() -> (READONLY)

5631 ** OpenExclusive() -> (READWRITE \| CREATE \| EXCLUSIVE)

5632 **

5633 ** The old OpenExclusive() accepted a boolean argument - "delFlag". If

5634 ** true, the file was configured to be automatically deleted when the

5635 ** file handle closed. To achieve the same effect using this new

5636 ** interface, add the DELETEONCLOSE flag to those specified above for

5637 ** OpenExclusive().

5638 */

5639 static int unixOpen(

5640 sqlite3_vfs pVfs, / The VFS for which this is the xOpen method */

5641 const char zPath, / Pathname of file to be opened */

5642 sqlite3_file pFile, / The file descriptor to be filled in */

5643 int flags, /* Input flags to control the opening */

5644 int pOutFlags / Output flags returned to SQLite core */

5645 ){

5646 unixFile p = (unixFile )pFile;

5647 int fd = -1; /* File descriptor returned by open() */

5648 int openFlags = 0; /* Flags to pass to open() */

5649 int eType = flags&0xFFFFFF00; /* Type of file to open */

5650 int noLock; /* True to omit locking primitives */

5651 int rc = SQLITE_OK; /* Function Return Code */

5652 int ctrlFlags = 0; /* UNIXFILE_* flags */

5653

5654 int isExclusive = (flags & SQLITE_OPEN_EXCLUSIVE);

5655 int isDelete = (flags & SQLITE_OPEN_DELETEONCLOSE);

5656 int isCreate = (flags & SQLITE_OPEN_CREATE);

5657 int isReadonly = (flags & SQLITE_OPEN_READONLY);

5658 int isReadWrite = (flags & SQLITE_OPEN_READWRITE);

5659 #if SQLITE_ENABLE_LOCKING_STYLE

5660 int isAutoProxy = (flags & SQLITE_OPEN_AUTOPROXY);

5661 #endif

5662 #if defined(__APPLE__) \|\| SQLITE_ENABLE_LOCKING_STYLE

5663 struct statfs fsInfo;

5664 #endif

5665

5666 /* If creating a master or main-file journal, this function will open

5667 ** a file-descriptor on the directory too. The first time unixSync()

5668 ** is called the directory file descriptor will be fsync()ed and close()d.

5669 */

5670 int syncDir = (isCreate && (

5671 eType==SQLITE_OPEN_MASTER_JOURNAL

5672 \|\| eType==SQLITE_OPEN_MAIN_JOURNAL

5673 \|\| eType==SQLITE_OPEN_WAL

5674 ));

5675

5676 /* If argument zPath is a NULL pointer, this function is required to open

5677 ** a temporary file. Use this buffer to store the file name in.

5678 */

5679 char zTmpname[MAX_PATHNAME+2];

5680 const char *zName = zPath;

5681

5682 /* Check the following statements are true:

5683 **

5684 ** (a) Exactly one of the READWRITE and READONLY flags must be set, and

5685 ** (b) if CREATE is set, then READWRITE must also be set, and

5686 ** (c) if EXCLUSIVE is set, then CREATE must also be set.

5687 ** (d) if DELETEONCLOSE is set, then CREATE must also be set.

5688 */

5689 assert((isReadonly==0 \|\| isReadWrite==0) && (isReadWrite \|\| isReadonly));

5690 assert(isCreate==0 \|\| isReadWrite);

5691 assert(isExclusive==0 \|\| isCreate);

5692 assert(isDelete==0 \|\| isCreate);

5693

5694 /* The main DB, main journal, WAL file and master journal are never

5695 ** automatically deleted. Nor are they ever temporary files. */

5696 assert( (!isDelete && zName) \|\| eType!=SQLITE_OPEN_MAIN_DB );

5697 assert( (!isDelete && zName) \|\| eType!=SQLITE_OPEN_MAIN_JOURNAL );

5698 assert( (!isDelete && zName) \|\| eType!=SQLITE_OPEN_MASTER_JOURNAL );

5699 assert( (!isDelete && zName) \|\| eType!=SQLITE_OPEN_WAL );

5700

5701 /* Assert that the upper layer has set one of the "file-type" flags. */

5702 assert( eType==SQLITE_OPEN_MAIN_DB \|\| eType==SQLITE_OPEN_TEMP_DB

5703 \|\| eType==SQLITE_OPEN_MAIN_JOURNAL \|\| eType==SQLITE_OPEN_TEMP_JOURNAL

5704 \|\| eType==SQLITE_OPEN_SUBJOURNAL \|\| eType==SQLITE_OPEN_MASTER_JOURNAL

5705 \|\| eType==SQLITE_OPEN_TRANSIENT_DB \|\| eType==SQLITE_OPEN_WAL

5706 );

5707

5708 /* Detect a pid change and reset the PRNG. There is a race condition

5709 ** here such that two or more threads all trying to open databases at

5710 ** the same instant might all reset the PRNG. But multiple resets

5711 ** are harmless.

5712 */

5713 if( randomnessPid!=getpid() ){

5714 randomnessPid = getpid();

5715 sqlite3_randomness(0,0);

5716 }

5717

5718 memset(p, 0, sizeof(unixFile));

5719

5720 if( eType==SQLITE_OPEN_MAIN_DB ){

5721 UnixUnusedFd *pUnused;

5722 pUnused = findReusableFd(zName, flags);

5723 if( pUnused ){

5724 fd = pUnused->fd;

5725 }else{

5726 pUnused = sqlite3_malloc(sizeof(*pUnused));

5727 if( !pUnused ){

5728 return SQLITE_NOMEM;

5729 }

5730 }

5731 p->pUnused = pUnused;

5732

5733 /* Database filenames are double-zero terminated if they are not

5734 ** URIs with parameters. Hence, they can always be passed into

5735 ** sqlite3_uri_parameter(). */

5736 assert( (flags & SQLITE_OPEN_URI) \|\| zName[strlen(zName)+1]==0 );

5737

5738 }else if( !zName ){

5739 /* If zName is NULL, the upper layer is requesting a temp file. */

5740 assert(isDelete && !syncDir);

5741 rc = unixGetTempname(MAX_PATHNAME+2, zTmpname);

5742 if( rc!=SQLITE_OK ){

5743 return rc;

5744 }

5745 zName = zTmpname;

5746

5747 /* Generated temporary filenames are always double-zero terminated

5748 ** for use by sqlite3_uri_parameter(). */

5749 assert( zName[strlen(zName)+1]==0 );

5750 }

5751

5752 /* Determine the value of the flags parameter passed to POSIX function

5753 ** open(). These must be calculated even if open() is not called, as

5754 ** they may be stored as part of the file handle and used by the

5755 ** 'conch file' locking functions later on. */

5756 if( isReadonly ) openFlags \|= O_RDONLY;

5757 if( isReadWrite ) openFlags \|= O_RDWR;

5758 if( isCreate ) openFlags \|= O_CREAT;

5759 if( isExclusive ) openFlags \|= (O_EXCL\|O_NOFOLLOW);

5760 openFlags \|= (O_LARGEFILE\|O_BINARY);

5761

5762 if( fd<0 ){

5763 mode_t openMode; /* Permissions to create file with */

5764 uid_t uid; /* Userid for the file */

5765 gid_t gid; /* Groupid for the file */

5766 rc = findCreateFileMode(zName, flags, &openMode, &uid, &gid);

5767 if( rc!=SQLITE_OK ){

5768 assert( !p->pUnused );

5769 assert( eType==SQLITE_OPEN_WAL \|\| eType==SQLITE_OPEN_MAIN_JOURNAL );

5770 return rc;

5771 }

5772 fd = robust_open(zName, openFlags, openMode);

5773 OSTRACE(("OPENX %-3d %s 0%o\n", fd, zName, openFlags));

5774 if( fd<0 && errno!=EISDIR && isReadWrite && !isExclusive ){

5775 /* Failed to open the file for read/write access. Try read-only. */

5776 flags &= ~(SQLITE_OPEN_READWRITE\|SQLITE_OPEN_CREATE);

5777 openFlags &= ~(O_RDWR\|O_CREAT);

5778 flags \|= SQLITE_OPEN_READONLY;

5779 openFlags \|= O_RDONLY;

5780 isReadonly = 1;

5781 fd = robust_open(zName, openFlags, openMode);

5782 }

5783 if( fd<0 ){

5784 rc = unixLogError(SQLITE_CANTOPEN_BKPT, "open", zName);

5785 goto open_finished;

5786 }

5787

5788 /* If this process is running as root and if creating a new rollback

5789 ** journal or WAL file, set the ownership of the journal or WAL to be

5790 ** the same as the original database.

5791 */

5792 if( flags & (SQLITE_OPEN_WAL\|SQLITE_OPEN_MAIN_JOURNAL) ){

5793 osFchown(fd, uid, gid);

5794 }

5795 }

5796 assert( fd>=0 );

5797 if( pOutFlags ){

5798 *pOutFlags = flags;

5799 }

5800

5801 if( p->pUnused ){

5802 p->pUnused->fd = fd;

5803 p->pUnused->flags = flags;

5804 }

5805

5806 if( isDelete ){

5807 #if OS_VXWORKS

5808 zPath = zName;

5809 #elif defined(SQLITE_UNLINK_AFTER_CLOSE)

5810 zPath = sqlite3_mprintf("%s", zName);

5811 if( zPath==0 ){

5812 robust_close(p, fd, __LINE__);

5813 return SQLITE_NOMEM;

5814 }

5815 #else

5816 osUnlink(zName);

5817 #endif

5818 }

5819 #if SQLITE_ENABLE_LOCKING_STYLE

5820 else{

5821 p->openFlags = openFlags;

5822 }

5823 #endif

5824

5825 noLock = eType!=SQLITE_OPEN_MAIN_DB;

5826

5827

5828 #if defined(__APPLE__) \|\| SQLITE_ENABLE_LOCKING_STYLE

5829 if( fstatfs(fd, &fsInfo) == -1 ){

5830 ((unixFile*)pFile)->lastErrno = errno;

5831 robust_close(p, fd, __LINE__);

5832 return SQLITE_IOERR_ACCESS;

5833 }

5834 if (0 == strncmp("msdos", fsInfo.f_fstypename, 5)) {

5835 ((unixFile*)pFile)->fsFlags \|= SQLITE_FSFLAGS_IS_MSDOS;

5836 }

5837 #endif

5838

5839 /* Set up appropriate ctrlFlags */

5840 if( isDelete ) ctrlFlags \|= UNIXFILE_DELETE;

5841 if( isReadonly ) ctrlFlags \|= UNIXFILE_RDONLY;

5842 if( noLock ) ctrlFlags \|= UNIXFILE_NOLOCK;

5843 if( syncDir ) ctrlFlags \|= UNIXFILE_DIRSYNC;

5844 if( flags & SQLITE_OPEN_URI ) ctrlFlags \|= UNIXFILE_URI;

5845

5846 #if SQLITE_ENABLE_LOCKING_STYLE

5847 #if SQLITE_PREFER_PROXY_LOCKING

5848 isAutoProxy = 1;

5849 #endif

5850 if( isAutoProxy && (zPath!=NULL) && (!noLock) && pVfs->xOpen ){

5851 char *envforce = getenv("SQLITE_FORCE_PROXY_LOCKING");

5852 int useProxy = 0;

5853

5854 /* SQLITE_FORCE_PROXY_LOCKING==1 means force always use proxy, 0 means

5855 ** never use proxy, NULL means use proxy for non-local files only. */

5856 if( envforce!=NULL ){

5857 useProxy = atoi(envforce)>0;

5858 }else{

5859 if( statfs(zPath, &fsInfo) == -1 ){

5860 /* In theory, the close(fd) call is sub-optimal. If the file opened

5861 ** with fd is a database file, and there are other connections open

5862 ** on that file that are currently holding advisory locks on it,

5863 ** then the call to close() will cancel those locks. In practice,

5864 ** we're assuming that statfs() doesn't fail very often. At least

5865 ** not while other file descriptors opened by the same process on

5866 ** the same file are working. */

5867 p->lastErrno = errno;

5868 robust_close(p, fd, __LINE__);

5869 rc = SQLITE_IOERR_ACCESS;

5870 goto open_finished;

5871 }

5872 useProxy = !(fsInfo.f_flags&MNT_LOCAL);

5873 }

5874 if( useProxy ){

5875 rc = fillInUnixFile(pVfs, fd, pFile, zPath, ctrlFlags);

5876 if( rc==SQLITE_OK ){

5877 rc = proxyTransformUnixFile((unixFile*)pFile, ":auto:");

5878 if( rc!=SQLITE_OK ){

5879 /* Use unixClose to clean up the resources added in fillInUnixFile

5880 ** and clear all the structure's references. Specifically,

5881 ** pFile->pMethods will be NULL so sqlite3OsClose will be a no-op

5882 */

5883 unixClose(pFile);

5884 return rc;

5885 }

5886 }

5887 goto open_finished;

5888 }

5889 }

5890 #endif

5891

5892 rc = fillInUnixFile(pVfs, fd, pFile, zPath, ctrlFlags);

5893

5894 open_finished:

5895 if( rc!=SQLITE_OK ){

5896 sqlite3_free(p->pUnused);

5897 }

5898 return rc;

5899 }

5900

5901

5902 /*

5903 ** Delete the file at zPath. If the dirSync argument is true, fsync()

5904 ** the directory after deleting the file.

5905 */

5906 static int unixDelete(

5907 sqlite3_vfs NotUsed, / VFS containing this as the xDelete method */

5908 const char zPath, / Name of file to be deleted */

5909 int dirSync /* If true, fsync() directory after deleting file */

5910 ){

5911 int rc = SQLITE_OK;

5912 UNUSED_PARAMETER(NotUsed);

5913 SimulateIOError(return SQLITE_IOERR_DELETE);

5914 if( osUnlink(zPath)==(-1) ){

5915 if( errno==ENOENT

5916 #if OS_VXWORKS

5917 \|\| osAccess(zPath,0)!=0

5918 #endif

5919 ){

5920 rc = SQLITE_IOERR_DELETE_NOENT;

5921 }else{

5922 rc = unixLogError(SQLITE_IOERR_DELETE, "unlink", zPath);

5923 }

5924 return rc;

5925 }

5926 #ifndef SQLITE_DISABLE_DIRSYNC

5927 if( (dirSync & 1)!=0 ){

5928 int fd;

5929 rc = osOpenDirectory(zPath, &fd);

5930 if( rc==SQLITE_OK ){

5931 #if OS_VXWORKS

5932 if( fsync(fd)==-1 )

5933 #else

5934 if( fsync(fd) )

5935 #endif

5936 {

5937 rc = unixLogError(SQLITE_IOERR_DIR_FSYNC, "fsync", zPath);

5938 }

5939 robust_close(0, fd, __LINE__);

5940 }else if( rc==SQLITE_CANTOPEN ){

5941 rc = SQLITE_OK;

5942 }

5943 }

5944 #endif

5945 return rc;

5946 }

5947

5948 /*

5949 ** Test the existence of or access permissions of file zPath. The

5950 ** test performed depends on the value of flags:

5951 **

5952 ** SQLITE_ACCESS_EXISTS: Return 1 if the file exists

5953 ** SQLITE_ACCESS_READWRITE: Return 1 if the file is read and writable.

5954 ** SQLITE_ACCESS_READONLY: Return 1 if the file is readable.

5955 **

5956 ** Otherwise return 0.

5957 */

5958 static int unixAccess(

5959 sqlite3_vfs NotUsed, / The VFS containing this xAccess method */

5960 const char zPath, / Path of the file to examine */

5961 int flags, /* What do we want to learn about the zPath file? */

5962 int pResOut / Write result boolean here */

5963 ){

5964 int amode = 0;

5965 UNUSED_PARAMETER(NotUsed);

5966 SimulateIOError( return SQLITE_IOERR_ACCESS; );

5967 switch( flags ){

5968 case SQLITE_ACCESS_EXISTS:

5969 amode = F_OK;

5970 break;

5971 case SQLITE_ACCESS_READWRITE:

5972 amode = W_OK\|R_OK;

5973 break;

5974 case SQLITE_ACCESS_READ:

5975 amode = R_OK;

5976 break;

5977

5978 default:

5979 assert(!"Invalid flags argument");

5980 }

5981 *pResOut = (osAccess(zPath, amode)==0);

5982 if( flags==SQLITE_ACCESS_EXISTS && *pResOut ){

5983 struct stat buf;

5984 if( 0==osStat(zPath, &buf) && buf.st_size==0 ){

5985 *pResOut = 0;

5986 }

5987 }

5988 return SQLITE_OK;

5989 }

5990

5991

5992 /*

5993 ** Turn a relative pathname into a full pathname. The relative path

5994 ** is stored as a nul-terminated string in the buffer pointed to by

5995 ** zPath.

5996 **

5997 ** zOut points to a buffer of at least sqlite3_vfs.mxPathname bytes

5998 ** (in this case, MAX_PATHNAME bytes). The full-path is written to

5999 ** this buffer before returning.

6000 */

6001 static int unixFullPathname(

6002 sqlite3_vfs pVfs, / Pointer to vfs object */

6003 const char zPath, / Possibly relative input path */

6004 int nOut, /* Size of output buffer in bytes */

6005 char zOut / Output buffer */

6006 ){

6007

6008 /* It's odd to simulate an io-error here, but really this is just

6009 ** using the io-error infrastructure to test that SQLite handles this

6010 ** function failing. This function could fail if, for example, the

6011 ** current working directory has been unlinked.

6012 */

6013 SimulateIOError( return SQLITE_ERROR );

6014

6015 assert( pVfs->mxPathname==MAX_PATHNAME );

6016 UNUSED_PARAMETER(pVfs);

6017

6018 zOut[nOut-1] = '\0';

6019 if( zPath[0]=='/' ){

6020 sqlite3_snprintf(nOut, zOut, "%s", zPath);

6021 }else{

6022 int nCwd;

6023 if( osGetcwd(zOut, nOut-1)==0 ){

6024 return unixLogError(SQLITE_CANTOPEN_BKPT, "getcwd", zPath);

6025 }

6026 nCwd = (int)strlen(zOut);

6027 sqlite3_snprintf(nOut-nCwd, &zOut[nCwd], "/%s", zPath);

6028 }

6029 return SQLITE_OK;

6030 }

6031

6032

6033 #ifndef SQLITE_OMIT_LOAD_EXTENSION

6034 /*

6035 ** Interfaces for opening a shared library, finding entry points

6036 ** within the shared library, and closing the shared library.

6037 */

6038 #include <dlfcn.h>

6039 static void unixDlOpen(sqlite3_vfs NotUsed, const char *zFilename){

6040 UNUSED_PARAMETER(NotUsed);

6041 return dlopen(zFilename, RTLD_NOW \| RTLD_GLOBAL);

6042 }

6043

6044 /*

6045 ** SQLite calls this function immediately after a call to unixDlSym() or

6046 ** unixDlOpen() fails (returns a null pointer). If a more detailed error

6047 ** message is available, it is written to zBufOut. If no error message

6048 ** is available, zBufOut is left unmodified and SQLite uses a default

6049 ** error message.

6050 */

6051 static void unixDlError(sqlite3_vfs NotUsed, int nBuf, char zBufOut){

6052 const char *zErr;

6053 UNUSED_PARAMETER(NotUsed);

6054 unixEnterMutex();

6055 zErr = dlerror();

6056 if( zErr ){

6057 sqlite3_snprintf(nBuf, zBufOut, "%s", zErr);

6058 }

6059 unixLeaveMutex();

6060 }

6061 static void (unixDlSym(sqlite3_vfs NotUsed, void p, const charzSym))(void){

6062 /*

6063 ** GCC with -pedantic-errors says that C90 does not allow a void* to be

6064 ** cast into a pointer to a function. And yet the library dlsym() routine

6065 ** returns a void* which is really a pointer to a function. So how do we

6066 ** use dlsym() with -pedantic-errors?

6067 **

6068 ** Variable x below is defined to be a pointer to a function taking

6069 ** parameters void* and const char* and returning a pointer to a function.

6070 ** We initialize x by assigning it a pointer to the dlsym() function.

6071 ** (That assignment requires a cast.) Then we call the function that

6072 ** x points to.

6073 **

6074 ** This work-around is unlikely to work correctly on any system where

6075 ** you really cannot cast a function pointer into void*. But then, on the

6076 ** other hand, dlsym() will not work on such a system either, so we have

6077 ** not really lost anything.

6078 */

6079 void ((x)(void,const char))(void);

6080 UNUSED_PARAMETER(NotUsed);

6081 x = (void(()(void,const char))(void))dlsym;

6082 return (*x)(p, zSym);

6083 }

6084 static void unixDlClose(sqlite3_vfs NotUsed, void pHandle){

6085 UNUSED_PARAMETER(NotUsed);

6086 dlclose(pHandle);

6087 }

6088 #else /* if SQLITE_OMIT_LOAD_EXTENSION is defined: */

6089 #define unixDlOpen 0

6090 #define unixDlError 0

6091 #define unixDlSym 0

6092 #define unixDlClose 0

6093 #endif

6094

6095 /*

6096 ** Write nBuf bytes of random data to the supplied buffer zBuf.

6097 */

6098 static int unixRandomness(sqlite3_vfs NotUsed, int nBuf, char zBuf){

6099 UNUSED_PARAMETER(NotUsed);

6100 assert((size_t)nBuf>=(sizeof(time_t)+sizeof(int)));

6101

6102 /* We have to initialize zBuf to prevent valgrind from reporting

6103 ** errors. The reports issued by valgrind are incorrect - we would

6104 ** prefer that the randomness be increased by making use of the

6105 ** uninitialized space in zBuf - but valgrind errors tend to worry

6106 ** some users. Rather than argue, it seems easier just to initialize

6107 ** the whole array and silence valgrind, even if that means less randomness

6108 ** in the random seed.

6109 **

6110 ** When testing, initializing zBuf[] to zero is all we do. That means

6111 ** that we always use the same random number sequence. This makes the

6112 ** tests repeatable.

6113 */

6114 memset(zBuf, 0, nBuf);

6115 randomnessPid = getpid();

6116 #if !defined(SQLITE_TEST)

6117 {

6118 int fd, got;

6119 fd = robust_open("/dev/urandom", O_RDONLY, 0);

6120 if( fd<0 ){

6121 time_t t;

6122 time(&t);

6123 memcpy(zBuf, &t, sizeof(t));

6124 memcpy(&zBuf[sizeof(t)], &randomnessPid, sizeof(randomnessPid));

6125 assert( sizeof(t)+sizeof(randomnessPid)<=(size_t)nBuf );

6126 nBuf = sizeof(t) + sizeof(randomnessPid);

6127 }else{

6128 do{ got = osRead(fd, zBuf, nBuf); }while( got<0 && errno==EINTR );

6129 robust_close(0, fd, __LINE__);

6130 }

6131 }

6132 #endif

6133 return nBuf;

6134 }

6135

6136

6137 /*

6138 ** Sleep for a little while. Return the amount of time slept.

6139 ** The argument is the number of microseconds we want to sleep.

6140 ** The return value is the number of microseconds of sleep actually

6141 ** requested from the underlying operating system, a number which

6142 ** might be greater than or equal to the argument, but not less

6143 ** than the argument.

6144 */

6145 static int unixSleep(sqlite3_vfs *NotUsed, int microseconds){

6146 #if OS_VXWORKS

6147 struct timespec sp;

6148

6149 sp.tv_sec = microseconds / 1000000;

6150 sp.tv_nsec = (microseconds % 1000000) * 1000;

6151 nanosleep(&sp, NULL);

6152 UNUSED_PARAMETER(NotUsed);

6153 return microseconds;

6154 #elif defined(HAVE_USLEEP) && HAVE_USLEEP

6155 usleep(microseconds);

6156 UNUSED_PARAMETER(NotUsed);

6157 return microseconds;

6158 #else

6159 int seconds = (microseconds+999999)/1000000;

6160 sleep(seconds);

6161 UNUSED_PARAMETER(NotUsed);

6162 return seconds*1000000;

6163 #endif

6164 }

6165

6166 /*

6167 ** The following variable, if set to a non-zero value, is interpreted as

6168 ** the number of seconds since 1970 and is used to set the result of

6169 ** sqlite3OsCurrentTime() during testing.

6170 */

6171 #ifdef SQLITE_TEST

6172 int sqlite3_current_time = 0; /* Fake system time in seconds since 1970. */

6173 #endif

6174

6175 /*

6176 ** Find the current time (in Universal Coordinated Time). Write into *piNow

6177 ** the current time and date as a Julian Day number times 86_400_000. In

6178 ** other words, write into *piNow the number of milliseconds since the Julian

6179 ** epoch of noon in Greenwich on November 24, 4714 B.C according to the

6180 ** proleptic Gregorian calendar.

6181 **

6182 ** On success, return SQLITE_OK. Return SQLITE_ERROR if the time and date

6183 ** cannot be found.

6184 */

6185 static int unixCurrentTimeInt64(sqlite3_vfs NotUsed, sqlite3_int64 piNow){

6186 static const sqlite3_int64 unixEpoch = 24405875*(sqlite3_int64)8640000;

6187 int rc = SQLITE_OK;

6188 #if defined(NO_GETTOD)

6189 time_t t;

6190 time(&t);

6191 piNow = ((sqlite3_int64)t)1000 + unixEpoch;

6192 #elif OS_VXWORKS

6193 struct timespec sNow;

6194 clock_gettime(CLOCK_REALTIME, &sNow);

6195 piNow = unixEpoch + 1000(sqlite3_int64)sNow.tv_sec + sNow.tv_nsec/1000000;

6196 #else

6197 struct timeval sNow;

6198 if( gettimeofday(&sNow, 0)==0 ){

6199 piNow = unixEpoch + 1000(sqlite3_int64)sNow.tv_sec + sNow.tv_usec/1000;

6200 }else{

6201 rc = SQLITE_ERROR;

6202 }

6203 #endif

6204

6205 #ifdef SQLITE_TEST

6206 if( sqlite3_current_time ){

6207 piNow = 1000(sqlite3_int64)sqlite3_current_time + unixEpoch;

6208 }

6209 #endif

6210 UNUSED_PARAMETER(NotUsed);

6211 return rc;

6212 }

6213

6214 /*

6215 ** Find the current time (in Universal Coordinated Time). Write the

6216 ** current time and date as a Julian Day number into *prNow and

6217 ** return 0. Return 1 if the time and date cannot be found.

6218 */

6219 static int unixCurrentTime(sqlite3_vfs NotUsed, double prNow){

6220 sqlite3_int64 i = 0;

6221 int rc;

6222 UNUSED_PARAMETER(NotUsed);

6223 rc = unixCurrentTimeInt64(0, &i);

6224 *prNow = i/86400000.0;

6225 return rc;

6226 }

6227

6228 /*

6229 ** We added the xGetLastError() method with the intention of providing

6230 ** better low-level error messages when operating-system problems come up

6231 ** during SQLite operation. But so far, none of that has been implemented

6232 ** in the core. So this routine is never called. For now, it is merely

6233 ** a place-holder.

6234 */

6235 static int unixGetLastError(sqlite3_vfs NotUsed, int NotUsed2, char NotUsed3){

6236 UNUSED_PARAMETER(NotUsed);

6237 UNUSED_PARAMETER(NotUsed2);

6238 UNUSED_PARAMETER(NotUsed3);

6239 return 0;

6240 }

6241

6242

6243 /*

6244 ********************** End of sqlite3_vfs methods *************************

6245 ******************************************************************************/

6246

6247 /******************************************************************************

6248 ************************ Begin Proxy Locking ******************************

6249 **

6250 ** Proxy locking is a "uber-locking-method" in this sense: It uses the

6251 ** other locking methods on secondary lock files. Proxy locking is a

6252 ** meta-layer over top of the primitive locking implemented above. For

6253 ** this reason, the division that implements proxy locking is deferred

6254 ** until late in the file (here) after all of the other I/O methods have

6255 ** been defined - so that the primitive locking methods are available

6256 ** as services to help with the implementation of proxy locking.

6257 **

6258 ****

6259 **

6260 ** The default locking schemes in SQLite use byte-range locks on the

6261 ** database file to coordinate safe, concurrent access by multiple readers

6262 ** and writers [http://sqlite.org/lockingv3.html]. The five file locking

6263 ** states (UNLOCKED, PENDING, SHARED, RESERVED, EXCLUSIVE) are implemented

6264 ** as POSIX read & write locks over fixed set of locations (via fsctl),

6265 ** on AFP and SMB only exclusive byte-range locks are available via fsctl

6266 ** with _IOWR('z', 23, struct ByteRangeLockPB2) to track the same 5 states.

6267 ** To simulate a F_RDLCK on the shared range, on AFP a randomly selected

6268 ** address in the shared range is taken for a SHARED lock, the entire

6269 ** shared range is taken for an EXCLUSIVE lock):

6270 **

6271 ** PENDING_BYTE 0x40000000

6272 ** RESERVED_BYTE 0x40000001

6273 ** SHARED_RANGE 0x40000002 -> 0x40000200

6274 **

6275 ** This works well on the local file system, but shows a nearly 100x

6276 ** slowdown in read performance on AFP because the AFP client disables

6277 ** the read cache when byte-range locks are present. Enabling the read

6278 ** cache exposes a cache coherency problem that is present on all OS X

6279 ** supported network file systems. NFS and AFP both observe the

6280 ** close-to-open semantics for ensuring cache coherency

6281 ** [http://nfs.sourceforge.net/#faq_a8], which does not effectively

6282 ** address the requirements for concurrent database access by multiple

6283 ** readers and writers

6284 ** [http://www.nabble.com/SQLite-on-NFS-cache-coherency-td15655701.html].

6285 **

6286 ** To address the performance and cache coherency issues, proxy file locking

6287 ** changes the way database access is controlled by limiting access to a

6288 ** single host at a time and moving file locks off of the database file

6289 ** and onto a proxy file on the local file system.

6290 **

6291 **

6292 ** Using proxy locks

6293 ** -----------------

6294 **

6295 ** C APIs

6296 **

6297 ** sqlite3_file_control(db, dbname, SQLITE_SET_LOCKPROXYFILE,

6298 ** <proxy_path> | ":auto:");

6299 ** sqlite3_file_control(db, dbname, SQLITE_GET_LOCKPROXYFILE, &<proxy_path>);

6300 **

6301 **

6302 ** SQL pragmas

6303 **

6304 ** PRAGMA [database.]lock_proxy_file=<proxy_path> | :auto:

6305 ** PRAGMA [database.]lock_proxy_file

6306 **

6307 ** Specifying ":auto:" means that if there is a conch file with a matching

6308 ** host ID in it, the proxy path in the conch file will be used, otherwise

6309 ** a proxy path based on the user's temp dir

6310 ** (via confstr(_CS_DARWIN_USER_TEMP_DIR,...)) will be used and the

6311 ** actual proxy file name is generated from the name and path of the

6312 ** database file. For example:

6313 **

6314 ** For database path "/Users/me/foo.db"

6315 ** The lock path will be "<tmpdir>/sqliteplocks/_Users_me_foo.db:auto:")

6316 **

6317 ** Once a lock proxy is configured for a database connection, it can not

6318 ** be removed, however it may be switched to a different proxy path via

6319 ** the above APIs (assuming the conch file is not being held by another

6320 ** connection or process).

6321 **

6322 **

6323 ** How proxy locking works

6324 ** -----------------------

6325 **

6326 ** Proxy file locking relies primarily on two new supporting files:

6327 **

6328 ** * conch file to limit access to the database file to a single host

6329 ** at a time

6330 **

6331 ** * proxy file to act as a proxy for the advisory locks normally

6332 ** taken on the database

6333 **

6334 ** The conch file - to use a proxy file, sqlite must first "hold the conch"

6335 ** by taking an sqlite-style shared lock on the conch file, reading the

6336 ** contents and comparing the host's unique host ID (see below) and lock

6337 ** proxy path against the values stored in the conch. The conch file is

6338 ** stored in the same directory as the database file and the file name

6339 ** is patterned after the database file name as ".<databasename>-conch".

6340 ** If the conch file does not exist, or its contents do not match the

6341 ** host ID and/or proxy path, then the lock is escalated to an exclusive

6342 ** lock and the conch file contents is updated with the host ID and proxy

6343 ** path and the lock is downgraded to a shared lock again. If the conch

6344 ** is held by another process (with a shared lock), the exclusive lock

6345 ** will fail and SQLITE_BUSY is returned.

6346 **

6347 ** The proxy file - a single-byte file used for all advisory file locks

6348 ** normally taken on the database file. This allows for safe sharing

6349 ** of the database file for multiple readers and writers on the same

6350 ** host (the conch ensures that they all use the same local lock file).

6351 **

6352 ** Requesting the lock proxy does not immediately take the conch, it is

6353 ** only taken when the first request to lock database file is made.

6354 ** This matches the semantics of the traditional locking behavior, where

6355 ** opening a connection to a database file does not take a lock on it.

6356 ** The shared lock and an open file descriptor are maintained until

6357 ** the connection to the database is closed.

6358 **

6359 ** The proxy file and the lock file are never deleted so they only need

6360 ** to be created the first time they are used.

6361 **

6362 ** Configuration options

6363 ** ---------------------

6364 **

6365 ** SQLITE_PREFER_PROXY_LOCKING

6366 **

6367 ** Database files accessed on non-local file systems are

6368 ** automatically configured for proxy locking, lock files are

6369 ** named automatically using the same logic as

6370 ** PRAGMA lock_proxy_file=":auto:"

6371 **

6372 ** SQLITE_PROXY_DEBUG

6373 **

6374 ** Enables the logging of error messages during host id file

6375 ** retrieval and creation

6376 **

6377 ** LOCKPROXYDIR

6378 **

6379 ** Overrides the default directory used for lock proxy files that

6380 ** are named automatically via the ":auto:" setting

6381 **

6382 ** SQLITE_DEFAULT_PROXYDIR_PERMISSIONS

6383 **

6384 ** Permissions to use when creating a directory for storing the

6385 ** lock proxy files, only used when LOCKPROXYDIR is not set.

6386 **

6387 **

6388 ** As mentioned above, when compiled with SQLITE_PREFER_PROXY_LOCKING,

6389 ** setting the environment variable SQLITE_FORCE_PROXY_LOCKING to 1 will

6390 ** force proxy locking to be used for every database file opened, and 0

6391 ** will force automatic proxy locking to be disabled for all database

6392 ** files (explicitly calling the SQLITE_SET_LOCKPROXYFILE pragma or

6393 ** sqlite3_file_control API is not affected by SQLITE_FORCE_PROXY_LOCKING).

6394 */

6395

6396 /*

6397 ** Proxy locking is only available on MacOSX

6398 */

6399 #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE

6400

6401 /*

6402 ** The proxyLockingContext has the path and file structures for the remote

6403 ** and local proxy files in it

6404 */

6405 typedef struct proxyLockingContext proxyLockingContext;

6406 struct proxyLockingContext {

6407 unixFile conchFile; / Open conch file */

6408 char conchFilePath; / Name of the conch file */

6409 unixFile lockProxy; / Open proxy lock file */

6410 char lockProxyPath; / Name of the proxy lock file */

6411 char dbPath; / Name of the open file */

6412 int conchHeld; /* 1 if the conch is held, -1 if lockless */

6413 void oldLockingContext; / Original lockingcontext to restore on close */

6414 sqlite3_io_methods const pOldMethod; / Original I/O methods for close */

6415 };

6416

6417 /*

6418 ** The proxy lock file path for the database at dbPath is written into lPath,

6419 ** which must point to valid, writable memory large enough for a maxLen length

6420 ** file path.

6421 */

6422 static int proxyGetLockPath(const char dbPath, char lPath, size_t maxLen){

6423 int len;

6424 int dbLen;

6425 int i;

6426

6427 #ifdef LOCKPROXYDIR

6428 len = strlcpy(lPath, LOCKPROXYDIR, maxLen);

6429 #else

6430 # ifdef _CS_DARWIN_USER_TEMP_DIR

6431 {

6432 if( !confstr(_CS_DARWIN_USER_TEMP_DIR, lPath, maxLen) ){

6433 OSTRACE(("GETLOCKPATH failed %s errno=%d pid=%d\n",

6434 lPath, errno, getpid()));

6435 return SQLITE_IOERR_LOCK;

6436 }

6437 len = strlcat(lPath, "sqliteplocks", maxLen);

6438 }

6439 # else

6440 len = strlcpy(lPath, "/tmp/", maxLen);

6441 # endif

6442 #endif

6443

6444 if( lPath[len-1]!='/' ){

6445 len = strlcat(lPath, "/", maxLen);

6446 }

6447

6448 /* transform the db path to a unique cache name */

6449 dbLen = (int)strlen(dbPath);

6450 for( i=0; i<dbLen && (i+len+7)<(int)maxLen; i++){

6451 char c = dbPath[i];

6452 lPath[i+len] = (c=='/')?'_':c;

6453 }

6454 lPath[i+len]='\0';

6455 strlcat(lPath, ":auto:", maxLen);

6456 OSTRACE(("GETLOCKPATH proxy lock path=%s pid=%d\n", lPath, getpid()));

6457 return SQLITE_OK;

6458 }

6459

6460 /*

6461 ** Creates the lock file and any missing directories in lockPath

6462 */

6463 static int proxyCreateLockPath(const char *lockPath){

6464 int i, len;

6465 char buf[MAXPATHLEN];

6466 int start = 0;

6467

6468 assert(lockPath!=NULL);

6469 /* try to create all the intermediate directories */

6470 len = (int)strlen(lockPath);

6471 buf[0] = lockPath[0];

6472 for( i=1; i<len; i++ ){

6473 if( lockPath[i] == '/' && (i - start > 0) ){

6474 /* only mkdir if leaf dir != "." or "/" or ".." */

6475 if( i-start>2 \|\| (i-start==1 && buf[start] != '.' && buf[start] != '/')

6476 \|\| (i-start==2 && buf[start] != '.' && buf[start+1] != '.') ){

6477 buf[i]='\0';

6478 if( osMkdir(buf, SQLITE_DEFAULT_PROXYDIR_PERMISSIONS) ){

6479 int err=errno;

6480 if( err!=EEXIST ) {

6481 OSTRACE(("CREATELOCKPATH FAILED creating %s, "

6482 "'%s' proxy lock path=%s pid=%d\n",

6483 buf, strerror(err), lockPath, getpid()));

6484 return err;

6485 }

6486 }

6487 }

6488 start=i+1;

6489 }

6490 buf[i] = lockPath[i];

6491 }

6492 OSTRACE(("CREATELOCKPATH proxy lock path=%s pid=%d\n", lockPath, getpid()));

6493 return 0;

6494 }

6495

6496 /*

6497 ** Create a new VFS file descriptor (stored in memory obtained from

6498 ** sqlite3_malloc) and open the file named "path" in the file descriptor.

6499 **

6500 ** The caller is responsible not only for closing the file descriptor

6501 ** but also for freeing the memory associated with the file descriptor.

6502 */

6503 static int proxyCreateUnixFile(

6504 const char path, / path for the new unixFile */

6505 unixFile *ppFile, / unixFile created and returned by ref */

6506 int islockfile /* if non zero missing dirs will be created */

6507 ) {

6508 int fd = -1;

6509 unixFile *pNew;

6510 int rc = SQLITE_OK;

6511 int openFlags = O_RDWR \| O_CREAT;

6512 sqlite3_vfs dummyVfs;

6513 int terrno = 0;

6514 UnixUnusedFd *pUnused = NULL;

6515

6516 /* 1. first try to open/create the file

6517 ** 2. if that fails, and this is a lock file (not-conch), try creating

6518 ** the parent directories and then try again.

6519 ** 3. if that fails, try to open the file read-only

6520 ** otherwise return BUSY (if lock file) or CANTOPEN for the conch file

6521 */

6522 pUnused = findReusableFd(path, openFlags);

6523 if( pUnused ){

6524 fd = pUnused->fd;

6525 }else{

6526 pUnused = sqlite3_malloc(sizeof(*pUnused));

6527 if( !pUnused ){

6528 return SQLITE_NOMEM;

6529 }

6530 }

6531 if( fd<0 ){

6532 fd = robust_open(path, openFlags, 0);

6533 terrno = errno;

6534 if( fd<0 && errno==ENOENT && islockfile ){

6535 if( proxyCreateLockPath(path) == SQLITE_OK ){

6536 fd = robust_open(path, openFlags, 0);

6537 }

6538 }

6539 }

6540 if( fd<0 ){

6541 openFlags = O_RDONLY;

6542 fd = robust_open(path, openFlags, 0);

6543 terrno = errno;

6544 }

6545 if( fd<0 ){

6546 if( islockfile ){

6547 return SQLITE_BUSY;

6548 }

6549 switch (terrno) {

6550 case EACCES:

6551 return SQLITE_PERM;

6552 case EIO:

6553 return SQLITE_IOERR_LOCK; /* even though it is the conch */

6554 default:

6555 return SQLITE_CANTOPEN_BKPT;

6556 }

6557 }

6558

6559 pNew = (unixFile )sqlite3_malloc(sizeof(pNew));

6560 if( pNew==NULL ){

6561 rc = SQLITE_NOMEM;

6562 goto end_create_proxy;

6563 }

6564 memset(pNew, 0, sizeof(unixFile));

6565 pNew->openFlags = openFlags;

6566 memset(&dummyVfs, 0, sizeof(dummyVfs));

6567 dummyVfs.pAppData = (void*)&autolockIoFinder;

6568 dummyVfs.zName = "dummy";

6569 pUnused->fd = fd;

6570 pUnused->flags = openFlags;

6571 pNew->pUnused = pUnused;

6572

6573 rc = fillInUnixFile(&dummyVfs, fd, (sqlite3_file*)pNew, path, 0);

6574 if( rc==SQLITE_OK ){

6575 *ppFile = pNew;

6576 return SQLITE_OK;

6577 }

6578 end_create_proxy:

6579 robust_close(pNew, fd, __LINE__);

6580 sqlite3_free(pNew);

6581 sqlite3_free(pUnused);

6582 return rc;

6583 }

6584

6585 #ifdef SQLITE_TEST

6586 /* simulate multiple hosts by creating unique hostid file paths */

6587 int sqlite3_hostid_num = 0;

6588 #endif

6589

6590 #define PROXY_HOSTIDLEN 16 /* conch file host id length */

6591

6592 /* Not always defined in the headers as it ought to be */

6593 extern int gethostuuid(uuid_t id, const struct timespec *wait);

6594

6595 /* get the host ID via gethostuuid(), pHostID must point to PROXY_HOSTIDLEN

6596 ** bytes of writable memory.

6597 */

6598 static int proxyGetHostID(unsigned char pHostID, int pError){

6599 assert(PROXY_HOSTIDLEN == sizeof(uuid_t));

6600 memset(pHostID, 0, PROXY_HOSTIDLEN);

6601 #if defined(__MAX_OS_X_VERSION_MIN_REQUIRED)\

6602 && __MAC_OS_X_VERSION_MIN_REQUIRED<1050

6603 {

6604 static const struct timespec timeout = {1, 0}; /* 1 sec timeout */

6605 if( gethostuuid(pHostID, &timeout) ){

6606 int err = errno;

6607 if( pError ){

6608 *pError = err;

6609 }

6610 return SQLITE_IOERR;

6611 }

6612 }

6613 #else

6614 UNUSED_PARAMETER(pError);

6615 #endif

6616 #ifdef SQLITE_TEST

6617 /* simulate multiple hosts by creating unique hostid file paths */

6618 if( sqlite3_hostid_num != 0){

6619 pHostID[0] = (char)(pHostID[0] + (char)(sqlite3_hostid_num & 0xFF));

6620 }

6621 #endif

6622

6623 return SQLITE_OK;

6624 }

6625

/* The conch file contains the header, host id and lock file path
*/
#define PROXY_CONCHVERSION 2   /* 1-byte header, 16-byte host id, path */
#define PROXY_HEADERLEN    1   /* conch file header length */
#define PROXY_PATHINDEX    (PROXY_HEADERLEN+PROXY_HOSTIDLEN)
#define PROXY_MAXCONCHLEN  (PROXY_HEADERLEN+PROXY_HOSTIDLEN+MAXPATHLEN)

6632

6633 /*

6634 ** Takes an open conch file, copies the contents to a new path and then moves

6635 ** it back. The newly created file's file descriptor is assigned to the

6636 ** conch file structure and finally the original conch file descriptor is

6637 ** closed. Returns zero if successful.

6638 */

6639 static int proxyBreakConchLock(unixFile *pFile, uuid_t myHostID){

6640 proxyLockingContext pCtx = (proxyLockingContext )pFile->lockingContext;

6641 unixFile *conchFile = pCtx->conchFile;

6642 char tPath[MAXPATHLEN];

6643 char buf[PROXY_MAXCONCHLEN];

6644 char *cPath = pCtx->conchFilePath;

6645 size_t readLen = 0;

6646 size_t pathLen = 0;

6647 char errmsg[64] = "";

6648 int fd = -1;

6649 int rc = -1;

6650 UNUSED_PARAMETER(myHostID);

6651

6652 /* create a new path by replace the trailing '-conch' with '-break' */

6653 pathLen = strlcpy(tPath, cPath, MAXPATHLEN);

6654 if( pathLen>MAXPATHLEN \|\| pathLen<6 \|\|

6655 (strlcpy(&tPath[pathLen-5], "break", 6) != 5) ){

6656 sqlite3_snprintf(sizeof(errmsg),errmsg,"path error (len %d)",(int)pathLen);

6657 goto end_breaklock;

6658 }

6659 /* read the conch content */

6660 readLen = osPread(conchFile->h, buf, PROXY_MAXCONCHLEN, 0);

6661 if( readLen<PROXY_PATHINDEX ){

6662 sqlite3_snprintf(sizeof(errmsg),errmsg,"read error (len %d)",(int)readLen);

6663 goto end_breaklock;

6664 }

6665 /* write it out to the temporary break file */

6666 fd = robust_open(tPath, (O_RDWR\|O_CREAT\|O_EXCL), 0);

6667 if( fd<0 ){

6668 sqlite3_snprintf(sizeof(errmsg), errmsg, "create failed (%d)", errno);

6669 goto end_breaklock;

6670 }

6671 if( osPwrite(fd, buf, readLen, 0) != (ssize_t)readLen ){

6672 sqlite3_snprintf(sizeof(errmsg), errmsg, "write failed (%d)", errno);

6673 goto end_breaklock;

6674 }

6675 if( rename(tPath, cPath) ){

6676 sqlite3_snprintf(sizeof(errmsg), errmsg, "rename failed (%d)", errno);

6677 goto end_breaklock;

6678 }

6679 rc = 0;

6680 fprintf(stderr, "broke stale lock on %s\n", cPath);

6681 robust_close(pFile, conchFile->h, __LINE__);

6682 conchFile->h = fd;

6683 conchFile->openFlags = O_RDWR \| O_CREAT;

6684

6685 end_breaklock:

6686 if( rc ){

6687 if( fd>=0 ){

6688 osUnlink(tPath);

6689 robust_close(pFile, fd, __LINE__);

6690 }

6691 fprintf(stderr, "failed to break stale lock on %s, %s\n", cPath, errmsg);

6692 }

6693 return rc;

6694 }

6695

6696 /* Take the requested lock on the conch file and break a stale lock if the

6697 ** host id matches.

6698 */

6699 static int proxyConchLock(unixFile *pFile, uuid_t myHostID, int lockType){

6700 proxyLockingContext pCtx = (proxyLockingContext )pFile->lockingContext;

6701 unixFile *conchFile = pCtx->conchFile;

6702 int rc = SQLITE_OK;

6703 int nTries = 0;

6704 struct timespec conchModTime;

6705

6706 memset(&conchModTime, 0, sizeof(conchModTime));

6707 do {

6708 rc = conchFile->pMethod->xLock((sqlite3_file*)conchFile, lockType);

6709 nTries ++;

6710 if( rc==SQLITE_BUSY ){

6711 /* If the lock failed (busy):

6712 * 1st try: get the mod time of the conch, wait 0.5s and try again.

6713 * 2nd try: fail if the mod time changed or host id is different, wait

6714 * 10 sec and try again

6715 * 3rd try: break the lock unless the mod time has changed.

6716 */

6717 struct stat buf;

6718 if( osFstat(conchFile->h, &buf) ){

6719 pFile->lastErrno = errno;

6720 return SQLITE_IOERR_LOCK;

6721 }

6722

6723 if( nTries==1 ){

6724 conchModTime = buf.st_mtimespec;

6725 usleep(500000); /* wait 0.5 sec and try the lock again*/

6726 continue;

6727 }

6728

6729 assert( nTries>1 );

6730 if( conchModTime.tv_sec != buf.st_mtimespec.tv_sec \|\|

6731 conchModTime.tv_nsec != buf.st_mtimespec.tv_nsec ){

6732 return SQLITE_BUSY;

6733 }

6734

6735 if( nTries==2 ){

6736 char tBuf[PROXY_MAXCONCHLEN];

6737 int len = osPread(conchFile->h, tBuf, PROXY_MAXCONCHLEN, 0);

6738 if( len<0 ){

6739 pFile->lastErrno = errno;

6740 return SQLITE_IOERR_LOCK;

6741 }

6742 if( len>PROXY_PATHINDEX && tBuf[0]==(char)PROXY_CONCHVERSION){

6743 /* don't break the lock if the host id doesn't match */

6744 if( 0!=memcmp(&tBuf[PROXY_HEADERLEN], myHostID, PROXY_HOSTIDLEN) ){

6745 return SQLITE_BUSY;

6746 }

6747 }else{

6748 /* don't break the lock on short read or a version mismatch */

6749 return SQLITE_BUSY;

6750 }

6751 usleep(10000000); /* wait 10 sec and try the lock again */

6752 continue;

6753 }

6754

6755 assert( nTries==3 );

6756 if( 0==proxyBreakConchLock(pFile, myHostID) ){

6757 rc = SQLITE_OK;

6758 if( lockType==EXCLUSIVE_LOCK ){

6759 rc = conchFile->pMethod->xLock((sqlite3_file*)conchFile, SHARED_LOCK);

6760 }

6761 if( !rc ){

6762 rc = conchFile->pMethod->xLock((sqlite3_file*)conchFile, lockType);

6763 }

6764 }

6765 }

6766 } while( rc==SQLITE_BUSY && nTries<3 );

6767

6768 return rc;

6769 }

6770

6771 /* Takes the conch by taking a shared lock and read the contents conch, if

6772 ** lockPath is non-NULL, the host ID and lock file path must match. A NULL

6773 ** lockPath means that the lockPath in the conch file will be used if the

6774 ** host IDs match, or a new lock path will be generated automatically

6775 ** and written to the conch file.

6776 */

6777 static int proxyTakeConch(unixFile *pFile){

6778 proxyLockingContext pCtx = (proxyLockingContext )pFile->lockingContext;

6779

6780 if( pCtx->conchHeld!=0 ){

6781 return SQLITE_OK;

6782 }else{

6783 unixFile *conchFile = pCtx->conchFile;

6784 uuid_t myHostID;

6785 int pError = 0;

6786 char readBuf[PROXY_MAXCONCHLEN];

6787 char lockPath[MAXPATHLEN];

6788 char *tempLockPath = NULL;

6789 int rc = SQLITE_OK;

6790 int createConch = 0;

6791 int hostIdMatch = 0;

6792 int readLen = 0;

6793 int tryOldLockPath = 0;

6794 int forceNewLockPath = 0;

6795

6796 OSTRACE(("TAKECONCH %d for %s pid=%d\n", conchFile->h,

6797 (pCtx->lockProxyPath ? pCtx->lockProxyPath : ":auto:"), getpid()));

6798

6799 rc = proxyGetHostID(myHostID, &pError);

6800 if( (rc&0xff)==SQLITE_IOERR ){

6801 pFile->lastErrno = pError;

6802 goto end_takeconch;

6803 }

6804 rc = proxyConchLock(pFile, myHostID, SHARED_LOCK);

6805 if( rc!=SQLITE_OK ){

6806 goto end_takeconch;

6807 }

6808 /* read the existing conch file */

6809 readLen = seekAndRead((unixFile*)conchFile, 0, readBuf, PROXY_MAXCONCHLEN);

6810 if( readLen<0 ){

6811 /* I/O error: lastErrno set by seekAndRead */

6812 pFile->lastErrno = conchFile->lastErrno;

6813 rc = SQLITE_IOERR_READ;

6814 goto end_takeconch;

6815 }else if( readLen<=(PROXY_HEADERLEN+PROXY_HOSTIDLEN) \|\|

6816 readBuf[0]!=(char)PROXY_CONCHVERSION ){

6817 /* a short read or version format mismatch means we need to create a new

6818 ** conch file.

6819 */

6820 createConch = 1;

6821 }

6822 /* if the host id matches and the lock path already exists in the conch

6823 ** we'll try to use the path there, if we can't open that path, we'll

6824 ** retry with a new auto-generated path

6825 */

6826 do { /* in case we need to try again for an :auto: named lock file */

6827

6828 if( !createConch && !forceNewLockPath ){

6829 hostIdMatch = !memcmp(&readBuf[PROXY_HEADERLEN], myHostID,

6830 PROXY_HOSTIDLEN);

6831 /* if the conch has data compare the contents */

6832 if( !pCtx->lockProxyPath ){

6833 /* for auto-named local lock file, just check the host ID and we'll

6834 ** use the local lock file path that's already in there

6835 */

6836 if( hostIdMatch ){

6837 size_t pathLen = (readLen - PROXY_PATHINDEX);

6838

6839 if( pathLen>=MAXPATHLEN ){

6840 pathLen=MAXPATHLEN-1;

6841 }

6842 memcpy(lockPath, &readBuf[PROXY_PATHINDEX], pathLen);

6843 lockPath[pathLen] = 0;

6844 tempLockPath = lockPath;

6845 tryOldLockPath = 1;

6846 /* create a copy of the lock path if the conch is taken */

6847 goto end_takeconch;

6848 }

6849 }else if( hostIdMatch

6850 && !strncmp(pCtx->lockProxyPath, &readBuf[PROXY_PATHINDEX],

6851 readLen-PROXY_PATHINDEX)

6852 ){

6853 /* conch host and lock path match */

6854 goto end_takeconch;

6855 }

6856 }

6857

6858 /* if the conch isn't writable and doesn't match, we can't take it */

6859 if( (conchFile->openFlags&O_RDWR) == 0 ){

6860 rc = SQLITE_BUSY;

6861 goto end_takeconch;

6862 }

6863

6864 /* either the conch didn't match or we need to create a new one */

6865 if( !pCtx->lockProxyPath ){

6866 proxyGetLockPath(pCtx->dbPath, lockPath, MAXPATHLEN);

6867 tempLockPath = lockPath;

6868 /* create a copy of the lock path _only_ if the conch is taken */

6869 }

6870

6871 /* update conch with host and path (this will fail if other process

6872 ** has a shared lock already), if the host id matches, use the big

6873 ** stick.

6874 */

6875 futimes(conchFile->h, NULL);

6876 if( hostIdMatch && !createConch ){

6877 if( conchFile->pInode && conchFile->pInode->nShared>1 ){

6878 /* We are trying for an exclusive lock but another thread in this

6879 ** same process is still holding a shared lock. */

6880 rc = SQLITE_BUSY;

6881 } else {

6882 rc = proxyConchLock(pFile, myHostID, EXCLUSIVE_LOCK);

6883 }

6884 }else{

6885 rc = conchFile->pMethod->xLock((sqlite3_file*)conchFile, EXCLUSIVE_LOCK) ;

6886 }

6887 if( rc==SQLITE_OK ){

6888 char writeBuffer[PROXY_MAXCONCHLEN];

6889 int writeSize = 0;

6890

6891 writeBuffer[0] = (char)PROXY_CONCHVERSION;

6892 memcpy(&writeBuffer[PROXY_HEADERLEN], myHostID, PROXY_HOSTIDLEN);

6893 if( pCtx->lockProxyPath!=NULL ){

6894 strlcpy(&writeBuffer[PROXY_PATHINDEX], pCtx->lockProxyPath, MAXPATHLEN );

6895 }else{

6896 strlcpy(&writeBuffer[PROXY_PATHINDEX], tempLockPath, MAXPATHLEN);

6897 }

6898 writeSize = PROXY_PATHINDEX + strlen(&writeBuffer[PROXY_PATHINDEX]);

6899 robust_ftruncate(conchFile->h, writeSize);

6900 rc = unixWrite((sqlite3_file *)conchFile, writeBuffer, writeSize, 0);

6901 fsync(conchFile->h);

6902 /* If we created a new conch file (not just updated the contents of a

6903 ** valid conch file), try to match the permissions of the database

6904 */

6905 if( rc==SQLITE_OK && createConch ){

6906 struct stat buf;

6907 int err = osFstat(pFile->h, &buf);

6908 if( err==0 ){

6909 mode_t cmode = buf.st_mode&(S_IRUSR\|S_IWUSR \| S_IRGRP\|S_IWGRP \|

6910 S_IROTH\|S_IWOTH);

6911 /* try to match the database file R/W permissions, ignore failure */

6912 #ifndef SQLITE_PROXY_DEBUG

6913 osFchmod(conchFile->h, cmode);

6914 #else

6915 do{

6916 rc = osFchmod(conchFile->h, cmode);

6917 }while( rc==(-1) && errno==EINTR );

6918 if( rc!=0 ){

6919 int code = errno;

6920 fprintf(stderr, "fchmod %o FAILED with %d %s\n",

6921 cmode, code, strerror(code));

6922 } else {

6923 fprintf(stderr, "fchmod %o SUCCEDED\n",cmode);

6924 }

6925 }else{

6926 int code = errno;

6927 fprintf(stderr, "STAT FAILED[%d] with %d %s\n",

6928 err, code, strerror(code));

6929 #endif

6930 }

6931 }

6932 }

6933 conchFile->pMethod->xUnlock((sqlite3_file*)conchFile, SHARED_LOCK);

6934

6935 end_takeconch:

6936 OSTRACE(("TRANSPROXY: CLOSE %d\n", pFile->h));

6937 if( rc==SQLITE_OK && pFile->openFlags ){

6938 int fd;

6939 if( pFile->h>=0 ){

6940 robust_close(pFile, pFile->h, __LINE__);

6941 }

6942 pFile->h = -1;

6943 fd = robust_open(pCtx->dbPath, pFile->openFlags, 0);

6944 OSTRACE(("TRANSPROXY: OPEN %d\n", fd));

6945 if( fd>=0 ){

6946 pFile->h = fd;

6947 }else{

6948 rc=SQLITE_CANTOPEN_BKPT; /* SQLITE_BUSY? proxyTakeConch called

6949 during locking */

6950 }

6951 }

6952 if( rc==SQLITE_OK && !pCtx->lockProxy ){

6953 char *path = tempLockPath ? tempLockPath : pCtx->lockProxyPath;

6954 rc = proxyCreateUnixFile(path, &pCtx->lockProxy, 1);

6955 if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM && tryOldLockPath ){

6956 /* we couldn't create the proxy lock file with the old lock file path

6957 ** so try again via auto-naming

6958 */

6959 forceNewLockPath = 1;

6960 tryOldLockPath = 0;

6961 continue; /* go back to the do {} while start point, try again */

6962 }

6963 }

6964 if( rc==SQLITE_OK ){

6965 /* Need to make a copy of path if we extracted the value

6966 ** from the conch file or the path was allocated on the stack

6967 */

6968 if( tempLockPath ){

6969 pCtx->lockProxyPath = sqlite3DbStrDup(0, tempLockPath);

6970 if( !pCtx->lockProxyPath ){

6971 rc = SQLITE_NOMEM;

6972 }

6973 }

6974 }

6975 if( rc==SQLITE_OK ){

6976 pCtx->conchHeld = 1;

6977

6978 if( pCtx->lockProxy->pMethod == &afpIoMethods ){

6979 afpLockingContext *afpCtx;

6980 afpCtx = (afpLockingContext *)pCtx->lockProxy->lockingContext;

6981 afpCtx->dbPath = pCtx->lockProxyPath;

6982 }

6983 } else {

6984 conchFile->pMethod->xUnlock((sqlite3_file*)conchFile, NO_LOCK);

6985 }

6986 OSTRACE(("TAKECONCH %d %s\n", conchFile->h,

6987 rc==SQLITE_OK?"ok":"failed"));

6988 return rc;

6989 } while (1); /* in case we need to retry the :auto: lock file -

6990 ** we should never get here except via the 'continue' call. */

6991 }

6992 }

6993

6994 /*

6995 ** If pFile holds a lock on a conch file, then release that lock.

6996 */

6997 static int proxyReleaseConch(unixFile *pFile){

6998 int rc = SQLITE_OK; /* Subroutine return code */

6999 proxyLockingContext pCtx; / The locking context for the proxy lock */

7000 unixFile conchFile; / Name of the conch file */

7001

7002 pCtx = (proxyLockingContext *)pFile->lockingContext;

7003 conchFile = pCtx->conchFile;

7004 OSTRACE(("RELEASECONCH %d for %s pid=%d\n", conchFile->h,

7005 (pCtx->lockProxyPath ? pCtx->lockProxyPath : ":auto:"),

7006 getpid()));

7007 if( pCtx->conchHeld>0 ){

7008 rc = conchFile->pMethod->xUnlock((sqlite3_file*)conchFile, NO_LOCK);

7009 }

7010 pCtx->conchHeld = 0;

7011 OSTRACE(("RELEASECONCH %d %s\n", conchFile->h,

7012 (rc==SQLITE_OK ? "ok" : "failed")));

7013 return rc;

7014 }

7015

7016 /*

7017 ** Given the name of a database file, compute the name of its conch file.

7018 ** Store the conch filename in memory obtained from sqlite3_malloc().

7019 ** Make *pConchPath point to the new name. Return SQLITE_OK on success

7020 ** or SQLITE_NOMEM if unable to obtain memory.

7021 **

7022 ** The caller is responsible for ensuring that the allocated memory

7023 ** space is eventually freed.

7024 **

7025 ** *pConchPath is set to NULL if a memory allocation error occurs.

7026 */

7027 static int proxyCreateConchPathname(char dbPath, char *pConchPath){

7028 int i; /* Loop counter */

7029 int len = (int)strlen(dbPath); /* Length of database filename - dbPath */

7030 char conchPath; / buffer in which to construct conch name */

7031

7032 /* Allocate space for the conch filename and initialize the name to

7033 ** the name of the original database file. */

7034 pConchPath = conchPath = (char )sqlite3_malloc(len + 8);

7035 if( conchPath==0 ){

7036 return SQLITE_NOMEM;

7037 }

7038 memcpy(conchPath, dbPath, len+1);

7039

7040 /* now insert a "." before the last / character */

7041 for( i=(len-1); i>=0; i-- ){

7042 if( conchPath[i]=='/' ){

7043 i++;

7044 break;

7045 }

7046 }

7047 conchPath[i]='.';

7048 while ( i<len ){

7049 conchPath[i+1]=dbPath[i];

7050 i++;

7051 }

7052

7053 /* append the "-conch" suffix to the file */

7054 memcpy(&conchPath[i+1], "-conch", 7);

7055 assert( (int)strlen(conchPath) == len+7 );

7056

7057 return SQLITE_OK;

7058 }

7059

7060

7061 /* Takes a fully configured proxy locking-style unix file and switches

7062 ** the local lock file path

7063 */

7064 static int switchLockProxyPath(unixFile *pFile, const char *path) {

7065 proxyLockingContext *pCtx = (proxyLockingContext*)pFile->lockingContext;

7066 char *oldPath = pCtx->lockProxyPath;

7067 int rc = SQLITE_OK;

7068

7069 if( pFile->eFileLock!=NO_LOCK ){

7070 return SQLITE_BUSY;

7071 }

7072

7073 /* nothing to do if the path is NULL, :auto: or matches the existing path */

7074 if( !path || path[0]=='\0' || !strcmp(path, ":auto:") ||

7075 (oldPath && !strncmp(oldPath, path, MAXPATHLEN)) ){

7076 return SQLITE_OK;

7077 }else{

7078 unixFile *lockProxy = pCtx->lockProxy;

7079 pCtx->lockProxy=NULL;

7080 pCtx->conchHeld = 0;

7081 if( lockProxy!=NULL ){

7082 rc=lockProxy->pMethod->xClose((sqlite3_file *)lockProxy);

7083 if( rc ) return rc;

7084 sqlite3_free(lockProxy);

7085 }

7086 sqlite3_free(oldPath);

7087 pCtx->lockProxyPath = sqlite3DbStrDup(0, path);

7088 }

7089

7090 return rc;

7091 }

7092

7093 /*

7094 ** pFile is a file that has been opened by a prior xOpen call. dbPath

7095 ** is a string buffer at least MAXPATHLEN+1 characters in size.

7096 **

7097 ** This routine finds the filename associated with pFile and writes it

7098 ** into dbPath.

7099 */

7100 static int proxyGetDbPathForUnixFile(unixFile *pFile, char *dbPath){

7101 #if defined(__APPLE__)

7102 if( pFile->pMethod == &afpIoMethods ){

7103 /* afp style keeps a reference to the db path in the filePath field

7104 ** of the struct */

7105 assert( (int)strlen((char*)pFile->lockingContext)<=MAXPATHLEN );

7106 strlcpy(dbPath, ((afpLockingContext *)pFile->lockingContext)->dbPath, MAXPATHLEN);

7107 } else

7108 #endif

7109 if( pFile->pMethod == &dotlockIoMethods ){

7110 /* dot lock style uses the locking context to store the dot lock

7111 ** file path */

7112 int len = strlen((char *)pFile->lockingContext) - strlen(DOTLOCK_SUFFIX);

7113 memcpy(dbPath, (char *)pFile->lockingContext, len + 1);

7114 }else{

7115 /* all other styles use the locking context to store the db file path */

7116 assert( strlen((char*)pFile->lockingContext)<=MAXPATHLEN );

7117 strlcpy(dbPath, (char *)pFile->lockingContext, MAXPATHLEN);

7118 }

7119 return SQLITE_OK;

7120 }

7121

7122 /*

7123 ** Takes an already filled in unix file and alters it so all file locking

7124 ** will be performed on the local proxy lock file. The following fields

7125 ** are preserved in the locking context so that they can be restored and

7126 ** the unix structure properly cleaned up at close time:

7127 ** ->lockingContext

7128 ** ->pMethod

7129 */

7130 static int proxyTransformUnixFile(unixFile *pFile, const char *path) {

7131 proxyLockingContext *pCtx;

7132 char dbPath[MAXPATHLEN+1]; /* Name of the database file */

7133 char *lockPath=NULL;

7134 int rc = SQLITE_OK;

7135

7136 if( pFile->eFileLock!=NO_LOCK ){

7137 return SQLITE_BUSY;

7138 }

7139 proxyGetDbPathForUnixFile(pFile, dbPath);

7140 if( !path || path[0]=='\0' || !strcmp(path, ":auto:") ){

7141 lockPath=NULL;

7142 }else{

7143 lockPath=(char *)path;

7144 }

7145

7146 OSTRACE(("TRANSPROXY %d for %s pid=%d\n", pFile->h,

7147 (lockPath ? lockPath : ":auto:"), getpid()));

7148

7149 pCtx = sqlite3_malloc( sizeof(*pCtx) );

7150 if( pCtx==0 ){

7151 return SQLITE_NOMEM;

7152 }

7153 memset(pCtx, 0, sizeof(*pCtx));

7154

7155 rc = proxyCreateConchPathname(dbPath, &pCtx->conchFilePath);

7156 if( rc==SQLITE_OK ){

7157 rc = proxyCreateUnixFile(pCtx->conchFilePath, &pCtx->conchFile, 0);

7158 if( rc==SQLITE_CANTOPEN && ((pFile->openFlags&O_RDWR) == 0) ){

7159 /* if (a) the open flags are not O_RDWR, (b) the conch isn't there, and

7160 ** (c) the file system is read-only, then enable no-locking access.

7161 ** Ugh, since O_RDONLY==0x0000 we test for !O_RDWR since unixOpen asserts

7162 ** that openFlags will have only one of O_RDONLY or O_RDWR.

7163 */

7164 struct statfs fsInfo;

7165 struct stat conchInfo;

7166 int goLockless = 0;

7167

7168 if( osStat(pCtx->conchFilePath, &conchInfo) == -1 ) {

7169 int err = errno;

7170 if( (err==ENOENT) && (statfs(dbPath, &fsInfo) != -1) ){

7171 goLockless = (fsInfo.f_flags&MNT_RDONLY) == MNT_RDONLY;

7172 }

7173 }

7174 if( goLockless ){

7175 pCtx->conchHeld = -1; /* read only FS/ lockless */

7176 rc = SQLITE_OK;

7177 }

7178 }

7179 }

7180 if( rc==SQLITE_OK && lockPath ){

7181 pCtx->lockProxyPath = sqlite3DbStrDup(0, lockPath);

7182 }

7183

7184 if( rc==SQLITE_OK ){

7185 pCtx->dbPath = sqlite3DbStrDup(0, dbPath);

7186 if( pCtx->dbPath==NULL ){

7187 rc = SQLITE_NOMEM;

7188 }

7189 }

7190 if( rc==SQLITE_OK ){

7191 /* all memory is allocated, proxys are created and assigned,

7192 ** switch the locking context and pMethod then return.

7193 */

7194 pCtx->oldLockingContext = pFile->lockingContext;

7195 pFile->lockingContext = pCtx;

7196 pCtx->pOldMethod = pFile->pMethod;

7197 pFile->pMethod = &proxyIoMethods;

7198 }else{

7199 if( pCtx->conchFile ){

7200 pCtx->conchFile->pMethod->xClose((sqlite3_file *)pCtx->conchFile);

7201 sqlite3_free(pCtx->conchFile);

7202 }

7203 sqlite3DbFree(0, pCtx->lockProxyPath);

7204 sqlite3_free(pCtx->conchFilePath);

7205 sqlite3_free(pCtx);

7206 }

7207 OSTRACE(("TRANSPROXY %d %s\n", pFile->h,

7208 (rc==SQLITE_OK ? "ok" : "failed")));

7209 return rc;

7210 }

7211

7212

7213 /*

7214 ** This routine handles sqlite3_file_control() calls that are specific

7215 ** to proxy locking.

7216 */

7217 static int proxyFileControl(sqlite3_file *id, int op, void *pArg){

7218 switch( op ){

7219 case SQLITE_GET_LOCKPROXYFILE: {

7220 unixFile *pFile = (unixFile*)id;

7221 if( pFile->pMethod == &proxyIoMethods ){

7222 proxyLockingContext *pCtx = (proxyLockingContext*)pFile->lockingContext;

7223 proxyTakeConch(pFile);

7224 if( pCtx->lockProxyPath ){

7225 *(const char **)pArg = pCtx->lockProxyPath;

7226 }else{

7227 *(const char **)pArg = ":auto: (not held)";

7228 }

7229 } else {

7230 *(const char **)pArg = NULL;

7231 }

7232 return SQLITE_OK;

7233 }

7234 case SQLITE_SET_LOCKPROXYFILE: {

7235 unixFile *pFile = (unixFile*)id;

7236 int rc = SQLITE_OK;

7237 int isProxyStyle = (pFile->pMethod == &proxyIoMethods);

7238 if( pArg==NULL || (const char *)pArg==0 ){

7239 if( isProxyStyle ){

7240 /* turn off proxy locking - not supported */

7241 rc = SQLITE_ERROR /*SQLITE_PROTOCOL? SQLITE_MISUSE?*/;

7242 }else{

7243 /* turn off proxy locking - already off - NOOP */

7244 rc = SQLITE_OK;

7245 }

7246 }else{

7247 const char *proxyPath = (const char *)pArg;

7248 if( isProxyStyle ){

7249 proxyLockingContext *pCtx =

7250 (proxyLockingContext*)pFile->lockingContext;

7251 if( !strcmp(pArg, ":auto:")

7252 || (pCtx->lockProxyPath &&

7253 !strncmp(pCtx->lockProxyPath, proxyPath, MAXPATHLEN))

7254 ){

7255 rc = SQLITE_OK;

7256 }else{

7257 rc = switchLockProxyPath(pFile, proxyPath);

7258 }

7259 }else{

7260 /* turn on proxy file locking */

7261 rc = proxyTransformUnixFile(pFile, proxyPath);

7262 }

7263 }

7264 return rc;

7265 }

7266 default: {

7267 assert( 0 ); /* The call assures that only valid opcodes are sent */

7268 }

7269 }

7270 /*NOTREACHED*/

7271 return SQLITE_ERROR;

7272 }

7273

7274 /*

7275 ** Within this division (the proxying locking implementation) the procedures

7276 ** above this point are all utilities. The lock-related methods of the

7277 ** proxy-locking sqlite3_io_method object follow.

7278 */

7279

7280

7281 /*

7282 ** This routine checks if there is a RESERVED lock held on the specified

7283 ** file by this or any other process. If such a lock is held, set *pResOut

7284 ** to a non-zero value otherwise *pResOut is set to zero. The return value

7285 ** is set to SQLITE_OK unless an I/O error occurs during lock checking.

7286 */

7287 static int proxyCheckReservedLock(sqlite3_file *id, int *pResOut) {

7288 unixFile *pFile = (unixFile*)id;

7289 int rc = proxyTakeConch(pFile);

7290 if( rc==SQLITE_OK ){

7291 proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext;

7292 if( pCtx->conchHeld>0 ){

7293 unixFile *proxy = pCtx->lockProxy;

7294 return proxy->pMethod->xCheckReservedLock((sqlite3_file*)proxy, pResOut);

7295 }else{ /* conchHeld < 0 is lockless */

7296 *pResOut=0;

7297 }

7298 }

7299 return rc;

7300 }

7301

7302 /*

7303 ** Lock the file with the lock specified by parameter eFileLock - one

7304 ** of the following:

7305 **

7306 ** (1) SHARED_LOCK

7307 ** (2) RESERVED_LOCK

7308 ** (3) PENDING_LOCK

7309 ** (4) EXCLUSIVE_LOCK

7310 **

7311 ** Sometimes when requesting one lock state, additional lock states

7312 ** are inserted in between. The locking might fail on one of the later

7313 ** transitions leaving the lock state different from what it started but

7314 ** still short of its goal. The following chart shows the allowed

7315 ** transitions and the inserted intermediate states:

7316 **

7317 ** UNLOCKED -> SHARED

7318 ** SHARED -> RESERVED

7319 ** SHARED -> (PENDING) -> EXCLUSIVE

7320 ** RESERVED -> (PENDING) -> EXCLUSIVE

7321 ** PENDING -> EXCLUSIVE

7322 **

7323 ** This routine will only increase a lock. Use the sqlite3OsUnlock()

7324 ** routine to lower a locking level.

7325 */

7326 static int proxyLock(sqlite3_file *id, int eFileLock) {

7327 unixFile *pFile = (unixFile*)id;

7328 int rc = proxyTakeConch(pFile);

7329 if( rc==SQLITE_OK ){

7330 proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext;

7331 if( pCtx->conchHeld>0 ){

7332 unixFile *proxy = pCtx->lockProxy;

7333 rc = proxy->pMethod->xLock((sqlite3_file*)proxy, eFileLock);

7334 pFile->eFileLock = proxy->eFileLock;

7335 }else{

7336 /* conchHeld < 0 is lockless */

7337 }

7338 }

7339 return rc;

7340 }

7341

7342

7343 /*

7344 ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock

7345 ** must be either NO_LOCK or SHARED_LOCK.

7346 **

7347 ** If the locking level of the file descriptor is already at or below

7348 ** the requested locking level, this routine is a no-op.

7349 */

7350 static int proxyUnlock(sqlite3_file *id, int eFileLock) {

7351 unixFile *pFile = (unixFile*)id;

7352 int rc = proxyTakeConch(pFile);

7353 if( rc==SQLITE_OK ){

7354 proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext;

7355 if( pCtx->conchHeld>0 ){

7356 unixFile *proxy = pCtx->lockProxy;

7357 rc = proxy->pMethod->xUnlock((sqlite3_file*)proxy, eFileLock);

7358 pFile->eFileLock = proxy->eFileLock;

7359 }else{

7360 /* conchHeld < 0 is lockless */

7361 }

7362 }

7363 return rc;

7364 }

7365

7366 /*

7367 ** Close a file that uses proxy locks.

7368 */

7369 static int proxyClose(sqlite3_file *id) {

7370 if( id ){

7371 unixFile *pFile = (unixFile*)id;

7372 proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext;

7373 unixFile *lockProxy = pCtx->lockProxy;

7374 unixFile *conchFile = pCtx->conchFile;

7375 int rc = SQLITE_OK;

7376

7377 if( lockProxy ){

7378 rc = lockProxy->pMethod->xUnlock((sqlite3_file*)lockProxy, NO_LOCK);

7379 if( rc ) return rc;

7380 rc = lockProxy->pMethod->xClose((sqlite3_file*)lockProxy);

7381 if( rc ) return rc;

7382 sqlite3_free(lockProxy);

7383 pCtx->lockProxy = 0;

7384 }

7385 if( conchFile ){

7386 if( pCtx->conchHeld ){

7387 rc = proxyReleaseConch(pFile);

7388 if( rc ) return rc;

7389 }

7390 rc = conchFile->pMethod->xClose((sqlite3_file*)conchFile);

7391 if( rc ) return rc;

7392 sqlite3_free(conchFile);

7393 }

7394 sqlite3DbFree(0, pCtx->lockProxyPath);

7395 sqlite3_free(pCtx->conchFilePath);

7396 sqlite3DbFree(0, pCtx->dbPath);

7397 /* restore the original locking context and pMethod then close it */

7398 pFile->lockingContext = pCtx->oldLockingContext;

7399 pFile->pMethod = pCtx->pOldMethod;

7400 sqlite3_free(pCtx);

7401 return pFile->pMethod->xClose(id);

7402 }

7403 return SQLITE_OK;

7404 }

7405

7406

7407

7408 #endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */

7409 /*

7410 ** The proxy locking style is intended for use with AFP filesystems.

7411 ** And since AFP is only supported on MacOSX, the proxy locking is also

7412 ** restricted to MacOSX.

7413 **

7414 **

7415 ***************** End of the proxy lock implementation ********************

7416 ******************************************************************************/

7417

7418 /*

7419 ** Initialize the operating system interface.

7420 **

7421 ** This routine registers all VFS implementations for unix-like operating

7422 ** systems. This routine, and the sqlite3_os_end() routine that follows,

7423 ** should be the only routines in this file that are visible from other

7424 ** files.

7425 **

7426 ** This routine is called once during SQLite initialization and by a

7427 ** single thread. The memory allocation and mutex subsystems have not

7428 ** necessarily been initialized when this routine is called, and so they

7429 ** should not be used.

7430 */

7431 int sqlite3_os_init(void){

7432 /*

7433 ** The following macro defines an initializer for an sqlite3_vfs object.

7434 ** The name of the VFS is NAME. The pAppData is a pointer to a pointer

7435 ** to the "finder" function. (pAppData is a pointer to a pointer because

7436 ** silly C90 rules prohibit a void* from being cast to a function pointer

7437 ** and so we have to go through the intermediate pointer to avoid problems

7438 ** when compiling with -pedantic-errors on GCC.)

7439 **

7440 ** The FINDER parameter to this macro is the name of the pointer to the

7441 ** finder-function. The finder-function returns a pointer to the

7442 ** sqlite_io_methods object that implements the desired locking

7443 ** behaviors. See the division above that contains the IOMETHODS

7444 ** macro for additional information on finder-functions.

7445 **

7446 ** Most finders simply return a pointer to a fixed sqlite3_io_methods

7447 ** object. But the "autolockIoFinder" available on MacOSX does a little

7448 ** more than that; it looks at the filesystem type that hosts the

7449 ** database file and tries to choose a locking method appropriate for

7450 ** that filesystem type.

7451 */

7452 #define UNIXVFS(VFSNAME, FINDER) { \

7453 3, /* iVersion */ \

7454 sizeof(unixFile), /* szOsFile */ \

7455 MAX_PATHNAME, /* mxPathname */ \

7456 0, /* pNext */ \

7457 VFSNAME, /* zName */ \

7458 (void*)&FINDER, /* pAppData */ \

7459 unixOpen, /* xOpen */ \

7460 unixDelete, /* xDelete */ \

7461 unixAccess, /* xAccess */ \

7462 unixFullPathname, /* xFullPathname */ \

7463 unixDlOpen, /* xDlOpen */ \

7464 unixDlError, /* xDlError */ \

7465 unixDlSym, /* xDlSym */ \

7466 unixDlClose, /* xDlClose */ \

7467 unixRandomness, /* xRandomness */ \

7468 unixSleep, /* xSleep */ \

7469 unixCurrentTime, /* xCurrentTime */ \

7470 unixGetLastError, /* xGetLastError */ \

7471 unixCurrentTimeInt64, /* xCurrentTimeInt64 */ \

7472 unixSetSystemCall, /* xSetSystemCall */ \

7473 unixGetSystemCall, /* xGetSystemCall */ \

7474 unixNextSystemCall, /* xNextSystemCall */ \

7475 }

7476

7477 /*

7478 ** All default VFSes for unix are contained in the following array.

7479 **

7480 ** Note that the sqlite3_vfs.pNext field of the VFS object is modified

7481 ** by the SQLite core when the VFS is registered. So the following

7482 ** array cannot be const.

7483 */

7484 static sqlite3_vfs aVfs[] = {

7485 #if SQLITE_ENABLE_LOCKING_STYLE && (OS_VXWORKS || defined(__APPLE__))

7486 UNIXVFS("unix", autolockIoFinder ),

7487 #else

7488 UNIXVFS("unix", posixIoFinder ),

7489 #endif

7490 UNIXVFS("unix-none", nolockIoFinder ),

7491 UNIXVFS("unix-dotfile", dotlockIoFinder ),

7492 UNIXVFS("unix-excl", posixIoFinder ),

7493 #if OS_VXWORKS

7494 UNIXVFS("unix-namedsem", semIoFinder ),

7495 #endif

7496 #if SQLITE_ENABLE_LOCKING_STYLE

7497 UNIXVFS("unix-posix", posixIoFinder ),

7498 #if !OS_VXWORKS

7499 UNIXVFS("unix-flock", flockIoFinder ),

7500 #endif

7501 #endif

7502 #if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__)

7503 UNIXVFS("unix-afp", afpIoFinder ),

7504 UNIXVFS("unix-nfs", nfsIoFinder ),

7505 UNIXVFS("unix-proxy", proxyIoFinder ),

7506 #endif

7507 };

7508 unsigned int i; /* Loop counter */

7509

7510 /* Double-check that the aSyscall[] array has been constructed

7511 ** correctly. See ticket [bb3a86e890c8e96ab] */

7512 assert( ArraySize(aSyscall)==25 );

7513

7514 /* Register all VFSes defined in the aVfs[] array */

7515 for(i=0; i<(sizeof(aVfs)/sizeof(sqlite3_vfs)); i++){

7516 sqlite3_vfs_register(&aVfs[i], i==0);

7517 }

7518 return SQLITE_OK;

7519 }

7520

7521 /*

7522 ** Shutdown the operating system interface.

7523 **

7524 ** Some operating systems might need to do some cleanup in this routine,

7525 ** to release dynamically allocated objects. But not on unix.

7526 ** This routine is a no-op for unix.

7527 */

7528 int sqlite3_os_end(void){

7529 return SQLITE_OK;

7530 }

7531

7532 #endif /* SQLITE_OS_UNIX */

OLD	NEW

« no previous file with comments | « third_party/sqlite/sqlite-src-3080704/src/os_setup.h ('k') | third_party/sqlite/sqlite-src-3080704/src/os_win.h » ('j') | no next file with comments »