sandbox/linux/seccomp/sandbox.cc - Issue 3225010: Pull seccomp-sandbox in via DEPS rather than using an in-tree copy...

Side by Side Diff: sandbox/linux/seccomp/sandbox.cc

Issue 3225010: Pull seccomp-sandbox in via DEPS rather than using an in-tree copy... (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/

Patch Set: '' Created 10 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
	(Empty)
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.

4

5 #include "library.h"

6 #include "sandbox_impl.h"

7 #include "syscall_table.h"

8

9 namespace playground {

10

11 // Global variables

12 int Sandbox::proc_self_maps_ = -1;

13 enum Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN;

14 int Sandbox::pid_;

15 int Sandbox::processFdPub_;

16 int Sandbox::cloneFdPub_;

17 Sandbox::SysCalls::kernel_sigaction Sandbox::sa_segv_;

18 Sandbox::ProtectedMap Sandbox::protectedMap_;

19 std::vector<SecureMem::Args*> Sandbox::secureMemPool_;

20

21 bool Sandbox::sendFd(int transport, int fd0, int fd1, const void* buf,

22 size_t len) {

23 int fds[2], count = 0;

24 if (fd0 >= 0) { fds[count++] = fd0; }

25 if (fd1 >= 0) { fds[count++] = fd1; }

26 if (!count) {

27 return false;

28 }

29 char cmsg_buf[CMSG_SPACE(count*sizeof(int))];

30 memset(cmsg_buf, 0, sizeof(cmsg_buf));

31 struct SysCalls::kernel_iovec iov[2] = { { 0 } };

32 struct SysCalls::kernel_msghdr msg = { 0 };

33 int dummy = 0;

34 iov[0].iov_base = &dummy;

35 iov[0].iov_len = sizeof(dummy);

36 if (buf && len > 0) {

37 iov[1].iov_base = const_cast<void *>(buf);

38 iov[1].iov_len = len;

39 }

40 msg.msg_iov = iov;

41 msg.msg_iovlen = (buf && len > 0) ? 2 : 1;

42 msg.msg_control = cmsg_buf;

43 msg.msg_controllen = CMSG_LEN(count*sizeof(int));

44 struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);

45 cmsg->cmsg_level = SOL_SOCKET;

46 cmsg->cmsg_type = SCM_RIGHTS;

47 cmsg->cmsg_len = CMSG_LEN(count*sizeof(int));

48 memcpy(CMSG_DATA(cmsg), fds, count*sizeof(int));

49 SysCalls sys;

50 return NOINTR_SYS(sys.sendmsg(transport, &msg, 0)) ==

51 (ssize_t)(sizeof(dummy) + ((buf && len > 0) ? len : 0));

52 }

53

54 bool Sandbox::getFd(int transport, int* fd0, int* fd1, void* buf, size_t*len) {

55 int count = 0;

56 int *err = NULL;

57 if (fd0) {

58 count++;

59 err = fd0;

60 *fd0 = -1;

61 }

62 if (fd1) {

63 if (!count++) {

64 err = fd1;

65 }

66 *fd1 = -1;

67 }

68 if (!count) {

69 return false;

70 }

71 char cmsg_buf[CMSG_SPACE(count*sizeof(int))];

72 memset(cmsg_buf, 0, sizeof(cmsg_buf));

73 struct SysCalls::kernel_iovec iov[2] = { { 0 } };

74 struct SysCalls::kernel_msghdr msg = { 0 };

75 iov[0].iov_base = err;

76 iov[0].iov_len = sizeof(int);

77 if (buf && len && *len > 0) {

78 iov[1].iov_base = buf;

79 iov[1].iov_len = *len;

80 }

81 msg.msg_iov = iov;

82 msg.msg_iovlen = (buf && len && *len > 0) ? 2 : 1;

83 msg.msg_control = cmsg_buf;

84 msg.msg_controllen = CMSG_LEN(count*sizeof(int));

85 SysCalls sys;

86 ssize_t bytes = NOINTR_SYS(sys.recvmsg(transport, &msg, 0));

87 if (len) {

88 *len = bytes > (int)sizeof(int) ?

89 bytes - sizeof(int) : 0;

90 }

91 if (bytes != (ssize_t)(sizeof(int) + ((buf && len && *len > 0) ? *len : 0))){

92 *err = bytes >= 0 ? 0 : -EBADF;

93 return false;

94 }

95 if (*err) {

96 // "err" is the first four bytes of the payload. If these are non-zero,

97 // the sender on the other side of the socketpair sent us an errno value.

98 // We don't expect to get any file handles in this case.

99 return false;

100 }

101 struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);

102 if ((msg.msg_flags & (MSG_TRUNC|MSG_CTRUNC)) ||

103 !cmsg ||

104 cmsg->cmsg_level != SOL_SOCKET ||

105 cmsg->cmsg_type != SCM_RIGHTS ||

106 cmsg->cmsg_len != CMSG_LEN(count*sizeof(int))) {

107 *err = -EBADF;

108 return false;

109 }

110 if (fd1) { *fd1 = ((int *)CMSG_DATA(cmsg))[--count]; }

111 if (fd0) { *fd0 = ((int *)CMSG_DATA(cmsg))[--count]; }

112 return true;

113 }

114

115 void Sandbox::setupSignalHandlers() {

116 // Set SIGCHLD to SIG_DFL so that waitpid() can work

117 SysCalls sys;

118 struct SysCalls::kernel_sigaction sa;

119 memset(&sa, 0, sizeof(sa));

120 sa.sa_handler_ = SIG_DFL;

121 sys.sigaction(SIGCHLD, &sa, NULL);

122

123 // Set up SEGV handler for dealing with RDTSC instructions, system calls

124 // that have been rewritten to use INT0, for sigprocmask() emulation, for

125 // the creation of threads, and for user-provided SEGV handlers.

126 sa.sa_sigaction_ = segv();

127 sa.sa_flags = SA_SIGINFO | SA_NODEFER;

128 sys.sigaction(SIGSEGV, &sa, &sa_segv_);

129

130 // Unblock SIGSEGV and SIGCHLD

131 SysCalls::kernel_sigset_t mask;

132 memset(&mask, 0x00, sizeof(mask));

133 mask.sig[0] |= (1 << (SIGSEGV - 1)) | (1 << (SIGCHLD - 1));

134 sys.sigprocmask(SIG_UNBLOCK, &mask, 0);

135 }

136

137 void (*Sandbox::segv())(int signo, SysCalls::siginfo *context, void *unused) {

138 void (*fnc)(int signo, SysCalls::siginfo *context, void *unused);

139 asm volatile(

140 "call 999f\n"

141 #if defined(__x86_64__)

142 // Inspect instruction at the point where the segmentation fault

143 // happened. If it is RDTSC, forward the request to the trusted

144 // thread.

145 "mov $-3, %%r14\n" // request for RDTSC

146 "mov 0xB0(%%rsp), %%r15\n" // %rip at time of segmentation fault

147 "cmpw $0x310F, (%%r15)\n" // RDTSC

148 "jz 0f\n"

149 "cmpw $0x010F, (%%r15)\n" // RDTSCP

150 "jnz 8f\n"

151 "cmpb $0xF9, 2(%%r15)\n"

152 "jnz 8f\n"

153 "mov $-4, %%r14\n" // request for RDTSCP

154 "0:"

155 #ifndef NDEBUG

156 "lea 100f(%%rip), %%rdi\n"

157 "call playground$debugMessage\n"

158 #endif

159 "sub $4, %%rsp\n"

160 "push %%r14\n"

161 "mov %%gs:16, %%edi\n" // fd = threadFdPub

162 "mov %%rsp, %%rsi\n" // buf = %rsp

163 "mov $4, %%edx\n" // len = sizeof(int)

164 "1:mov $1, %%eax\n" // NR_write

165 "syscall\n"

166 "cmp %%rax, %%rdx\n"

167 "jz 5f\n"

168 "cmp $-4, %%eax\n" // EINTR

169 "jz 1b\n"

170 "2:add $12, %%rsp\n"

171 "movq $0, 0x98(%%rsp)\n" // %rax at time of segmentation fault

172 "movq $0, 0x90(%%rsp)\n" // %rdx at time of segmentation fault

173 "cmpw $0x310F, (%%r15)\n" // RDTSC

174 "jz 3f\n"

175 "movq $0, 0xA0(%%rsp)\n" // %rcx at time of segmentation fault

176 "3:addq $2, 0xB0(%%rsp)\n" // %rip at time of segmentation fault

177 "cmpw $0x010F, (%%r15)\n" // RDTSCP

178 "jnz 4f\n"

179 "addq $1, 0xB0(%%rsp)\n" // %rip at time of segmentation fault

180 "4:ret\n"

181 "5:mov $12, %%edx\n" // len = 3*sizeof(int)

182 "6:mov $0, %%eax\n" // NR_read

183 "syscall\n"

184 "cmp $-4, %%eax\n" // EINTR

185 "jz 6b\n"

186 "cmp %%rax, %%rdx\n"

187 "jnz 2b\n"

188 "mov 0(%%rsp), %%eax\n"

189 "mov 4(%%rsp), %%edx\n"

190 "mov 8(%%rsp), %%ecx\n"

191 "add $12, %%rsp\n"

192 "mov %%rdx, 0x90(%%rsp)\n" // %rdx at time of segmentation fault

193 "cmpw $0x310F, (%%r15)\n" // RDTSC

194 "jz 7f\n"

195 "mov %%rcx, 0xA0(%%rsp)\n" // %rcx at time of segmentation fault

196 "7:mov %%rax, 0x98(%%rsp)\n" // %rax at time of segmentation fault

197 "jmp 3b\n"

198

199 // If the instruction is INT 0, then this was probably the result

200 // of playground::Library being unable to find a way to safely

201 // rewrite the system call instruction. Retrieve the CPU register

202 // at the time of the segmentation fault and invoke syscallWrapper().

203 "8:cmpw $0x00CD, (%%r15)\n" // INT $0x0

204 "jnz 16f\n"

205 #ifndef NDEBUG

206 "lea 200f(%%rip), %%rdi\n"

207 "call playground$debugMessage\n"

208 #endif

209 "mov 0x98(%%rsp), %%rax\n" // %rax at time of segmentation fault

210 "mov 0x70(%%rsp), %%rdi\n" // %rdi at time of segmentation fault

211 "mov 0x78(%%rsp), %%rsi\n" // %rsi at time of segmentation fault

212 "mov 0x90(%%rsp), %%rdx\n" // %rdx at time of segmentation fault

213 "mov 0x40(%%rsp), %%r10\n" // %r10 at time of segmentation fault

214 "mov 0x30(%%rsp), %%r8\n" // %r8 at time of segmentation fault

215 "mov 0x38(%%rsp), %%r9\n" // %r9 at time of segmentation fault

216

217 // Handle rt_sigprocmask()

218 "cmp $14, %%rax\n" // NR_rt_sigprocmask

219 "jnz 12f\n"

220 "mov $-22, %%rax\n" // -EINVAL

221 "cmp $8, %%r10\n" // %r10 = sigsetsize (8 bytes = 64 signals)

222 "jl 7b\n"

223 "mov 0x130(%%rsp), %%r10\n" // signal mask at time of segmentation fault

224 "test %%rsi, %%rsi\n" // only set mask, if set is non-NULL

225 "jz 11f\n"

226 "mov 0(%%rsi), %%rsi\n"

227 "cmp $0, %%rdi\n" // %rdi = how (SIG_BLOCK)

228 "jnz 9f\n"

229 "or %%rsi, 0x130(%%rsp)\n" // signal mask at time of segmentation fault

230 "jmp 11f\n"

231 "9:cmp $1, %%rdi\n" // %rdi = how (SIG_UNBLOCK)

232 "jnz 10f\n"

233 "xor $-1, %%rsi\n"

234 "and %%rsi, 0x130(%%rsp)\n" // signal mask at time of segmentation fault

235 "jmp 11f\n"

236 "10:cmp $2, %%rdi\n" // %rdi = how (SIG_SETMASK)

237 "jnz 7b\n"

238 "mov %%rsi, 0x130(%%rsp)\n" // signal mask at time of segmentation fault

239 "11:xor %%rax, %%rax\n"

240 "test %%rdx, %%rdx\n" // only return old mask, if old_set is non-NULL

241 "jz 7b\n"

242 "mov %%r10, 0(%%rdx)\n" // old_set

243 "jmp 7b\n"

244

245 // Handle rt_sigreturn()

246 "12:cmp $15, %%rax\n" // NR_rt_sigreturn

247 "jnz 14f\n"

248 "mov 0xA8(%%rsp), %%rsp\n" // %rsp at time of segmentation fault

249 "13:syscall\n" // rt_sigreturn() is unrestricted

250 "mov $66, %%edi\n" // rt_sigreturn() should never return

251 "mov $231, %%eax\n" // NR_exit_group

252 "jmp 13b\n"

253

254 // Copy signal frame onto new stack. See clone.cc for details

255 "14:cmp $56+0xF000, %%rax\n" // NR_clone + 0xF000

256 "jnz 15f\n"

257 "lea 8(%%rsp), %%rax\n" // retain stack frame upon returning

258 "mov %%rax, 0xA8(%%rsp)\n" // %rsp at time of segmentation fault

259 "jmp 7b\n"

260

261 // Forward system call to syscallWrapper()

262 "15:lea 7b(%%rip), %%rcx\n"

263 "push %%rcx\n"

264 "push 0xB8(%%rsp)\n" // %rip at time of segmentation fault

265 "lea playground$syscallWrapper(%%rip), %%rcx\n"

266 "jmp *%%rcx\n"

267

268 // In order to implement SA_NODEFER, we have to keep track of recursive

269 // calls to SIGSEGV handlers. This means we have to increment a counter

270 // before calling the user's signal handler, and decrement it on

271 // leaving the user's signal handler.

272 // Some signal handlers look at the return address of the signal

273 // stack, and more importantly "gdb" uses the call to rt_sigreturn()

274 // as a magic signature when doing stacktraces. So, we have to use

275 // a little more unusual code to regain control after the user's

276 // signal handler is done. We adjust the return address to point to

277 // non-executable memory. And when we trigger another SEGV we pop the

278 // extraneous signal frame and then call rt_sigreturn().

279 // N.B. We currently do not correctly adjust the SEGV counter, if the

280 // user's signal handler exits in way other than by returning (e.g. by

281 // directly calling rt_sigreturn(), or by calling siglongjmp()).

282 "16:lea 22f(%%rip), %%r14\n"

283 "cmp %%r14, %%r15\n"

284 "jnz 17f\n" // check if returning from user's handler

285 "decl %%gs:0x105C-0xE0\n" // decrement SEGV recursion counter

286 "mov 0xA8(%%rsp), %%rsp\n" // %rsp at time of segmentation fault

287 "mov $0xF, %%eax\n" // NR_rt_sigreturn

288 "syscall\n"

289

290 // This was a genuine segmentation fault. Check Sandbox::sa_segv_ for

291 // what we are supposed to do.

292 "17:mov playground$sa_segv@GOTPCREL(%%rip), %%rax\n"

293 "cmp $0, 0(%%rax)\n" // SIG_DFL

294 "jz 18f\n"

295 "cmp $1, 0(%%rax)\n" // SIG_IGN

296 "jnz 19f\n" // can't really ignore synchronous signals

297

298 // Trigger the kernel's default signal disposition. The only way we can

299 // do this from seccomp mode is by blocking the signal and retriggering

300 // it.

301 "18:orb $4, 0x131(%%rsp)\n" // signal mask at time of segmentation fault

302 "ret\n"

303

304 // Check sa_flags:

305 // - We can ignore SA_NOCLDSTOP, SA_NOCLDWAIT, and SA_RESTART as they

306 // do not have any effect for SIGSEGV.

307 // - On x86-64, we can also ignore SA_SIGINFO, as the calling

308 // conventions for sa_handler() are a subset of the conventions for

309 // sa_sigaction().

310 // - We have to always register our signal handler with SA_NODEFER so

311 // that the user's signal handler can make system calls which might

312 // require additional help from our SEGV handler.

313 // - If the user's signal handler wasn't supposed to be SA_NODEFER, then

314 // we emulate this behavior by keeping track of a recursion counter.

315 //

316 // TODO(markus): If/when we add support for sigaltstack(), we have to

317 // handle SA_ONSTACK.

318 "19:cmpl $0, %%gs:0x105C-0xE0\n"// check if we failed inside of SEGV handler

319 "jnz 18b\n" // if so, then terminate program

320 "mov 0(%%rax), %%rbx\n" // sa_segv_.sa_sigaction

321 "mov 8(%%rax), %%rcx\n" // sa_segv_.sa_flags

322 "btl $31, %%ecx\n" // SA_RESETHAND

323 "jnc 20f\n"

324 "movq $0, 0(%%rax)\n" // set handler to SIG_DFL

325 "20:btl $30, %%ecx\n" // SA_NODEFER

326 "jc 21f\n"

327 "mov %%r14, 0(%%rsp)\n" // trigger a SEGV on return, so that we can

328 "incl %%gs:0x105C-0xE0\n" // clean up state; incr. recursion counter

329 "21:jmp *%%rbx\n" // call user's signal handler

330

331

332 // Non-executable version of the restorer function. We use this to

333 // trigger a SEGV upon returning from the user's signal handler, giving

334 // us an ability to clean up prior to returning from the SEGV handler.

335 ".pushsection .data\n" // move code into non-executable section

336 "22:mov $0xF, %%rax\n" // gdb looks for this signature when doing

337 "syscall\n" // backtraces

338 ".popsection\n"

339 #elif defined(__i386__)

340 // Inspect instruction at the point where the segmentation fault

341 // happened. If it is RDTSC, forward the request to the trusted

342 // thread.

343 "mov $-3, %%ebx\n" // request for RDTSC

344 "mov 0xDC(%%esp), %%ebp\n" // %eip at time of segmentation fault

345 "cmpw $0x310F, (%%ebp)\n" // RDTSC

346 "jz 0f\n"

347 "cmpw $0x010F, (%%ebp)\n" // RDTSCP

348 "jnz 9f\n"

349 "cmpb $0xF9, 2(%%ebp)\n"

350 "jnz 9f\n"

351 "mov $-4, %%ebx\n" // request for RDTSCP

352 "0:"

353 #ifndef NDEBUG

354 "lea 100f, %%eax\n"

355 "push %%eax\n"

356 "call playground$debugMessage\n"

357 "sub $4, %%esp\n"

358 #else

359 "sub $8, %%esp\n" // allocate buffer for receiving timestamp

360 #endif

361 "push %%ebx\n"

362 "mov %%fs:16, %%ebx\n" // fd = threadFdPub

363 "mov %%esp, %%ecx\n" // buf = %esp

364 "mov $4, %%edx\n" // len = sizeof(int)

365 "1:mov %%edx, %%eax\n" // NR_write

366 "int $0x80\n"

367 "cmp %%eax, %%edx\n"

368 "jz 7f\n"

369 "cmp $-4, %%eax\n" // EINTR

370 "jz 1b\n"

371 "2:add $12, %%esp\n" // remove temporary buffer from stack

372 "xor %%eax, %%eax\n"

373 "movl $0, 0xC8(%%esp)\n" // %edx at time of segmentation fault

374 "cmpw $0x310F, (%%ebp)\n" // RDTSC

375 "jz 3f\n"

376 "movl $0, 0xCC(%%esp)\n" // %ecx at time of segmentation fault

377 "3:mov %%eax, 0xD0(%%esp)\n" // %eax at time of segmentation fault

378 "4:mov 0xDC(%%esp), %%ebp\n" // %eip at time of segmentation fault

379 "addl $2, 0xDC(%%esp)\n" // %eip at time of segmentation fault

380 "cmpw $0x010F, (%%ebp)\n" // RDTSCP

381 "jnz 5f\n"

382 "addl $1, 0xDC(%%esp)\n" // %eip at time of segmentation fault

383 "5:sub $0x1C8, %%esp\n" // a legacy signal stack is much larger

384 "mov 0x1CC(%%esp), %%eax\n" // push signal number

385 "push %%eax\n"

386 "lea 0x270(%%esp), %%esi\n" // copy siginfo register values

387 "lea 0x4(%%esp), %%edi\n" // into new location

388 "mov $22, %%ecx\n"

389 "cld\n"

390 "rep movsl\n"

391 "mov 0x2C8(%%esp), %%ebx\n" // copy first half of signal mask

392 "mov %%ebx, 0x54(%%esp)\n"

393 "lea 6f, %%esi\n" // copy "magic" restorer function

394 "push %%esi\n" // push restorer function

395 "lea 0x2D4(%%esp), %%edi\n" // patch up retcode magic numbers

396 "movb $2, %%cl\n"

397 "rep movsl\n"

398 "ret\n" // return to restorer function

399

400 // The restorer function is sometimes used by gdb as a magic marker to

401 // recognize signal stack frames. Don't change any of the next three

402 // instructions.

403 "6:pop %%eax\n" // remove dummy argument (signo)

404 "mov $119, %%eax\n" // NR_sigreturn

405 "int $0x80\n"

406 "7:mov $12, %%edx\n" // len = 3*sizeof(int)

407 "8:mov $3, %%eax\n" // NR_read

408 "int $0x80\n"

409 "cmp $-4, %%eax\n" // EINTR

410 "jz 8b\n"

411 "cmp %%eax, %%edx\n"

412 "jnz 2b\n"

413 "pop %%eax\n"

414 "pop %%edx\n"

415 "pop %%ecx\n"

416 "mov %%edx, 0xC8(%%esp)\n" // %edx at time of segmentation fault

417 "cmpw $0x310F, (%%ebp)\n" // RDTSC

418 "jz 3b\n"

419 "mov %%ecx, 0xCC(%%esp)\n" // %ecx at time of segmentation fault

420 "jmp 3b\n"

421

422 // If the instruction is INT 0, then this was probably the result

423 // of playground::Library being unable to find a way to safely

424 // rewrite the system call instruction. Retrieve the CPU register

425 // at the time of the segmentation fault and invoke syscallWrapper().

426 "9:cmpw $0x00CD, (%%ebp)\n" // INT $0x0

427 "jnz 20f\n"

428 #ifndef NDEBUG

429 "lea 200f, %%eax\n"

430 "push %%eax\n"

431 "call playground$debugMessage\n"

432 "add $0x4, %%esp\n"

433 #endif

434 "mov 0xD0(%%esp), %%eax\n" // %eax at time of segmentation fault

435 "mov 0xC4(%%esp), %%ebx\n" // %ebx at time of segmentation fault

436 "mov 0xCC(%%esp), %%ecx\n" // %ecx at time of segmentation fault

437 "mov 0xC8(%%esp), %%edx\n" // %edx at time of segmentation fault

438 "mov 0xB8(%%esp), %%esi\n" // %esi at time of segmentation fault

439 "mov 0xB4(%%esp), %%edi\n" // %edi at time of segmentation fault

440 "mov 0xB2(%%esp), %%ebp\n" // %ebp at time of segmentation fault

441

442 // Handle sigprocmask() and rt_sigprocmask()

443 "cmp $175, %%eax\n" // NR_rt_sigprocmask

444 "jnz 10f\n"

445 "mov $-22, %%eax\n" // -EINVAL

446 "cmp $8, %%esi\n" // %esi = sigsetsize (8 bytes = 64 signals)

447 "jl 3b\n"

448 "jmp 11f\n"

449 "10:cmp $126, %%eax\n" // NR_sigprocmask

450 "jnz 15f\n"

451 "mov $-22, %%eax\n"

452 "11:mov 0xFC(%%esp), %%edi\n" // signal mask at time of segmentation fault

453 "mov 0x100(%%esp), %%ebp\n"

454 "test %%ecx, %%ecx\n" // only set mask, if set is non-NULL

455 "jz 14f\n"

456 "mov 0(%%ecx), %%esi\n"

457 "mov 4(%%ecx), %%ecx\n"

458 "cmp $0, %%ebx\n" // %ebx = how (SIG_BLOCK)

459 "jnz 12f\n"

460 "or %%esi, 0xFC(%%esp)\n" // signal mask at time of segmentation fault

461 "or %%ecx, 0x100(%%esp)\n"

462 "jmp 14f\n"

463 "12:cmp $1, %%ebx\n" // %ebx = how (SIG_UNBLOCK)

464 "jnz 13f\n"

465 "xor $-1, %%esi\n"

466 "xor $-1, %%ecx\n"

467 "and %%esi, 0xFC(%%esp)\n" // signal mask at time of segmentation fault

468 "and %%ecx, 0x100(%%esp)\n"

469 "jmp 14f\n"

470 "13:cmp $2, %%ebx\n" // %ebx = how (SIG_SETMASK)

471 "jnz 3b\n"

472 "mov %%esi, 0xFC(%%esp)\n" // signal mask at time of segmentation fault

473 "mov %%ecx, 0x100(%%esp)\n"

474 "14:xor %%eax, %%eax\n"

475 "test %%edx, %%edx\n" // only return old mask, if old_set is non-NULL

476 "jz 3b\n"

477 "mov %%edi, 0(%%edx)\n" // old_set

478 "mov %%ebp, 4(%%edx)\n"

479 "jmp 3b\n"

480

481 // Handle sigreturn() and rt_sigreturn()

482 // See syscall.cc for a discussion on how we can emulate rt_sigreturn()

483 // by calling sigreturn() with a suitably adjusted stack.

484 "15:cmp $119, %%eax\n" // NR_sigreturn

485 "jnz 17f\n"

486 "mov 0xC0(%%esp), %%esp\n" // %esp at time of segmentation fault

487 "16:int $0x80\n" // sigreturn() is unrestricted

488 "17:cmp $173, %%eax\n" // NR_rt_sigreturn

489 "jnz 18f\n"

490 "mov 0xC0(%%esp), %%esp\n" // %esp at time of segmentation fault

491 "sub $4, %%esp\n" // add fake return address

492 "jmp 4b\n"

493

494 // Copy signal frame onto new stack. In the process, we have to convert

495 // it from an RT signal frame to a legacy signal frame.

496 // See clone.cc for details

497 "18:cmp $120+0xF000, %%eax\n" // NR_clone + 0xF000

498 "jnz 19f\n"

499 "lea -0x1C8(%%esp), %%eax\n"// retain stack frame upon returning

500 "mov %%eax, 0xC0(%%esp)\n" // %esp at time of segmentation fault

501 "jmp 3b\n"

502

503 // Forward system call to syscallWrapper()

504 "19:call playground$syscallWrapper\n"

505 "jmp 3b\n"

506

507 // In order to implement SA_NODEFER, we have to keep track of recursive

508 // calls to SIGSEGV handlers. This means we have to increment a counter

509 // before calling the user's signal handler, and decrement it on

510 // leaving the user's signal handler.

511 // Some signal handlers look at the return address of the signal

512 // stack, and more importantly "gdb" uses the call to {,rt_}sigreturn()

513 // as a magic signature when doing stacktraces. So, we have to use

514 // a little more unusual code to regain control after the user's

515 // signal handler is done. We adjust the return address to point to

516 // non-executable memory. And when we trigger another SEGV we pop the

517 // extraneous signal frame and then call sigreturn().

518 // N.B. We currently do not correctly adjust the SEGV counter, if the

519 // user's signal handler exits in way other than by returning (e.g. by

520 // directly calling {,rt_}sigreturn(), or by calling siglongjmp()).

521 "20:lea 30f, %%edi\n" // rt-style restorer function

522 "lea 31f, %%esi\n" // legacy restorer function

523 "cmp %%ebp, %%edi\n" // check if returning from user's handler

524 "jnz 21f\n"

525 "decl %%fs:0x1040-0x58\n" // decrement SEGV recursion counter

526 "mov 0xC0(%%esp), %%esp\n" // %esp at time of segmentation fault

527 "jmp 29f\n"

528 "21:cmp %%ebp, %%esi\n" // check if returning from user's handler

529 "jnz 22f\n"

530 "decl %%fs:0x1040-0x58\n" // decrement SEGV recursion counter

531 "mov 0xC0(%%esp), %%esp\n" // %esp at time of segmentation fault

532 "jmp 6b\n"

533

534 // This was a genuine segmentation fault. Check Sandbox::sa_segv_ for

535 // what we are supposed to do.

536 "22:lea playground$sa_segv, %%eax\n"

537 "cmp $0, 0(%%eax)\n" // SIG_DFL

538 "jz 23f\n"

539 "cmp $1, 0(%%eax)\n" // SIG_IGN

540 "jnz 24f\n" // can't really ignore synchronous signals

541

542 // Trigger the kernel's default signal disposition. The only way we can

543 // do this from seccomp mode is by blocking the signal and retriggering

544 // it.

545 "23:orb $4, 0xFD(%%esp)\n" // signal mask at time of segmentation fault

546 "jmp 5b\n"

547

548 // Check sa_flags:

549 // - We can ignore SA_NOCLDSTOP, SA_NOCLDWAIT, and SA_RESTART as they

550 // do not have any effect for SIGSEGV.

551 // - We have to always register our signal handler with SA_NODEFER so

552 // that the user's signal handler can make system calls which might

553 // require additional help from our SEGV handler.

554 // - If the user's signal handler wasn't supposed to be SA_NODEFER, then

555 // we emulate this behavior by keeping track of a recursion counter.

556 //

557 // TODO(markus): If/when we add support for sigaltstack(), we have to

558 // handle SA_ONSTACK.

559 "24:cmpl $0, %%fs:0x1040-0x58\n"// check if we failed inside of SEGV handler

560 "jnz 23b\n" // if so, then terminate program

561 "mov 0(%%eax), %%ebx\n" // sa_segv_.sa_sigaction

562 "mov 4(%%eax), %%ecx\n" // sa_segv_.sa_flags

563 "btl $31, %%ecx\n" // SA_RESETHAND

564 "jnc 25f\n"

565 "movl $0, 0(%%eax)\n" // set handler to SIG_DFL

566 "25:btl $30, %%ecx\n" // SA_NODEFER

567 "jc 28f\n"

568 "btl $2, %%ecx\n" // SA_SIGINFO

569 "jnc 26f\n"

570 "mov %%edi, 0(%%esp)\n" // trigger a SEGV on return

571 "incl %%fs:0x1040-0x58\n" // increment recursion counter

572 "jmp *%%ebx\n" // call user's signal handler

573 "26:mov %%esi, 0(%%esp)\n"

574 "incl %%fs:0x1040-0x58\n" // increment recursion counter

575

576 // We always register the signal handler to give us rt-style signal

577 // frames. But if the user asked for legacy signal frames, we must

578 // convert the signal frame prior to calling the user's signal handler.

579 "27:sub $0x1C8, %%esp\n" // a legacy signal stack is much larger

580 "mov 0x1CC(%%esp), %%eax\n" // push signal number

581 "push %%eax\n"

582 "mov 0x1CC(%%esp), %%eax\n" // push restorer function

583 "push %%eax\n"

584 "lea 0x274(%%esp), %%esi\n" // copy siginfo register values

585 "lea 0x8(%%esp), %%edi\n" // into new location

586 "mov $22, %%ecx\n"

587 "cld\n"

588 "rep movsl\n"

589 "mov 0x2CC(%%esp), %%eax\n" // copy first half of signal mask

590 "mov %%eax, 0x58(%%esp)\n"

591 "lea 31f, %%esi\n"

592 "lea 0x2D4(%%esp), %%edi\n" // patch up retcode magic numbers

593 "movb $2, %%cl\n"

594 "rep movsl\n"

595 "jmp *%%ebx\n" // call user's signal handler

596 "28:lea 6b, %%eax\n" // set appropriate restorer function

597 "mov %%eax, 0(%%esp)\n"

598 "btl $2, %%ecx\n" // SA_SIGINFO

599 "jnc 27b\n"

600 "lea 29f, %%eax\n"

601 "mov %%eax, 0(%%esp)\n" // set appropriate restorer function

602 "jmp *%%ebx\n" // call user's signal handler

603 "29:pushl $30f\n" // emulate rt_sigreturn()

604 "jmp 5b\n"

605

606 // Non-executable versions of the restorer function. We use these to

607 // trigger a SEGV upon returning from the user's signal handler, giving

608 // us an ability to clean up prior to returning from the SEGV handler.

609 ".pushsection .data\n" // move code into non-executable section

610 "30:mov $173, %%eax\n" // NR_rt_sigreturn

611 "int $0x80\n" // gdb looks for this signature when doing

612 ".byte 0\n" // backtraces

613 "31:pop %%eax\n"

614 "mov $119, %%eax\n" // NR_sigreturn

615 "int $0x80\n"

616 ".popsection\n"

617 #else

618 #error Unsupported target platform

619 #endif

620 ".pushsection \".rodata\"\n"

621 #ifndef NDEBUG

622 "100:.asciz \"RDTSC(P): Executing handler\\n\"\n"

623 "200:.asciz \"INT $0x0: Executing handler\\n\"\n"

624 #endif

625 ".popsection\n"

626 "999:pop %0\n"

627 : "=g"(fnc)

628 :

629 : "memory"

630 #if defined(__x86_64__)

631 , "rsp"

632 #elif defined(__i386__)

633 , "esp"

634 #endif

635 );

636 return fnc;

637 }

638

639 SecureMem::Args* Sandbox::getSecureMem() {

640 // Check trusted_thread.cc for the magic offset that gets us from the TLS

641 // to the beginning of the secure memory area.

642 SecureMem::Args* ret;

643 #if defined(__x86_64__)

644 asm volatile(

645 "movq %%gs:-0xE0, %0\n"

646 : "=q"(ret));

647 #elif defined(__i386__)

648 asm volatile(

649 "movl %%fs:-0x58, %0\n"

650 : "=r"(ret));

651 #else

652 #error Unsupported target platform

653 #endif

654 return ret;

655 }

656

657 void Sandbox::snapshotMemoryMappings(int processFd, int proc_self_maps) {

658 SysCalls sys;

659 if (sys.lseek(proc_self_maps, 0, SEEK_SET) ||

660 !sendFd(processFd, proc_self_maps, -1, NULL, 0)) {

661 failure:

662 die("Cannot access /proc/self/maps");

663 }

664 int dummy;

665 if (read(sys, processFd, &dummy, sizeof(dummy)) != sizeof(dummy)) {

666 goto failure;

667 }

668 }

669

670 int Sandbox::supportsSeccompSandbox(int proc_fd) {

671 if (status_ != STATUS_UNKNOWN) {

672 return status_ != STATUS_UNSUPPORTED;

673 }

674 int fds[2];

675 SysCalls sys;

676 if (sys.pipe(fds)) {

677 status_ = STATUS_UNSUPPORTED;

678 return 0;

679 }

680 pid_t pid;

681 switch ((pid = sys.fork())) {

682 case -1:

683 status_ = STATUS_UNSUPPORTED;

684 return 0;

685 case 0: {

686 int devnull = sys.open("/dev/null", O_RDWR, 0);

687 if (devnull >= 0) {

688 sys.dup2(devnull, 0);

689 sys.dup2(devnull, 1);

690 sys.dup2(devnull, 2);

691 sys.close(devnull);

692 }

693 if (proc_fd >= 0) {

694 setProcSelfMaps(sys.openat(proc_fd, "self/maps", O_RDONLY, 0));

695 }

696 startSandbox();

697 write(sys, fds[1], "", 1);

698

699 // Try to tell the trusted thread to shut down the entire process in an

700 // orderly fashion

701 defaultSystemCallHandler(__NR_exit_group, 0, 0, 0, 0, 0, 0);

702

703 // If that did not work (e.g. because the kernel does not know about the

704 // exit_group() system call), make a direct _exit() system call instead.

705 // This system call is unrestricted in seccomp mode, so it will always

706 // succeed. Normally, we don't like it, because unlike exit_group() it

707 // does not terminate any other thread. But since we know that

708 // exit_group() exists in all kernels which support kernel-level threads,

709 // this is OK: we only get here for old kernels where _exit() is OK.

710 sys._exit(0);

711 }

712 default:

713 NOINTR_SYS(sys.close(fds[1]));

714 char ch;

715 if (read(sys, fds[0], &ch, 1) != 1) {

716 status_ = STATUS_UNSUPPORTED;

717 } else {

718 status_ = STATUS_AVAILABLE;

719 }

720 int rc;

721 NOINTR_SYS(sys.waitpid(pid, &rc, 0));

722 NOINTR_SYS(sys.close(fds[0]));

723 return status_ != STATUS_UNSUPPORTED;

724 }

725 }

726

727 void Sandbox::setProcSelfMaps(int proc_self_maps) {

728 proc_self_maps_ = proc_self_maps;

729 }

730

731 void Sandbox::startSandbox() {

732 if (status_ == STATUS_UNSUPPORTED) {

733 die("The seccomp sandbox is not supported on this computer");

734 } else if (status_ == STATUS_ENABLED) {

735 return;

736 }

737

738 SysCalls sys;

739 if (proc_self_maps_ < 0) {

740 proc_self_maps_ = sys.open("/proc/self/maps", O_RDONLY, 0);

741 if (proc_self_maps_ < 0) {

742 die("Cannot access \"/proc/self/maps\"");

743 }

744 }

745

746 // The pid is unchanged for the entire program, so we can retrieve it once

747 // and store it in a global variable.

748 pid_ = sys.getpid();

749

750 // Block all signals, except for the RDTSC handler

751 setupSignalHandlers();

752

753 // Get socketpairs for talking to the trusted process

754 int pair[4];

755 if (sys.socketpair(AF_UNIX, SOCK_STREAM, 0, pair) ||

756 sys.socketpair(AF_UNIX, SOCK_STREAM, 0, pair+2)) {

757 die("Failed to create trusted thread");

758 }

759 processFdPub_ = pair[0];

760 cloneFdPub_ = pair[2];

761 SecureMemArgs* secureMem = createTrustedProcess(pair[0], pair[1],

762 pair[2], pair[3]);

763

764 // We find all libraries that have system calls and redirect the system

765 // calls to the sandbox. If we miss any system calls, the application will be

766 // terminated by the kernel's seccomp code. So, from a security point of

767 // view, if this code fails to identify system calls, we are still behaving

768 // correctly.

769 {

770 Maps maps(proc_self_maps_);

771 const char *libs[] = { "ld", "libc", "librt", "libpthread", NULL };

772

773 // Intercept system calls in the VDSO segment (if any). This has to happen

774 // before intercepting system calls in any of the other libraries, as

775 // the main kernel entry point might be inside of the VDSO and we need to

776 // determine its address before we can compare it to jumps from inside

777 // other libraries.

778 for (Maps::const_iterator iter = maps.begin(); iter != maps.end(); ++iter){

779 Library* library = *iter;

780 if (library->isVDSO() && library->parseElf()) {

781 library->makeWritable(true);

782 library->patchSystemCalls();

783 library->makeWritable(false);

784 break;

785 }

786 }

787

788 // Intercept system calls in libraries that are known to have them.

789 for (Maps::const_iterator iter = maps.begin(); iter != maps.end(); ++iter){

790 Library* library = *iter;

791 const char* mapping = iter.name().c_str();

792

793 // Find the actual base name of the mapped library by skipping past any

794 // SPC and forward-slashes. We don't want to accidentally find matches,

795 // because the directory name included part of our well-known lib names.

796 //

797 // Typically, prior to pruning, entries would look something like this:

798 // 08:01 2289011 /lib/libc-2.7.so

799 for (const char *delim = " /"; *delim; ++delim) {

800 const char* skip = strrchr(mapping, *delim);

801 if (skip) {

802 mapping = skip + 1;

803 }

804 }

805

806 for (const char **ptr = libs; *ptr; ptr++) {

807 const char *name = strstr(mapping, *ptr);

808 if (name == mapping) {

809 char ch = name[strlen(*ptr)];

810 if (ch < 'A' || (ch > 'Z' && ch < 'a') || ch > 'z') {

811 if (library->parseElf()) {

812 library->makeWritable(true);

813 library->patchSystemCalls();

814 library->makeWritable(false);

815 break;

816 }

817 }

818 }

819 }

820 }

821 }

822

823 // Take a snapshot of the current memory mappings. These mappings will be

824 // off-limits to all future mmap(), munmap(), mremap(), and mprotect() calls.

825 snapshotMemoryMappings(processFdPub_, proc_self_maps_);

826 NOINTR_SYS(sys.close(proc_self_maps_));

827 proc_self_maps_ = -1;

828

829 // Creating the trusted thread enables sandboxing

830 createTrustedThread(processFdPub_, cloneFdPub_, secureMem);

831

832 // We can no longer check for sandboxing support at this point, but we also

833 // know for a fact that it is available (as we just turned it on). So update

834 // the status to reflect this information.

835 status_ = STATUS_ENABLED;

836 }

837

838 } // namespace

OLD	NEW

« no previous file with comments | « sandbox/linux/seccomp/sandbox.h ('k') | sandbox/linux/seccomp/sandbox_impl.h » ('j') | no next file with comments »