sandbox/linux/seccomp/sandbox.cc - Issue 3225010: Pull seccomp-sandbox in via DEPS rather than using an in-tree copy...

Unified Diff: sandbox/linux/seccomp/sandbox.cc

Issue 3225010: Pull seccomp-sandbox in via DEPS rather than using an in-tree copy... (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/

Patch Set: '' Created 10 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: sandbox/linux/seccomp/sandbox.cc

===================================================================

--- sandbox/linux/seccomp/sandbox.cc (revision 57969)

+++ sandbox/linux/seccomp/sandbox.cc (working copy)

@@ -1,838 +0,0 @@

-// Use of this source code is governed by a BSD-style license that can be

-// found in the LICENSE file.

-#include "library.h"

-#include "sandbox_impl.h"

-#include "syscall_table.h"

-namespace playground {

-// Global variables

-int Sandbox::proc_self_maps_ = -1;

-enum Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN;

-int Sandbox::pid_;

-int Sandbox::processFdPub_;

-int Sandbox::cloneFdPub_;

-Sandbox::SysCalls::kernel_sigaction Sandbox::sa_segv_;

-Sandbox::ProtectedMap Sandbox::protectedMap_;

-std::vector<SecureMem::Args*> Sandbox::secureMemPool_;

-bool Sandbox::sendFd(int transport, int fd0, int fd1, const void* buf,

- size_t len) {

- int fds[2], count = 0;

- if (fd0 >= 0) { fds[count++] = fd0; }

- if (fd1 >= 0) { fds[count++] = fd1; }

- if (!count) {

- return false;

- }

- char cmsg_buf[CMSG_SPACE(count*sizeof(int))];

- memset(cmsg_buf, 0, sizeof(cmsg_buf));

- struct SysCalls::kernel_iovec iov[2] = { { 0 } };

- struct SysCalls::kernel_msghdr msg = { 0 };

- int dummy = 0;

- iov[0].iov_base = &dummy;

- iov[0].iov_len = sizeof(dummy);

- if (buf && len > 0) {

- iov[1].iov_base = const_cast<void *>(buf);

- iov[1].iov_len = len;

- }

- msg.msg_iov = iov;

- msg.msg_iovlen = (buf && len > 0) ? 2 : 1;

- msg.msg_control = cmsg_buf;

- msg.msg_controllen = CMSG_LEN(count*sizeof(int));

- struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);

- cmsg->cmsg_level = SOL_SOCKET;

- cmsg->cmsg_type = SCM_RIGHTS;

- cmsg->cmsg_len = CMSG_LEN(count*sizeof(int));

- memcpy(CMSG_DATA(cmsg), fds, count*sizeof(int));

- SysCalls sys;

- return NOINTR_SYS(sys.sendmsg(transport, &msg, 0)) ==

- (ssize_t)(sizeof(dummy) + ((buf && len > 0) ? len : 0));

-bool Sandbox::getFd(int transport, int* fd0, int* fd1, void* buf, size_t*len) {

- int count = 0;

- int *err = NULL;

- if (fd0) {

- count++;

- err = fd0;

- *fd0 = -1;

- }

- if (fd1) {

- if (!count++) {

- err = fd1;

- }

- *fd1 = -1;

- }

- if (!count) {

- return false;

- }

- char cmsg_buf[CMSG_SPACE(count*sizeof(int))];

- memset(cmsg_buf, 0, sizeof(cmsg_buf));

- struct SysCalls::kernel_iovec iov[2] = { { 0 } };

- struct SysCalls::kernel_msghdr msg = { 0 };

- iov[0].iov_base = err;

- iov[0].iov_len = sizeof(int);

- if (buf && len && *len > 0) {

- iov[1].iov_base = buf;

- iov[1].iov_len = *len;

- }

- msg.msg_iov = iov;

- msg.msg_iovlen = (buf && len && *len > 0) ? 2 : 1;

- msg.msg_control = cmsg_buf;

- msg.msg_controllen = CMSG_LEN(count*sizeof(int));

- SysCalls sys;

- ssize_t bytes = NOINTR_SYS(sys.recvmsg(transport, &msg, 0));

- if (len) {

- *len = bytes > (int)sizeof(int) ?

- bytes - sizeof(int) : 0;

- }

- if (bytes != (ssize_t)(sizeof(int) + ((buf && len && *len > 0) ? *len : 0))){

- *err = bytes >= 0 ? 0 : -EBADF;

- return false;

- }

- if (*err) {

- // "err" is the first four bytes of the payload. If these are non-zero,

- // the sender on the other side of the socketpair sent us an errno value.

- // We don't expect to get any file handles in this case.

- return false;

- }

- struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);

- if ((msg.msg_flags & (MSG_TRUNC|MSG_CTRUNC)) ||

- !cmsg ||

- cmsg->cmsg_level != SOL_SOCKET ||

- cmsg->cmsg_type != SCM_RIGHTS ||

- cmsg->cmsg_len != CMSG_LEN(count*sizeof(int))) {

- *err = -EBADF;

- return false;

- }

- if (fd1) { *fd1 = ((int *)CMSG_DATA(cmsg))[--count]; }

- if (fd0) { *fd0 = ((int *)CMSG_DATA(cmsg))[--count]; }

- return true;

-void Sandbox::setupSignalHandlers() {

- // Set SIGCHLD to SIG_DFL so that waitpid() can work

- SysCalls sys;

- struct SysCalls::kernel_sigaction sa;

- memset(&sa, 0, sizeof(sa));

- sa.sa_handler_ = SIG_DFL;

- sys.sigaction(SIGCHLD, &sa, NULL);

- // Set up SEGV handler for dealing with RDTSC instructions, system calls

- // that have been rewritten to use INT0, for sigprocmask() emulation, for

- // the creation of threads, and for user-provided SEGV handlers.

- sa.sa_sigaction_ = segv();

- sa.sa_flags = SA_SIGINFO | SA_NODEFER;

- sys.sigaction(SIGSEGV, &sa, &sa_segv_);

- // Unblock SIGSEGV and SIGCHLD

- SysCalls::kernel_sigset_t mask;

- memset(&mask, 0x00, sizeof(mask));

- mask.sig[0] |= (1 << (SIGSEGV - 1)) | (1 << (SIGCHLD - 1));

- sys.sigprocmask(SIG_UNBLOCK, &mask, 0);

-void (*Sandbox::segv())(int signo, SysCalls::siginfo *context, void *unused) {

- void (*fnc)(int signo, SysCalls::siginfo *context, void *unused);

- asm volatile(

- "call 999f\n"

-#if defined(__x86_64__)

- // Inspect instruction at the point where the segmentation fault

- // happened. If it is RDTSC, forward the request to the trusted

- // thread.

- "mov $-3, %%r14\n" // request for RDTSC

- "mov 0xB0(%%rsp), %%r15\n" // %rip at time of segmentation fault

- "cmpw $0x310F, (%%r15)\n" // RDTSC

- "jz 0f\n"

- "cmpw $0x010F, (%%r15)\n" // RDTSCP

- "jnz 8f\n"

- "cmpb $0xF9, 2(%%r15)\n"

- "jnz 8f\n"

- "mov $-4, %%r14\n" // request for RDTSCP

- "0:"

-#ifndef NDEBUG

- "lea 100f(%%rip), %%rdi\n"

- "call playground$debugMessage\n"

-#endif

- "sub $4, %%rsp\n"

- "push %%r14\n"

- "mov %%gs:16, %%edi\n" // fd = threadFdPub

- "mov %%rsp, %%rsi\n" // buf = %rsp

- "mov $4, %%edx\n" // len = sizeof(int)

- "1:mov $1, %%eax\n" // NR_write

- "syscall\n"

- "cmp %%rax, %%rdx\n"

- "jz 5f\n"

- "cmp $-4, %%eax\n" // EINTR

- "jz 1b\n"

- "2:add $12, %%rsp\n"

- "movq $0, 0x98(%%rsp)\n" // %rax at time of segmentation fault

- "movq $0, 0x90(%%rsp)\n" // %rdx at time of segmentation fault

- "cmpw $0x310F, (%%r15)\n" // RDTSC

- "jz 3f\n"

- "movq $0, 0xA0(%%rsp)\n" // %rcx at time of segmentation fault

- "3:addq $2, 0xB0(%%rsp)\n" // %rip at time of segmentation fault

- "cmpw $0x010F, (%%r15)\n" // RDTSC

- "jnz 4f\n"

- "addq $1, 0xB0(%%rsp)\n" // %rip at time of segmentation fault

- "4:ret\n"

- "5:mov $12, %%edx\n" // len = 3*sizeof(int)

- "6:mov $0, %%eax\n" // NR_read

- "syscall\n"

- "cmp $-4, %%eax\n" // EINTR

- "jz 6b\n"

- "cmp %%rax, %%rdx\n"

- "jnz 2b\n"

- "mov 0(%%rsp), %%eax\n"

- "mov 4(%%rsp), %%edx\n"

- "mov 8(%%rsp), %%ecx\n"

- "add $12, %%rsp\n"

- "mov %%rdx, 0x90(%%rsp)\n" // %rdx at time of segmentation fault

- "cmpw $0x310F, (%%r15)\n" // RDTSC

- "jz 7f\n"

- "mov %%rcx, 0xA0(%%rsp)\n" // %rcx at time of segmentation fault

- "7:mov %%rax, 0x98(%%rsp)\n" // %rax at time of segmentation fault

- "jmp 3b\n"

- // If the instruction is INT 0, then this was probably the result

- // of playground::Library being unable to find a way to safely

- // rewrite the system call instruction. Retrieve the CPU register

- // at the time of the segmentation fault and invoke syscallWrapper().

- "8:cmpw $0x00CD, (%%r15)\n" // INT $0x0

- "jnz 16f\n"

-#ifndef NDEBUG

- "lea 200f(%%rip), %%rdi\n"

- "call playground$debugMessage\n"

-#endif

- "mov 0x98(%%rsp), %%rax\n" // %rax at time of segmentation fault

- "mov 0x70(%%rsp), %%rdi\n" // %rdi at time of segmentation fault

- "mov 0x78(%%rsp), %%rsi\n" // %rsi at time of segmentation fault

- "mov 0x90(%%rsp), %%rdx\n" // %rdx at time of segmentation fault

- "mov 0x40(%%rsp), %%r10\n" // %r10 at time of segmentation fault

- "mov 0x30(%%rsp), %%r8\n" // %r8 at time of segmentation fault

- "mov 0x38(%%rsp), %%r9\n" // %r9 at time of segmentation fault

- // Handle rt_sigprocmask()

- "cmp $14, %%rax\n" // NR_rt_sigprocmask

- "jnz 12f\n"

- "mov $-22, %%rax\n" // -EINVAL

- "cmp $8, %%r10\n" // %r10 = sigsetsize (8 bytes = 64 signals)

- "jl 7b\n"

- "mov 0x130(%%rsp), %%r10\n" // signal mask at time of segmentation fault

- "test %%rsi, %%rsi\n" // only set mask, if set is non-NULL

- "jz 11f\n"

- "mov 0(%%rsi), %%rsi\n"

- "cmp $0, %%rdi\n" // %rdi = how (SIG_BLOCK)

- "jnz 9f\n"

- "or %%rsi, 0x130(%%rsp)\n" // signal mask at time of segmentation fault

- "jmp 11f\n"

- "9:cmp $1, %%rdi\n" // %rdi = how (SIG_UNBLOCK)

- "jnz 10f\n"

- "xor $-1, %%rsi\n"

- "and %%rsi, 0x130(%%rsp)\n" // signal mask at time of segmentation fault

- "jmp 11f\n"

- "10:cmp $2, %%rdi\n" // %rdi = how (SIG_SETMASK)

- "jnz 7b\n"

- "mov %%rsi, 0x130(%%rsp)\n" // signal mask at time of segmentation fault

- "11:xor %%rax, %%rax\n"

- "test %%rdx, %%rdx\n" // only return old mask, if set is non-NULL

- "jz 7b\n"

- "mov %%r10, 0(%%rdx)\n" // old_set

- "jmp 7b\n"

- // Handle rt_sigreturn()

- "12:cmp $15, %%rax\n" // NR_rt_sigreturn

- "jnz 14f\n"

- "mov 0xA8(%%rsp), %%rsp\n" // %rsp at time of segmentation fault

- "13:syscall\n" // rt_sigreturn() is unrestricted

- "mov $66, %%edi\n" // rt_sigreturn() should never return

- "mov $231, %%eax\n" // NR_exit_group

- "jmp 13b\n"

- // Copy signal frame onto new stack. See clone.cc for details

- "14:cmp $56+0xF000, %%rax\n" // NR_clone + 0xF000

- "jnz 15f\n"

- "lea 8(%%rsp), %%rax\n" // retain stack frame upon returning

- "mov %%rax, 0xA8(%%rsp)\n" // %rsp at time of segmentation fault

- "jmp 7b\n"

- // Forward system call to syscallWrapper()

- "15:lea 7b(%%rip), %%rcx\n"

- "push %%rcx\n"

- "push 0xB8(%%rsp)\n" // %rip at time of segmentation fault

- "lea playground$syscallWrapper(%%rip), %%rcx\n"

- "jmp *%%rcx\n"

- // In order to implement SA_NODEFER, we have to keep track of recursive

- // calls to SIGSEGV handlers. This means we have to increment a counter

- // before calling the user's signal handler, and decrement it on

- // leaving the user's signal handler.

- // Some signal handlers look at the return address of the signal

- // stack, and more importantly "gdb" uses the call to rt_sigreturn()

- // as a magic signature when doing stacktraces. So, we have to use

- // a little more unusual code to regain control after the user's

- // signal handler is done. We adjust the return address to point to

- // non-executable memory. And when we trigger another SEGV we pop the

- // extraneous signal frame and then call rt_sigreturn().

- // N.B. We currently do not correctly adjust the SEGV counter, if the

- // user's signal handler exits in way other than by returning (e.g. by

- // directly calling rt_sigreturn(), or by calling siglongjmp()).

- "16:lea 22f(%%rip), %%r14\n"

- "cmp %%r14, %%r15\n"

- "jnz 17f\n" // check if returning from user's handler

- "decl %%gs:0x105C-0xE0\n" // decrement SEGV recursion counter

- "mov 0xA8(%%rsp), %%rsp\n" // %rsp at time of segmentation fault

- "mov $0xF, %%eax\n" // NR_rt_sigreturn

- "syscall\n"

- // This was a genuine segmentation fault. Check Sandbox::sa_segv_ for

- // what we are supposed to do.

- "17:mov playground$sa_segv@GOTPCREL(%%rip), %%rax\n"

- "cmp $0, 0(%%rax)\n" // SIG_DFL

- "jz 18f\n"

- "cmp $1, 0(%%rax)\n" // SIG_IGN

- "jnz 19f\n" // can't really ignore synchronous signals

- // Trigger the kernel's default signal disposition. The only way we can

- // do this from seccomp mode is by blocking the signal and retriggering

- // it.

- "18:orb $4, 0x131(%%rsp)\n" // signal mask at time of segmentation fault

- "ret\n"

- // Check sa_flags:

- // - We can ignore SA_NOCLDSTOP, SA_NOCLDWAIT, and SA_RESTART as they

- // do not have any effect for SIGSEGV.

- // - On x86-64, we can also ignore SA_SIGINFO, as the calling

- // conventions for sa_handler() are a subset of the conventions for

- // sa_sigaction().

- // - We have to always register our signal handler with SA_NODEFER so

- // that the user's signal handler can make system calls which might

- // require additional help from our SEGV handler.

- // - If the user's signal handler wasn't supposed to be SA_NODEFER, then

- // we emulate this behavior by keeping track of a recursion counter.

- //

- // TODO(markus): If/when we add support for sigaltstack(), we have to

- // handle SA_ONSTACK.

- "19:cmpl $0, %%gs:0x105C-0xE0\n"// check if we failed inside of SEGV handler

- "jnz 18b\n" // if so, then terminate program

- "mov 0(%%rax), %%rbx\n" // sa_segv_.sa_sigaction

- "mov 8(%%rax), %%rcx\n" // sa_segv_.sa_flags

- "btl $31, %%ecx\n" // SA_RESETHAND

- "jnc 20f\n"

- "movq $0, 0(%%rax)\n" // set handler to SIG_DFL

- "20:btl $30, %%ecx\n" // SA_NODEFER

- "jc 21f\n"

- "mov %%r14, 0(%%rsp)\n" // trigger a SEGV on return, so that we can

- "incl %%gs:0x105C-0xE0\n" // clean up state; incr. recursion counter

- "21:jmp *%%rbx\n" // call user's signal handler

- // Non-executable version of the restorer function. We use this to

- // trigger a SEGV upon returning from the user's signal handler, giving

- // us an ability to clean up prior to returning from the SEGV handler.

- ".pushsection .data\n" // move code into non-executable section

- "22:mov $0xF, %%rax\n" // gdb looks for this signature when doing

- "syscall\n" // backtraces

- ".popsection\n"

-#elif defined(__i386__)

- // Inspect instruction at the point where the segmentation fault

- // happened. If it is RDTSC, forward the request to the trusted

- // thread.

- "mov $-3, %%ebx\n" // request for RDTSC

- "mov 0xDC(%%esp), %%ebp\n" // %eip at time of segmentation fault

- "cmpw $0x310F, (%%ebp)\n" // RDTSC

- "jz 0f\n"

- "cmpw $0x010F, (%%ebp)\n" // RDTSCP

- "jnz 9f\n"

- "cmpb $0xF9, 2(%%ebp)\n"

- "jnz 9f\n"

- "mov $-4, %%ebx\n" // request for RDTSCP

- "0:"

-#ifndef NDEBUG

- "lea 100f, %%eax\n"

- "push %%eax\n"

- "call playground$debugMessage\n"

- "sub $4, %%esp\n"

-#else

- "sub $8, %%esp\n" // allocate buffer for receiving timestamp

-#endif

- "push %%ebx\n"

- "mov %%fs:16, %%ebx\n" // fd = threadFdPub

- "mov %%esp, %%ecx\n" // buf = %esp

- "mov $4, %%edx\n" // len = sizeof(int)

- "1:mov %%edx, %%eax\n" // NR_write

- "int $0x80\n"

- "cmp %%eax, %%edx\n"

- "jz 7f\n"

- "cmp $-4, %%eax\n" // EINTR

- "jz 1b\n"

- "2:add $12, %%esp\n" // remove temporary buffer from stack

- "xor %%eax, %%eax\n"

- "movl $0, 0xC8(%%esp)\n" // %edx at time of segmentation fault

- "cmpw $0x310F, (%%ebp)\n" // RDTSC

- "jz 3f\n"

- "movl $0, 0xCC(%%esp)\n" // %ecx at time of segmentation fault

- "3:mov %%eax, 0xD0(%%esp)\n" // %eax at time of segmentation fault

- "4:mov 0xDC(%%esp), %%ebp\n" // %eip at time of segmentation fault

- "addl $2, 0xDC(%%esp)\n" // %eip at time of segmentation fault

- "cmpw $0x010F, (%%ebp)\n" // RDTSCP

- "jnz 5f\n"

- "addl $1, 0xDC(%%esp)\n" // %eip at time of segmentation fault

- "5:sub $0x1C8, %%esp\n" // a legacy signal stack is much larger

- "mov 0x1CC(%%esp), %%eax\n" // push signal number

- "push %%eax\n"

- "lea 0x270(%%esp), %%esi\n" // copy siginfo register values

- "lea 0x4(%%esp), %%edi\n" // into new location

- "mov $22, %%ecx\n"

- "cld\n"

- "rep movsl\n"

- "mov 0x2C8(%%esp), %%ebx\n" // copy first half of signal mask

- "mov %%ebx, 0x54(%%esp)\n"

- "lea 6f, %%esi\n" // copy "magic" restorer function

- "push %%esi\n" // push restorer function

- "lea 0x2D4(%%esp), %%edi\n" // patch up retcode magic numbers

- "movb $2, %%cl\n"

- "rep movsl\n"

- "ret\n" // return to restorer function

- // The restorer function is sometimes used by gdb as a magic marker to

- // recognize signal stack frames. Don't change any of the next three

- // instructions.

- "6:pop %%eax\n" // remove dummy argument (signo)

- "mov $119, %%eax\n" // NR_sigreturn

- "int $0x80\n"

- "7:mov $12, %%edx\n" // len = 3*sizeof(int)

- "8:mov $3, %%eax\n" // NR_read

- "int $0x80\n"

- "cmp $-4, %%eax\n" // EINTR

- "jz 8b\n"

- "cmp %%eax, %%edx\n"

- "jnz 2b\n"

- "pop %%eax\n"

- "pop %%edx\n"

- "pop %%ecx\n"

- "mov %%edx, 0xC8(%%esp)\n" // %edx at time of segmentation fault

- "cmpw $0x310F, (%%ebp)\n" // RDTSC

- "jz 3b\n"

- "mov %%ecx, 0xCC(%%esp)\n" // %ecx at time of segmentation fault

- "jmp 3b\n"

- // If the instruction is INT 0, then this was probably the result

- // of playground::Library being unable to find a way to safely

- // rewrite the system call instruction. Retrieve the CPU register

- // at the time of the segmentation fault and invoke syscallWrapper().

- "9:cmpw $0x00CD, (%%ebp)\n" // INT $0x0

- "jnz 20f\n"

-#ifndef NDEBUG

- "lea 200f, %%eax\n"

- "push %%eax\n"

- "call playground$debugMessage\n"

- "add $0x4, %%esp\n"

-#endif

- "mov 0xD0(%%esp), %%eax\n" // %eax at time of segmentation fault

- "mov 0xC4(%%esp), %%ebx\n" // %ebx at time of segmentation fault

- "mov 0xCC(%%esp), %%ecx\n" // %ecx at time of segmentation fault

- "mov 0xC8(%%esp), %%edx\n" // %edx at time of segmentation fault

- "mov 0xB8(%%esp), %%esi\n" // %esi at time of segmentation fault

- "mov 0xB4(%%esp), %%edi\n" // %edi at time of segmentation fault

- "mov 0xB2(%%esp), %%ebp\n" // %ebp at time of segmentation fault

- // Handle sigprocmask() and rt_sigprocmask()

- "cmp $175, %%eax\n" // NR_rt_sigprocmask

- "jnz 10f\n"

- "mov $-22, %%eax\n" // -EINVAL

- "cmp $8, %%esi\n" // %esi = sigsetsize (8 bytes = 64 signals)

- "jl 3b\n"

- "jmp 11f\n"

- "10:cmp $126, %%eax\n" // NR_sigprocmask

- "jnz 15f\n"

- "mov $-22, %%eax\n"

- "11:mov 0xFC(%%esp), %%edi\n" // signal mask at time of segmentation fault

- "mov 0x100(%%esp), %%ebp\n"

- "test %%ecx, %%ecx\n" // only set mask, if set is non-NULL

- "jz 14f\n"

- "mov 0(%%ecx), %%esi\n"

- "mov 4(%%ecx), %%ecx\n"

- "cmp $0, %%ebx\n" // %ebx = how (SIG_BLOCK)

- "jnz 12f\n"

- "or %%esi, 0xFC(%%esp)\n" // signal mask at time of segmentation fault

- "or %%ecx, 0x100(%%esp)\n"

- "jmp 14f\n"

- "12:cmp $1, %%ebx\n" // %ebx = how (SIG_UNBLOCK)

- "jnz 13f\n"

- "xor $-1, %%esi\n"

- "xor $-1, %%ecx\n"

- "and %%esi, 0xFC(%%esp)\n" // signal mask at time of segmentation fault

- "and %%ecx, 0x100(%%esp)\n"

- "jmp 14f\n"

- "13:cmp $2, %%ebx\n" // %ebx = how (SIG_SETMASK)

- "jnz 3b\n"

- "mov %%esi, 0xFC(%%esp)\n" // signal mask at time of segmentation fault

- "mov %%ecx, 0x100(%%esp)\n"

- "14:xor %%eax, %%eax\n"

- "test %%edx, %%edx\n" // only return old mask, if set is non-NULL

- "jz 3b\n"

- "mov %%edi, 0(%%edx)\n" // old_set

- "mov %%ebp, 4(%%edx)\n"

- "jmp 3b\n"

- // Handle sigreturn() and rt_sigreturn()

- // See syscall.cc for a discussion on how we can emulate rt_sigreturn()

- // by calling sigreturn() with a suitably adjusted stack.

- "15:cmp $119, %%eax\n" // NR_sigreturn

- "jnz 17f\n"

- "mov 0xC0(%%esp), %%esp\n" // %esp at time of segmentation fault

- "16:int $0x80\n" // sigreturn() is unrestricted

- "17:cmp $173, %%eax\n" // NR_rt_sigreturn

- "jnz 18f\n"

- "mov 0xC0(%%esp), %%esp\n" // %esp at time of segmentation fault

- "sub $4, %%esp\n" // add fake return address

- "jmp 4b\n"

- // Copy signal frame onto new stack. In the process, we have to convert

- // it from an RT signal frame to a legacy signal frame.

- // See clone.cc for details

- "18:cmp $120+0xF000, %%eax\n" // NR_clone + 0xF000

- "jnz 19f\n"

- "lea -0x1C8(%%esp), %%eax\n"// retain stack frame upon returning

- "mov %%eax, 0xC0(%%esp)\n" // %esp at time of segmentation fault

- "jmp 3b\n"

- // Forward system call to syscallWrapper()

- "19:call playground$syscallWrapper\n"

- "jmp 3b\n"

- // In order to implement SA_NODEFER, we have to keep track of recursive

- // calls to SIGSEGV handlers. This means we have to increment a counter

- // before calling the user's signal handler, and decrement it on

- // leaving the user's signal handler.

- // Some signal handlers look at the return address of the signal

- // stack, and more importantly "gdb" uses the call to {,rt_}sigreturn()

- // as a magic signature when doing stacktraces. So, we have to use

- // a little more unusual code to regain control after the user's

- // signal handler is done. We adjust the return address to point to

- // non-executable memory. And when we trigger another SEGV we pop the

- // extraneous signal frame and then call sigreturn().

- // N.B. We currently do not correctly adjust the SEGV counter, if the

- // user's signal handler exits in way other than by returning (e.g. by

- // directly calling {,rt_}sigreturn(), or by calling siglongjmp()).

- "20:lea 30f, %%edi\n" // rt-style restorer function

- "lea 31f, %%esi\n" // legacy restorer function

- "cmp %%ebp, %%edi\n" // check if returning from user's handler

- "jnz 21f\n"

- "decl %%fs:0x1040-0x58\n" // decrement SEGV recursion counter

- "mov 0xC0(%%esp), %%esp\n" // %esp at time of segmentation fault

- "jmp 29f\n"

- "21:cmp %%ebp, %%esi\n" // check if returning from user's handler

- "jnz 22f\n"

- "decl %%fs:0x1040-0x58\n" // decrement SEGV recursion counter

- "mov 0xC0(%%esp), %%esp\n" // %esp at time of segmentation fault

- "jmp 6b\n"

- // This was a genuine segmentation fault. Check Sandbox::sa_segv_ for

- // what we are supposed to do.

- "22:lea playground$sa_segv, %%eax\n"

- "cmp $0, 0(%%eax)\n" // SIG_DFL

- "jz 23f\n"

- "cmp $1, 0(%%eax)\n" // SIG_IGN

- "jnz 24f\n" // can't really ignore synchronous signals

- // Trigger the kernel's default signal disposition. The only way we can

- // do this from seccomp mode is by blocking the signal and retriggering

- // it.

- "23:orb $4, 0xFD(%%esp)\n" // signal mask at time of segmentation fault

- "jmp 5b\n"

- // Check sa_flags:

- // - We can ignore SA_NOCLDSTOP, SA_NOCLDWAIT, and SA_RESTART as they

- // do not have any effect for SIGSEGV.

- // - We have to always register our signal handler with SA_NODEFER so

- // that the user's signal handler can make system calls which might

- // require additional help from our SEGV handler.

- // - If the user's signal handler wasn't supposed to be SA_NODEFER, then

- // we emulate this behavior by keeping track of a recursion counter.

- //

- // TODO(markus): If/when we add support for sigaltstack(), we have to

- // handle SA_ONSTACK.

- "24:cmpl $0, %%fs:0x1040-0x58\n"// check if we failed inside of SEGV handler

- "jnz 23b\n" // if so, then terminate program

- "mov 0(%%eax), %%ebx\n" // sa_segv_.sa_sigaction

- "mov 4(%%eax), %%ecx\n" // sa_segv_.sa_flags

- "btl $31, %%ecx\n" // SA_RESETHAND

- "jnc 25f\n"

- "movl $0, 0(%%eax)\n" // set handler to SIG_DFL

- "25:btl $30, %%ecx\n" // SA_NODEFER

- "jc 28f\n"

- "btl $2, %%ecx\n" // SA_SIGINFO

- "jnc 26f\n"

- "mov %%edi, 0(%%esp)\n" // trigger a SEGV on return

- "incl %%fs:0x1040-0x58\n" // increment recursion counter

- "jmp *%%ebx\n" // call user's signal handler

- "26:mov %%esi, 0(%%esp)\n"

- "incl %%fs:0x1040-0x58\n" // increment recursion counter

- // We always register the signal handler to give us rt-style signal

- // frames. But if the user asked for legacy signal frames, we must

- // convert the signal frame prior to calling the user's signal handler.

- "27:sub $0x1C8, %%esp\n" // a legacy signal stack is much larger

- "mov 0x1CC(%%esp), %%eax\n" // push signal number

- "push %%eax\n"

- "mov 0x1CC(%%esp), %%eax\n" // push restorer function

- "push %%eax\n"

- "lea 0x274(%%esp), %%esi\n" // copy siginfo register values

- "lea 0x8(%%esp), %%edi\n" // into new location

- "mov $22, %%ecx\n"

- "cld\n"

- "rep movsl\n"

- "mov 0x2CC(%%esp), %%eax\n" // copy first half of signal mask

- "mov %%eax, 0x58(%%esp)\n"

- "lea 31f, %%esi\n"

- "lea 0x2D4(%%esp), %%edi\n" // patch up retcode magic numbers

- "movb $2, %%cl\n"

- "rep movsl\n"

- "jmp *%%ebx\n" // call user's signal handler

- "28:lea 6b, %%eax\n" // set appropriate restorer function

- "mov %%eax, 0(%%esp)\n"

- "btl $2, %%ecx\n" // SA_SIGINFO

- "jnc 27b\n"

- "lea 29f, %%eax\n"

- "mov %%eax, 0(%%esp)\n" // set appropriate restorer function

- "jmp *%%ebx\n" // call user's signal handler

- "29:pushl $30f\n" // emulate rt_sigreturn()

- "jmp 5b\n"

- // Non-executable versions of the restorer function. We use these to

- // trigger a SEGV upon returning from the user's signal handler, giving

- // us an ability to clean up prior to returning from the SEGV handler.

- ".pushsection .data\n" // move code into non-executable section

- "30:mov $173, %%eax\n" // NR_rt_sigreturn

- "int $0x80\n" // gdb looks for this signature when doing

- ".byte 0\n" // backtraces

- "31:pop %%eax\n"

- "mov $119, %%eax\n" // NR_sigreturn

- "int $0x80\n"

- ".popsection\n"

-#else

-#error Unsupported target platform

-#endif

- ".pushsection \".rodata\"\n"

-#ifndef NDEBUG

- "100:.asciz \"RDTSC(P): Executing handler\\n\"\n"

- "200:.asciz \"INT $0x0: Executing handler\\n\"\n"

-#endif

- ".popsection\n"

- "999:pop %0\n"

- : "=g"(fnc)

- :

- : "memory"

-#if defined(__x86_64__)

- , "rsp"

-#elif defined(__i386__)

- , "esp"

-#endif

- );

- return fnc;

-SecureMem::Args* Sandbox::getSecureMem() {

- // Check trusted_thread.cc for the magic offset that gets us from the TLS

- // to the beginning of the secure memory area.

- SecureMem::Args* ret;

-#if defined(__x86_64__)

- asm volatile(

- "movq %%gs:-0xE0, %0\n"

- : "=q"(ret));

-#elif defined(__i386__)

- asm volatile(

- "movl %%fs:-0x58, %0\n"

- : "=r"(ret));

-#else

-#error Unsupported target platform

-#endif

- return ret;

-void Sandbox::snapshotMemoryMappings(int processFd, int proc_self_maps) {

- SysCalls sys;

- if (sys.lseek(proc_self_maps, 0, SEEK_SET) ||

- !sendFd(processFd, proc_self_maps, -1, NULL, 0)) {

- failure:

- die("Cannot access /proc/self/maps");

- }

- int dummy;

- if (read(sys, processFd, &dummy, sizeof(dummy)) != sizeof(dummy)) {

- goto failure;

- }

-int Sandbox::supportsSeccompSandbox(int proc_fd) {

- if (status_ != STATUS_UNKNOWN) {

- return status_ != STATUS_UNSUPPORTED;

- }

- int fds[2];

- SysCalls sys;

- if (sys.pipe(fds)) {

- status_ = STATUS_UNSUPPORTED;

- return 0;

- }

- pid_t pid;

- switch ((pid = sys.fork())) {

- case -1:

- status_ = STATUS_UNSUPPORTED;

- return 0;

- case 0: {

- int devnull = sys.open("/dev/null", O_RDWR, 0);

- if (devnull >= 0) {

- sys.dup2(devnull, 0);

- sys.dup2(devnull, 1);

- sys.dup2(devnull, 2);

- sys.close(devnull);

- }

- if (proc_fd >= 0) {

- setProcSelfMaps(sys.openat(proc_fd, "self/maps", O_RDONLY, 0));

- }

- startSandbox();

- write(sys, fds[1], "", 1);

- // Try to tell the trusted thread to shut down the entire process in an

- // orderly fashion

- defaultSystemCallHandler(__NR_exit_group, 0, 0, 0, 0, 0, 0);

- // If that did not work (e.g. because the kernel does not know about the

- // exit_group() system call), make a direct _exit() system call instead.

- // This system call is unrestricted in seccomp mode, so it will always

- // succeed. Normally, we don't like it, because unlike exit_group() it

- // does not terminate any other thread. But since we know that

- // exit_group() exists in all kernels which support kernel-level threads,

- // this is OK we only get here for old kernels where _exit() is OK.

- sys._exit(0);

- }

- default:

- NOINTR_SYS(sys.close(fds[1]));

- char ch;

- if (read(sys, fds[0], &ch, 1) != 1) {

- status_ = STATUS_UNSUPPORTED;

- } else {

- status_ = STATUS_AVAILABLE;

- }

- int rc;

- NOINTR_SYS(sys.waitpid(pid, &rc, 0));

- NOINTR_SYS(sys.close(fds[0]));

- return status_ != STATUS_UNSUPPORTED;

- }

-void Sandbox::setProcSelfMaps(int proc_self_maps) {

- proc_self_maps_ = proc_self_maps;

-void Sandbox::startSandbox() {

- if (status_ == STATUS_UNSUPPORTED) {

- die("The seccomp sandbox is not supported on this computer");

- } else if (status_ == STATUS_ENABLED) {

- return;

- }

- SysCalls sys;

- if (proc_self_maps_ < 0) {

- proc_self_maps_ = sys.open("/proc/self/maps", O_RDONLY, 0);

- if (proc_self_maps_ < 0) {

- die("Cannot access \"/proc/self/maps\"");

- }

- // The pid is unchanged for the entire program, so we can retrieve it once

- // and store it in a global variable.

- pid_ = sys.getpid();

- // Block all signals, except for the RDTSC handler

- setupSignalHandlers();

- // Get socketpairs for talking to the trusted process

- int pair[4];

- if (sys.socketpair(AF_UNIX, SOCK_STREAM, 0, pair) ||

- sys.socketpair(AF_UNIX, SOCK_STREAM, 0, pair+2)) {

- die("Failed to create trusted thread");

- }

- processFdPub_ = pair[0];

- cloneFdPub_ = pair[2];

- SecureMemArgs* secureMem = createTrustedProcess(pair[0], pair[1],

- pair[2], pair[3]);

- // We find all libraries that have system calls and redirect the system

- // calls to the sandbox. If we miss any system calls, the application will be

- // terminated by the kernel's seccomp code. So, from a security point of

- // view, if this code fails to identify system calls, we are still behaving

- // correctly.

- {

- Maps maps(proc_self_maps_);

- const char *libs[] = { "ld", "libc", "librt", "libpthread", NULL };

- // Intercept system calls in the VDSO segment (if any). This has to happen

- // before intercepting system calls in any of the other libraries, as

- // the main kernel entry point might be inside of the VDSO and we need to

- // determine its address before we can compare it to jumps from inside

- // other libraries.

- for (Maps::const_iterator iter = maps.begin(); iter != maps.end(); ++iter){

- Library* library = *iter;

- if (library->isVDSO() && library->parseElf()) {

- library->makeWritable(true);

- library->patchSystemCalls();

- library->makeWritable(false);

- break;

- }

- // Intercept system calls in libraries that are known to have them.

- for (Maps::const_iterator iter = maps.begin(); iter != maps.end(); ++iter){

- Library* library = *iter;

- const char* mapping = iter.name().c_str();

- // Find the actual base name of the mapped library by skipping past any

- // SPC and forward-slashes. We don't want to accidentally find matches,

- // because the directory name included part of our well-known lib names.

- //

- // Typically, prior to pruning, entries would look something like this:

- // 08:01 2289011 /lib/libc-2.7.so

- for (const char *delim = " /"; *delim; ++delim) {

- const char* skip = strrchr(mapping, *delim);

- if (skip) {

- mapping = skip + 1;

- }

- for (const char **ptr = libs; *ptr; ptr++) {

- const char *name = strstr(mapping, *ptr);

- if (name == mapping) {

- char ch = name[strlen(*ptr)];

- if (ch < 'A' || (ch > 'Z' && ch < 'a') || ch > 'z') {

- if (library->parseElf()) {

- library->makeWritable(true);

- library->patchSystemCalls();

- library->makeWritable(false);

- break;

- }

- // Take a snapshot of the current memory mappings. These mappings will be

- // off-limits to all future mmap(), munmap(), mremap(), and mprotect() calls.

- snapshotMemoryMappings(processFdPub_, proc_self_maps_);

- NOINTR_SYS(sys.close(proc_self_maps_));

- proc_self_maps_ = -1;

- // Creating the trusted thread enables sandboxing

- createTrustedThread(processFdPub_, cloneFdPub_, secureMem);

- // We can no longer check for sandboxing support at this point, but we also

- // know for a fact that it is available (as we just turned it on). So update

- // the status to reflect this information.

- status_ = STATUS_ENABLED;

-} // namespace

« no previous file with comments | « sandbox/linux/seccomp/sandbox.h ('k') | sandbox/linux/seccomp/sandbox_impl.h » ('j') | no next file with comments »