sandbox/linux/seccomp-bpf/sandbox_bpf.cc - Issue 670183003: Update from chromium 62675d9fb31fb8cedc40f68e78e8445a74f362e7

Unified Diff: sandbox/linux/seccomp-bpf/sandbox_bpf.cc

Issue 670183003: Update from chromium 62675d9fb31fb8cedc40f68e78e8445a74f362e7 (Closed) Base URL: git@github.com:domokit/mojo.git@master

Patch Set: Created 6 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: sandbox/linux/seccomp-bpf/sandbox_bpf.cc

diff --git a/sandbox/linux/seccomp-bpf/sandbox_bpf.cc b/sandbox/linux/seccomp-bpf/sandbox_bpf.cc

new file mode 100644

index 0000000000000000000000000000000000000000..8a9b3f7c4c80970d4becdeb8764389acf55ee914

--- /dev/null

+++ b/sandbox/linux/seccomp-bpf/sandbox_bpf.cc

@@ -0,0 +1,525 @@

+// Use of this source code is governed by a BSD-style license that can be

+// found in the LICENSE file.

+#include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"

+// Some headers on Android are missing cdefs: crbug.com/172337.

+// (We can't use OS_ANDROID here since build_config.h is not included).

+#if defined(ANDROID)

+#include <sys/cdefs.h>

+#endif

+#include <errno.h>

+#include <fcntl.h>

+#include <linux/filter.h>

+#include <signal.h>

+#include <string.h>

+#include <sys/prctl.h>

+#include <sys/stat.h>

+#include <sys/syscall.h>

+#include <sys/types.h>

+#include <sys/wait.h>

+#include <time.h>

+#include <unistd.h>

+#include "base/compiler_specific.h"

+#include "base/logging.h"

+#include "base/macros.h"

+#include "base/memory/scoped_ptr.h"

+#include "base/posix/eintr_wrapper.h"

+#include "sandbox/linux/bpf_dsl/bpf_dsl.h"

+#include "sandbox/linux/bpf_dsl/policy_compiler.h"

+#include "sandbox/linux/seccomp-bpf/codegen.h"

+#include "sandbox/linux/seccomp-bpf/die.h"

+#include "sandbox/linux/seccomp-bpf/errorcode.h"

+#include "sandbox/linux/seccomp-bpf/linux_seccomp.h"

+#include "sandbox/linux/seccomp-bpf/syscall.h"

+#include "sandbox/linux/seccomp-bpf/syscall_iterator.h"

+#include "sandbox/linux/seccomp-bpf/trap.h"

+#include "sandbox/linux/seccomp-bpf/verifier.h"

+#include "sandbox/linux/services/linux_syscalls.h"

+using sandbox::bpf_dsl::Allow;

+using sandbox::bpf_dsl::Error;

+using sandbox::bpf_dsl::ResultExpr;

+using sandbox::bpf_dsl::SandboxBPFDSLPolicy;

+namespace sandbox {

+namespace {

+const int kExpectedExitCode = 100;  // Exit status the probe children in this file pass to exit_group() on success; the parent compares against it.

+#if !defined(NDEBUG)
+// Debug-only helper: writes a fixed banner to |out_fd|, followed by the
+// strerror() text for the current errno and a trailing newline.
+//
+// errno is sampled before any write() so that the writes cannot clobber it.
+// Output is best effort: each write is attempted only if the previous one
+// wrote at least one byte, and failures are otherwise ignored.
+void WriteFailedStderrSetupMessage(int out_fd) {
+  const char* error_string = strerror(errno);
+  static const char msg[] =
+      "You have reproduced a puzzling issue.\n"
+      "Please, report to crbug.com/152530!\n"
+      "Failed to set up stderr: ";
+  // The empty body is intentional: the chained condition only sequences the
+  // writes and consumes their return values.
+  if (HANDLE_EINTR(write(out_fd, msg, sizeof(msg) - 1)) > 0 && error_string &&
+      HANDLE_EINTR(write(out_fd, error_string, strlen(error_string))) > 0 &&
+      HANDLE_EINTR(write(out_fd, "\n", 1))) {
+  }
+}
+#endif  // !defined(NDEBUG)

+// We define a really simple sandbox policy. It is just good enough for us
+// to tell that the sandbox has actually been activated: getpid() is made to
+// fail with EPERM, exit_group() is allowed, and every other system call
+// fails with EINVAL.
+class ProbePolicy : public SandboxBPFDSLPolicy {
+ public:
+  ProbePolicy() {}
+  virtual ~ProbePolicy() {}
+
+  // Returns the filter action for system call number |sysnum|.
+  virtual ResultExpr EvaluateSyscall(int sysnum) const override {
+    switch (sysnum) {
+      case __NR_getpid:
+        // Return EPERM so that we can check that the filter actually ran.
+        return Error(EPERM);
+      case __NR_exit_group:
+        // Allow exit() with a non-default return code.
+        return Allow();
+      default:
+        // Make everything else fail in an easily recognizable way.
+        return Error(EINVAL);
+    }
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(ProbePolicy);
+};

+// Probe body that KernelSupportSeccompBPF() runs under ProbePolicy.
+// If getpid() fails with EPERM (i.e. the filter is in effect), the process
+// exits via exit_group() with kExpectedExitCode. Otherwise the function
+// simply returns to its caller.
+void ProbeProcess(void) {
+  if (syscall(__NR_getpid) < 0 && errno == EPERM) {
+    syscall(__NR_exit_group, static_cast<intptr_t>(kExpectedExitCode));
+  }
+}

+// Policy that permits every system call. KernelSupportSeccompBPF() pairs it
+// with TryVsyscallProcess().
+class AllowAllPolicy : public SandboxBPFDSLPolicy {
+ public:
+  AllowAllPolicy() {}
+  virtual ~AllowAllPolicy() {}
+
+  // Always yields Allow(). Debug builds additionally assert that |sysnum|
+  // is a valid system call number.
+  ResultExpr EvaluateSyscall(int sysnum) const override {
+    DCHECK(SandboxBPF::IsValidSyscallNumber(sysnum));
+    return Allow();
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(AllowAllPolicy);
+};

+// Probe body that KernelSupportSeccompBPF() runs under AllowAllPolicy.
+// Calls time() and, if it succeeds, exits via exit_group() with
+// kExpectedExitCode. If time() fails the function returns to its caller.
+void TryVsyscallProcess(void) {
+  time_t current_time;
+  // time() is implemented as a vsyscall. With an older glibc, with
+  // vsyscall=emulate and some versions of the seccomp BPF patch
+  // we may get SIGKILL-ed. Detect this!
+  if (time(&current_time) != static_cast<time_t>(-1)) {
+    syscall(__NR_exit_group, static_cast<intptr_t>(kExpectedExitCode));
+  }
+}

+// Reports whether the calling process appears to be single-threaded.
+//
+// |proc_fd| is a descriptor for /proc, or negative if none is available.
+// Returns true when |proc_fd| is negative (the thread count cannot be
+// determined, so the optimistic answer is given), or when "self/task" under
+// |proc_fd| has a link count of exactly 3. Returns false if the directory
+// cannot be opened, stat'ed or closed, or if the link count differs.
+bool IsSingleThreaded(int proc_fd) {
+  if (proc_fd < 0) {
+    // Cannot determine whether program is single-threaded. Hope for
+    // the best...
+    return true;
+  }
+
+  const int task = openat(proc_fd, "self/task", O_RDONLY | O_DIRECTORY);
+  if (task < 0) {
+    return false;
+  }
+
+  // NOTE(review): a link count of 3 presumably corresponds to ".", ".." and
+  // one per-thread subdirectory -- confirm against procfs behavior.
+  struct stat sb;
+  const bool looks_single_threaded =
+      fstat(task, &sb) == 0 && sb.st_nlink == 3;
+
+  // Close the descriptor exactly once. The previous code called close() a
+  // second time on the same descriptor whenever the first close() reported
+  // an error, which could close an unrelated descriptor that had since
+  // reused the number.
+  if (IGNORE_EINTR(close(task))) {
+    return false;
+  }
+  return looks_single_threaded;
+}

+} // namespace

+// Constructs a sandbox object that is not quiet, holds no /proc descriptor
+// (-1), has not been started, and has no policy set.
+SandboxBPF::SandboxBPF()
+    : quiet_(false), proc_fd_(-1), sandbox_has_started_(false), policy_() {
+}

+SandboxBPF::~SandboxBPF() {  // NOTE(review): no destructor statements or closing brace are visible in this view -- confirm the body is empty.

+// Returns whether |sysnum| is a valid system call number, as decided by
+// SyscallSet::IsValid().
+bool SandboxBPF::IsValidSyscallNumber(int sysnum) {
+  return SyscallSet::IsValid(sysnum);
+}

+// Forks a child, installs |policy| in it, and runs |code_in_sandbox| there.
+//
+// Returns true iff the child exited normally with kExpectedExitCode. The
+// child's stderr is redirected into a pipe; if the child did not exit as
+// expected and wrote anything to that pipe, the parent dies with that text
+// as the message. Failures of fork(), pipe2(), sigprocmask(), waitpid() or
+// close() in the parent are fatal (SANDBOX_DIE) rather than reported as
+// "false".
+bool SandboxBPF::RunFunctionInPolicy(
+    void (*code_in_sandbox)(),
+    scoped_ptr<bpf_dsl::SandboxBPFDSLPolicy> policy) {
+  // Block all signals before forking a child process. This prevents an
+  // attacker from manipulating our test by sending us an unexpected signal.
+  sigset_t old_mask, new_mask;
+  if (sigfillset(&new_mask) || sigprocmask(SIG_BLOCK, &new_mask, &old_mask)) {
+    SANDBOX_DIE("sigprocmask() failed");
+  }
+  int fds[2];
+  if (pipe2(fds, O_NONBLOCK | O_CLOEXEC)) {
+    SANDBOX_DIE("pipe() failed");
+  }
+
+  // Both pipe ends must be above stderr, otherwise the dup2() onto
+  // descriptor 2 in the child would not behave as intended.
+  if (fds[0] <= 2 || fds[1] <= 2) {
+    SANDBOX_DIE("Process started without standard file descriptors");
+  }
+
+  // This code is using fork() and should only ever run single-threaded.
+  // Most of the code below is "async-signal-safe" and only minor changes
+  // would be needed to support threads.
+  DCHECK(IsSingleThreaded(proc_fd_));
+  pid_t pid = fork();
+  if (pid < 0) {
+    // Die if we cannot fork(). We would probably fail a little later
+    // anyway, as the machine is likely very close to running out of
+    // memory.
+    // But what we don't want to do is return "false", as a crafty
+    // attacker might cause fork() to fail at will and could trick us
+    // into running without a sandbox.
+    sigprocmask(SIG_SETMASK, &old_mask, NULL);  // OK, if it fails
+    SANDBOX_DIE("fork() failed unexpectedly");
+  }
+
+  // In the child process
+  if (!pid) {
+    // Test a very simple sandbox policy to verify that we can
+    // successfully turn on sandboxing.
+    Die::EnableSimpleExit();
+
+    errno = 0;
+    if (IGNORE_EINTR(close(fds[0]))) {
+      // This call to close() has been failing in strange ways. See
+      // crbug.com/152530. So we only fail in debug mode now.
+#if !defined(NDEBUG)
+      WriteFailedStderrSetupMessage(fds[1]);
+      SANDBOX_DIE(NULL);
+#endif
+    }
+    if (HANDLE_EINTR(dup2(fds[1], 2)) != 2) {
+      // Stderr could very well be a file descriptor to .xsession-errors, or
+      // another file, which could be backed by a file system that could cause
+      // dup2 to fail while trying to close stderr. It's important that we do
+      // not fail on trying to close stderr.
+      // If dup2 fails here, we will continue normally, this means that our
+      // parent won't cause a fatal failure if something writes to stderr in
+      // this child.
+#if !defined(NDEBUG)
+      // In DEBUG builds, we still want to get a report.
+      WriteFailedStderrSetupMessage(fds[1]);
+      SANDBOX_DIE(NULL);
+#endif
+    }
+    if (IGNORE_EINTR(close(fds[1]))) {
+      // This call to close() has been failing in strange ways. See
+      // crbug.com/152530. So we only fail in debug mode now.
+#if !defined(NDEBUG)
+      // fds[1] has just been handed to close() and must not be used again,
+      // so report through descriptor 2 instead. In debug builds the dup2()
+      // above has to have succeeded for execution to get this far, which
+      // makes descriptor 2 a duplicate of the same pipe end.
+      WriteFailedStderrSetupMessage(STDERR_FILENO);
+      SANDBOX_DIE(NULL);
+#endif
+    }
+
+    SetSandboxPolicy(policy.release());
+    if (!StartSandbox(PROCESS_SINGLE_THREADED)) {
+      SANDBOX_DIE(NULL);
+    }
+
+    // Run our code in the sandbox.
+    code_in_sandbox();
+
+    // code_in_sandbox() is not supposed to return here.
+    SANDBOX_DIE(NULL);
+  }
+
+  // In the parent process.
+  if (IGNORE_EINTR(close(fds[1]))) {
+    SANDBOX_DIE("close() failed");
+  }
+  if (sigprocmask(SIG_SETMASK, &old_mask, NULL)) {
+    SANDBOX_DIE("sigprocmask() failed");
+  }
+  int status;
+  if (HANDLE_EINTR(waitpid(pid, &status, 0)) != pid) {
+    SANDBOX_DIE("waitpid() failed unexpectedly");
+  }
+  bool rc = WIFEXITED(status) && WEXITSTATUS(status) == kExpectedExitCode;
+
+  // If we fail to support sandboxing, there might be an additional
+  // error message. If so, this was an entirely unexpected and fatal
+  // failure. We should report the failure and somebody must fix
+  // things. This is probably a security-critical bug in the sandboxing
+  // code.
+  if (!rc) {
+    char buf[4096];
+    // Leave room for the terminating NUL written below.
+    ssize_t len = HANDLE_EINTR(read(fds[0], buf, sizeof(buf) - 1));
+    if (len > 0) {
+      // Strip trailing newlines, but always keep at least one character.
+      while (len > 1 && buf[len - 1] == '\n') {
+        --len;
+      }
+      buf[len] = '\000';
+      SANDBOX_DIE(buf);
+    }
+  }
+  if (IGNORE_EINTR(close(fds[0]))) {
+    SANDBOX_DIE("close() failed");
+  }
+
+  return rc;
+}

+// Runs two probes in forked children and returns true only if both succeed:
+// ProbeProcess under ProbePolicy, then TryVsyscallProcess under
+// AllowAllPolicy. The second probe is skipped if the first one fails.
+bool SandboxBPF::KernelSupportSeccompBPF() {
+  return RunFunctionInPolicy(
+             ProbeProcess,
+             scoped_ptr<bpf_dsl::SandboxBPFDSLPolicy>(new ProbePolicy())) &&
+         RunFunctionInPolicy(
+             TryVsyscallProcess,
+             scoped_ptr<bpf_dsl::SandboxBPFDSLPolicy>(new AllowAllPolicy()));
+}

+// static
+// Reports whether the seccomp-BPF sandbox can be used, caching the answer in
+// the static |status_|. |proc_fd| is passed to IsSingleThreaded() and, for
+// the first full check, to the temporary SandboxBPF used for probing.
+//
+// The result is STATUS_ENABLED if the sandbox is already on; otherwise
+// STATUS_AVAILABLE / STATUS_UNAVAILABLE track whether the process currently
+// looks single-threaded, and STATUS_UNSUPPORTED records a failed kernel
+// probe.
+SandboxBPF::SandboxStatus SandboxBPF::SupportsSeccompSandbox(int proc_fd) {
+  // If the sandbox is currently active, we clearly must have support for
+  // sandboxing.
+  if (status_ == STATUS_ENABLED) {
+    return status_;
+  }
+
+  // Even if the sandbox was previously available, something might have
+  // changed in our run-time environment. Check one more time.
+  if (status_ == STATUS_AVAILABLE) {
+    if (!IsSingleThreaded(proc_fd)) {
+      status_ = STATUS_UNAVAILABLE;
+    }
+    return status_;
+  }
+
+  if (status_ == STATUS_UNAVAILABLE && IsSingleThreaded(proc_fd)) {
+    // All state transitions resulting in STATUS_UNAVAILABLE are immediately
+    // preceded by STATUS_AVAILABLE. Furthermore, these transitions all
+    // happen, if and only if they are triggered by the process being multi-
+    // threaded.
+    // In other words, if a single-threaded process is currently in the
+    // STATUS_UNAVAILABLE state, it is safe to assume that sandboxing is
+    // actually available.
+    status_ = STATUS_AVAILABLE;
+    return status_;
+  }
+
+  // If we have not previously checked for availability of the sandbox or if
+  // we otherwise don't believe to have a good cached value, we have to
+  // perform a thorough check now.
+  if (status_ == STATUS_UNKNOWN) {
+    // We create our own private copy of a "Sandbox" object. This ensures that
+    // the object does not have any policies configured, that might interfere
+    // with the tests done by "KernelSupportSeccompBPF()".
+    SandboxBPF sandbox;
+
+    // By setting "quiet_ = true" we suppress messages for expected and benign
+    // failures (e.g. if the current kernel lacks support for BPF filters).
+    sandbox.quiet_ = true;
+    sandbox.set_proc_fd(proc_fd);
+    status_ = sandbox.KernelSupportSeccompBPF() ? STATUS_AVAILABLE
+                                                : STATUS_UNSUPPORTED;
+
+    // As we are performing our tests from a child process, the run-time
+    // environment that is visible to the sandbox is always guaranteed to be
+    // single-threaded. Let's check here whether the caller is single-
+    // threaded. Otherwise, we mark the sandbox as temporarily unavailable.
+    if (status_ == STATUS_AVAILABLE && !IsSingleThreaded(proc_fd)) {
+      status_ = STATUS_UNAVAILABLE;
+    }
+  }
+  return status_;
+}

+// static
+// Probes for the seccomp() system call with SECCOMP_FILTER_FLAG_TSYNC by
+// invoking it with a NULL filter pointer.
+//
+// Returns STATUS_AVAILABLE if the call fails with EFAULT. Any other outcome
+// must be a failure with ENOSYS or EINVAL (enforced by CHECK) and yields
+// STATUS_UNSUPPORTED.
+SandboxBPF::SandboxStatus
+SandboxBPF::SupportsSeccompThreadFilterSynchronization() {
+  // Applying NO_NEW_PRIVS, a BPF filter, and synchronizing the filter across
+  // the thread group are all handled atomically by this syscall.
+  const int rv = syscall(
+      __NR_seccomp, SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, NULL);
+
+  if (rv == -1 && errno == EFAULT) {
+    return STATUS_AVAILABLE;
+  } else {
+    // TODO(jln): turn these into DCHECK after 417888 is considered fixed.
+    CHECK_EQ(-1, rv);
+    CHECK(ENOSYS == errno || EINVAL == errno);
+    return STATUS_UNSUPPORTED;
+  }
+}

+// Stores |proc_fd| in |proc_fd_|, the descriptor this object passes to
+// IsSingleThreaded().
+void SandboxBPF::set_proc_fd(int proc_fd) {
+  proc_fd_ = proc_fd;
+}

+// Installs the previously configured policy and turns the sandbox on.
+//
+// |thread_state| must be PROCESS_SINGLE_THREADED or PROCESS_MULTI_THREADED
+// and is cross-checked against IsSingleThreaded(); the multi-threaded case
+// additionally requires kernel support for thread filter synchronization.
+// Every precondition failure goes through SANDBOX_DIE; the "return false"
+// statements that follow only matter if SANDBOX_DIE returns. On success
+// |status_| becomes STATUS_ENABLED and true is returned.
+bool SandboxBPF::StartSandbox(SandboxThreadState thread_state) {
+  CHECK(thread_state == PROCESS_SINGLE_THREADED ||
+        thread_state == PROCESS_MULTI_THREADED);
+
+  if (status_ == STATUS_UNSUPPORTED || status_ == STATUS_UNAVAILABLE) {
+    SANDBOX_DIE(
+        "Trying to start sandbox, even though it is known to be "
+        "unavailable");
+    return false;
+  } else if (sandbox_has_started_) {
+    SANDBOX_DIE(
+        "Cannot repeatedly start sandbox. Create a separate Sandbox "
+        "object instead.");
+    return false;
+  }
+  if (proc_fd_ < 0) {
+    proc_fd_ = open("/proc", O_RDONLY | O_DIRECTORY);
+  }
+  if (proc_fd_ < 0) {
+    // For now, continue in degraded mode, if we can't access /proc.
+    // In the future, we might want to tighten this requirement.
+  }
+
+  bool supports_tsync =
+      SupportsSeccompThreadFilterSynchronization() == STATUS_AVAILABLE;
+
+  if (thread_state == PROCESS_SINGLE_THREADED) {
+    if (!IsSingleThreaded(proc_fd_)) {
+      SANDBOX_DIE("Cannot start sandbox; process is already multi-threaded");
+      return false;
+    }
+  } else if (thread_state == PROCESS_MULTI_THREADED) {
+    if (IsSingleThreaded(proc_fd_)) {
+      SANDBOX_DIE("Cannot start sandbox; "
+                  "process may be single-threaded when reported as not");
+      return false;
+    }
+    if (!supports_tsync) {
+      SANDBOX_DIE("Cannot start sandbox; kernel does not support synchronizing "
+                  "filters for a threadgroup");
+      return false;
+    }
+  }
+
+  // We no longer need access to any files in /proc. We want to do this
+  // before installing the filters, just in case that our policy denies
+  // close().
+  if (proc_fd_ >= 0) {
+    if (IGNORE_EINTR(close(proc_fd_))) {
+      SANDBOX_DIE("Failed to close file descriptor for /proc");
+      return false;
+    }
+    proc_fd_ = -1;
+  }
+
+  // Install the filters. Thread synchronization is used whenever the kernel
+  // supports it, and is mandatory for a multi-threaded process.
+  InstallFilter(supports_tsync || thread_state == PROCESS_MULTI_THREADED);
+
+  // We are now inside the sandbox.
+  status_ = STATUS_ENABLED;
+
+  return true;
+}

+// Takes ownership of |policy| (it is stored in the |policy_| smart pointer).
+// Must be called before the sandbox has started, and only while no policy
+// is set (checked with DCHECK).
+// Don't take a scoped_ptr here, polymorphism makes their use awkward.
+void SandboxBPF::SetSandboxPolicy(bpf_dsl::SandboxBPFDSLPolicy* policy) {
+  DCHECK(!policy_);
+  if (sandbox_has_started_) {
+    SANDBOX_DIE("Cannot change policy after sandbox has started");
+  }
+  policy_.reset(policy);
+}

+// Compiles the current policy into a BPF program and installs it in the
+// kernel, then marks this object as started.
+//
+// When |must_sync_threads| is true the filter is installed with the
+// seccomp() system call and SECCOMP_FILTER_FLAG_TSYNC; otherwise
+// prctl(PR_SET_SECCOMP) is used. Any kernel refusal is fatal (SANDBOX_DIE),
+// with the message suppressed when |quiet_| is set.
+void SandboxBPF::InstallFilter(bool must_sync_threads) {
+  // We want to be very careful in not imposing any requirements on the
+  // policies that are set with SetSandboxPolicy(). This means, as soon as
+  // the sandbox is active, we shouldn't be relying on libraries that could
+  // be making system calls. This, for example, means we should avoid
+  // using the heap and we should avoid using STL functions.
+  // Temporarily copy the contents of the "program" vector into a
+  // stack-allocated array; and then explicitly destroy that object.
+  // This makes sure we don't ex- or implicitly call new/delete after we
+  // installed the BPF filter program in the kernel. Depending on the
+  // system memory allocator that is in effect, these operators can result
+  // in system calls to things like munmap() or brk().
+  CodeGen::Program* program = AssembleFilter(false).release();
+
+  // Variable-length array (compiler extension); sized to hold the whole
+  // program on the stack.
+  struct sock_filter bpf[program->size()];
+  const struct sock_fprog prog = {static_cast<unsigned short>(program->size()),
+                                  bpf};
+  memcpy(bpf, &(*program)[0], sizeof(bpf));
+  delete program;
+
+  // Make an attempt to release memory that is no longer needed here, rather
+  // than in the destructor. Try to avoid as much as possible to presume of
+  // what will be possible to do in the new (sandboxed) execution environment.
+  policy_.reset();
+
+  if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+    SANDBOX_DIE(quiet_ ? NULL : "Kernel refuses to enable no-new-privs");
+  }
+
+  // Install BPF filter program. If the thread state indicates multi-threading
+  // support, then the kernel has the seccomp system call. Otherwise, fall
+  // back on prctl, which requires the process to be single-threaded.
+  if (must_sync_threads) {
+    int rv = syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER,
+        SECCOMP_FILTER_FLAG_TSYNC, reinterpret_cast<const char*>(&prog));
+    if (rv) {
+      SANDBOX_DIE(quiet_ ? NULL :
+          "Kernel refuses to turn on and synchronize threads for BPF filters");
+    }
+  } else {
+    if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) {
+      SANDBOX_DIE(quiet_ ? NULL : "Kernel refuses to turn on BPF filters");
+    }
+  }
+
+  sandbox_has_started_ = true;
+}

+// Compiles |policy_| into a BPF program and returns ownership of it.
+//
+// When |force_verification| is true -- always the case in debug builds --
+// the program is checked with Verifier::VerifyBPF(). On a verification
+// failure the program is printed and the process dies with the verifier's
+// error message.
+scoped_ptr<CodeGen::Program> SandboxBPF::AssembleFilter(
+    bool force_verification) {
+#if !defined(NDEBUG)
+  force_verification = true;
+#endif
+
+  bpf_dsl::PolicyCompiler compiler(policy_.get(), Trap::Registry());
+  scoped_ptr<CodeGen::Program> program = compiler.Compile();
+
+  // Make sure compilation resulted in BPF program that executes
+  // correctly. Otherwise, there is an internal error in our BPF compiler.
+  // There is really nothing the caller can do until the bug is fixed.
+  if (force_verification) {
+    // Verification is expensive. We only perform this step, if we are
+    // compiled in debug mode, or if the caller explicitly requested
+    // verification.
+    const char* err = NULL;
+    if (!Verifier::VerifyBPF(&compiler, *program, *policy_, &err)) {
+      CodeGen::PrintProgram(*program);
+      SANDBOX_DIE(err);
+    }
+  }
+
+  return program.Pass();
+}

+// Thin forwarder to bpf_dsl::PolicyCompiler::IsRequiredForUnsafeTrap() for
+// system call number |sysno|.
+bool SandboxBPF::IsRequiredForUnsafeTrap(int sysno) {
+  return bpf_dsl::PolicyCompiler::IsRequiredForUnsafeTrap(sysno);
+}

+// Issues the system call described by |args| through Syscall::Call(),
+// passing args.nr and the six argument slots (each cast to intptr_t), and
+// returns whatever Syscall::Call() returns.
+intptr_t SandboxBPF::ForwardSyscall(const struct arch_seccomp_data& args) {
+  return Syscall::Call(args.nr,
+                       static_cast<intptr_t>(args.args[0]),
+                       static_cast<intptr_t>(args.args[1]),
+                       static_cast<intptr_t>(args.args[2]),
+                       static_cast<intptr_t>(args.args[3]),
+                       static_cast<intptr_t>(args.args[4]),
+                       static_cast<intptr_t>(args.args[5]));
+}

+SandboxBPF::SandboxStatus SandboxBPF::status_ = STATUS_UNKNOWN;  // Cached sandbox availability; starts as STATUS_UNKNOWN until SupportsSeccompSandbox() or StartSandbox() updates it.

+} // namespace sandbox

« no previous file with comments | « sandbox/linux/seccomp-bpf/sandbox_bpf.h ('k') | sandbox/linux/seccomp-bpf/sandbox_bpf_test_runner.h » ('j') | no next file with comments »