| Index: sandbox/linux/seccomp-bpf/sandbox_bpf.cc
|
| diff --git a/sandbox/linux/seccomp-bpf/sandbox_bpf.cc b/sandbox/linux/seccomp-bpf/sandbox_bpf.cc
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..8a9b3f7c4c80970d4becdeb8764389acf55ee914
|
| --- /dev/null
|
| +++ b/sandbox/linux/seccomp-bpf/sandbox_bpf.cc
|
| @@ -0,0 +1,525 @@
|
| +// Copyright (c) 2012 The Chromium Authors. All rights reserved.
|
| +// Use of this source code is governed by a BSD-style license that can be
|
| +// found in the LICENSE file.
|
| +
|
| +#include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"
|
| +
|
| +// Some headers on Android are missing cdefs: crbug.com/172337.
|
| +// (We can't use OS_ANDROID here since build_config.h is not included).
|
| +#if defined(ANDROID)
|
| +#include <sys/cdefs.h>
|
| +#endif
|
| +
|
| +#include <errno.h>
|
| +#include <fcntl.h>
|
| +#include <linux/filter.h>
|
| +#include <signal.h>
|
| +#include <string.h>
|
| +#include <sys/prctl.h>
|
| +#include <sys/stat.h>
|
| +#include <sys/syscall.h>
|
| +#include <sys/types.h>
|
| +#include <sys/wait.h>
|
| +#include <time.h>
|
| +#include <unistd.h>
|
| +
|
| +#include "base/compiler_specific.h"
|
| +#include "base/logging.h"
|
| +#include "base/macros.h"
|
| +#include "base/memory/scoped_ptr.h"
|
| +#include "base/posix/eintr_wrapper.h"
|
| +#include "sandbox/linux/bpf_dsl/bpf_dsl.h"
|
| +#include "sandbox/linux/bpf_dsl/policy_compiler.h"
|
| +#include "sandbox/linux/seccomp-bpf/codegen.h"
|
| +#include "sandbox/linux/seccomp-bpf/die.h"
|
| +#include "sandbox/linux/seccomp-bpf/errorcode.h"
|
| +#include "sandbox/linux/seccomp-bpf/linux_seccomp.h"
|
| +#include "sandbox/linux/seccomp-bpf/syscall.h"
|
| +#include "sandbox/linux/seccomp-bpf/syscall_iterator.h"
|
| +#include "sandbox/linux/seccomp-bpf/trap.h"
|
| +#include "sandbox/linux/seccomp-bpf/verifier.h"
|
| +#include "sandbox/linux/services/linux_syscalls.h"
|
| +
|
| +using sandbox::bpf_dsl::Allow;
|
| +using sandbox::bpf_dsl::Error;
|
| +using sandbox::bpf_dsl::ResultExpr;
|
| +using sandbox::bpf_dsl::SandboxBPFDSLPolicy;
|
| +
|
| +namespace sandbox {
|
| +
|
| +namespace {
|
| +
|
| +// Exit status that the probe functions in this file pass to exit_group() on
|
| +// success. RunFunctionInPolicy() compares the child's exit status against it.
|
| +const int kExpectedExitCode = 100;
|
| +
|
| +#if !defined(NDEBUG)
|
| +// Best-effort diagnostic: writes a fixed preamble, the strerror() text for the
|
| +// current errno and a trailing newline to |out_fd|. Stops silently as soon as
|
| +// one of the writes does not make progress.
|
| +void WriteFailedStderrSetupMessage(int out_fd) {
|
| +  // Look up the description first, before any write() below can change errno.
|
| +  const char* const errno_text = strerror(errno);
|
| +  static const char kPreamble[] =
|
| +      "You have reproduced a puzzling issue.\n"
|
| +      "Please, report to crbug.com/152530!\n"
|
| +      "Failed to set up stderr: ";
|
| +
|
| +  if (HANDLE_EINTR(write(out_fd, kPreamble, sizeof(kPreamble) - 1)) <= 0) {
|
| +    return;
|
| +  }
|
| +  if (!errno_text) {
|
| +    return;
|
| +  }
|
| +  if (HANDLE_EINTR(write(out_fd, errno_text, strlen(errno_text))) <= 0) {
|
| +    return;
|
| +  }
|
| +  // The result of the final write is consumed by the empty "if" on purpose;
|
| +  // there is nothing left to do whether or not it succeeds.
|
| +  if (HANDLE_EINTR(write(out_fd, "\n", 1))) {
|
| +  }
|
| +}
|
| +#endif // !defined(NDEBUG)
|
| +
|
| +// Minimal policy used to find out whether a filter really took effect:
|
| +// getpid() is made to fail with EPERM, exit_group() is allowed, and every
|
| +// other system call fails with EINVAL.
|
| +class ProbePolicy : public SandboxBPFDSLPolicy {
|
| + public:
|
| +  ProbePolicy() {}
|
| +  virtual ~ProbePolicy() {}
|
| +
|
| +  virtual ResultExpr EvaluateSyscall(int sysnum) const override {
|
| +    if (sysnum == __NR_getpid) {
|
| +      // EPERM is the marker that ProbeProcess() looks for to confirm that
|
| +      // the filter actually ran.
|
| +      return Error(EPERM);
|
| +    }
|
| +    if (sysnum == __NR_exit_group) {
|
| +      // The probe has to be able to exit() with a non-default return code.
|
| +      return Allow();
|
| +    }
|
| +    // Anything else fails in an easily recognizable way.
|
| +    return Error(EINVAL);
|
| +  }
|
| +
|
| + private:
|
| +  DISALLOW_COPY_AND_ASSIGN(ProbePolicy);
|
| +};
|
| +
|
| +// Probe body meant to run under ProbePolicy: if getpid() is rejected with
|
| +// EPERM, leaves the process via exit_group(kExpectedExitCode). Otherwise it
|
| +// simply returns to the caller.
|
| +void ProbeProcess(void) {
|
| +  const bool getpid_denied = syscall(__NR_getpid) < 0 && errno == EPERM;
|
| +  if (!getpid_denied) {
|
| +    return;
|
| +  }
|
| +  syscall(__NR_exit_group, static_cast<intptr_t>(kExpectedExitCode));
|
| +}
|
| +
|
| +// Policy that permits every system call. In builds where DCHECK is active it
|
| +// also checks that it is only asked about valid system call numbers.
|
| +class AllowAllPolicy : public SandboxBPFDSLPolicy {
|
| + public:
|
| +  AllowAllPolicy() {}
|
| +  virtual ~AllowAllPolicy() {}
|
| +
|
| +  // Returns Allow() regardless of |sysnum|.
|
| +  virtual ResultExpr EvaluateSyscall(int sysnum) const override {
|
| +    DCHECK(SandboxBPF::IsValidSyscallNumber(sysnum));
|
| +    return Allow();
|
| +  }
|
| +
|
| + private:
|
| +  DISALLOW_COPY_AND_ASSIGN(AllowAllPolicy);
|
| +};
|
| +
|
| +// Probe body that checks whether calling time() works inside the sandbox.
|
| +// On success it leaves the process via exit_group(kExpectedExitCode);
|
| +// otherwise it returns to the caller.
|
| +void TryVsyscallProcess(void) {
|
| +  time_t current_time;
|
| +  // time() is implemented as a vsyscall. With an older glibc, with
|
| +  // vsyscall=emulate and some versions of the seccomp BPF patch
|
| +  // we may get SIGKILL-ed. Detect this!
|
| +  if (time(&current_time) != static_cast<time_t>(-1)) {
|
| +    syscall(__NR_exit_group, static_cast<intptr_t>(kExpectedExitCode));
|
| +  }
|
| +}
|
| +
|
| +// Reports whether the calling process appears to have exactly one thread.
|
| +//
|
| +// |proc_fd| is a descriptor that "self/task" is resolved against (callers in
|
| +// this file pass a descriptor for /proc). A negative |proc_fd| means the
|
| +// answer cannot be determined, in which case true is returned.
|
| +//
|
| +// Returns false if the task directory cannot be opened, stat'ed or closed, or
|
| +// if its link count is not 3.
|
| +bool IsSingleThreaded(int proc_fd) {
|
| +  if (proc_fd < 0) {
|
| +    // Cannot determine whether program is single-threaded. Hope for
|
| +    // the best...
|
| +    return true;
|
| +  }
|
| +
|
| +  const int task = openat(proc_fd, "self/task", O_RDONLY | O_DIRECTORY);
|
| +  if (task < 0) {
|
| +    return false;
|
| +  }
|
| +
|
| +  // A link count of 3 is taken to mean "one thread" (presumably ".", ".." and
|
| +  // a single per-thread subdirectory).
|
| +  struct stat sb;
|
| +  const bool one_thread = fstat(task, &sb) == 0 && sb.st_nlink == 3;
|
| +
|
| +  // Close the descriptor exactly once on every path. A failed close() must
|
| +  // not be retried: the descriptor number may already have been released, so
|
| +  // a second close() could hit an unrelated descriptor.
|
| +  const bool closed_cleanly = IGNORE_EINTR(close(task)) == 0;
|
| +
|
| +  return one_thread && closed_cleanly;
|
| +}
|
| +
|
| +} // namespace
|
| +
|
| +// Creates a sandbox object that is not quiet, has no /proc descriptor, has
|
| +// not been started and has no policy attached.
|
| +SandboxBPF::SandboxBPF()
|
| +    : quiet_(false),
|
| +      proc_fd_(-1),
|
| +      sandbox_has_started_(false),
|
| +      policy_() {
|
| +}
|
| +
|
| +// Empty destructor: no explicit cleanup is performed here (in particular,
|
| +// proc_fd_ is not closed by it).
|
| +SandboxBPF::~SandboxBPF() {}
|
| +
|
| +// Returns whatever SyscallSet::IsValid() reports for |sysnum|.
|
| +bool SandboxBPF::IsValidSyscallNumber(int sysnum) {
|
| +  const bool is_valid = SyscallSet::IsValid(sysnum);
|
| +  return is_valid;
|
| +}
|
| +
|
| +// Forks a child process, installs |policy| in it (SetSandboxPolicy() followed
|
| +// by StartSandbox(PROCESS_SINGLE_THREADED)) and runs |code_in_sandbox| there.
|
| +//
|
| +// Returns true only if the child exited normally with kExpectedExitCode.
|
| +// The child's stderr is redirected into a pipe; if the child did not exit as
|
| +// expected and wrote anything into that pipe, the parent dies with that text
|
| +// as its message. Failures of the plumbing itself (signal mask, pipe, fork,
|
| +// waitpid, close in the parent) are fatal as well.
|
| +bool SandboxBPF::RunFunctionInPolicy(
|
| + void (*code_in_sandbox)(),
|
| + scoped_ptr<bpf_dsl::SandboxBPFDSLPolicy> policy) {
|
| + // Block all signals before forking a child process. This prevents an
|
| + // attacker from manipulating our test by sending us an unexpected signal.
|
| + sigset_t old_mask, new_mask;
|
| + if (sigfillset(&new_mask) || sigprocmask(SIG_BLOCK, &new_mask, &old_mask)) {
|
| + SANDBOX_DIE("sigprocmask() failed");
|
| + }
|
| + // The pipe is created non-blocking, so the read() in the parent further
|
| + // down does not hang when the child wrote nothing.
|
| + int fds[2];
|
| + if (pipe2(fds, O_NONBLOCK | O_CLOEXEC)) {
|
| + SANDBOX_DIE("pipe() failed");
|
| + }
|
| +
|
| + // Descriptors 0-2 must already be taken; otherwise the dup2() onto
|
| + // descriptor 2 in the child would collide with one end of the pipe.
|
| + if (fds[0] <= 2 || fds[1] <= 2) {
|
| + SANDBOX_DIE("Process started without standard file descriptors");
|
| + }
|
| +
|
| + // This code is using fork() and should only ever run single-threaded.
|
| + // Most of the code below is "async-signal-safe" and only minor changes
|
| + // would be needed to support threads.
|
| + DCHECK(IsSingleThreaded(proc_fd_));
|
| + pid_t pid = fork();
|
| + if (pid < 0) {
|
| + // Die if we cannot fork(). We would probably fail a little later
|
| + // anyway, as the machine is likely very close to running out of
|
| + // memory.
|
| + // But what we don't want to do is return "false", as a crafty
|
| + // attacker might cause fork() to fail at will and could trick us
|
| + // into running without a sandbox.
|
| + sigprocmask(SIG_SETMASK, &old_mask, NULL); // OK, if it fails
|
| + SANDBOX_DIE("fork() failed unexpectedly");
|
| + }
|
| +
|
| + // In the child process
|
| + if (!pid) {
|
| + // Test a very simple sandbox policy to verify that we can
|
| + // successfully turn on sandboxing.
|
| + Die::EnableSimpleExit();
|
| +
|
| + // errno is cleared first because WriteFailedStderrSetupMessage() reports
|
| + // strerror(errno) if one of the following calls fails.
|
| + errno = 0;
|
| + if (IGNORE_EINTR(close(fds[0]))) {
|
| + // This call to close() has been failing in strange ways. See
|
| + // crbug.com/152530. So we only fail in debug mode now.
|
| +#if !defined(NDEBUG)
|
| + WriteFailedStderrSetupMessage(fds[1]);
|
| + SANDBOX_DIE(NULL);
|
| +#endif
|
| + }
|
| + if (HANDLE_EINTR(dup2(fds[1], 2)) != 2) {
|
| + // Stderr could very well be a file descriptor to .xsession-errors, or
|
| + // another file, which could be backed by a file system that could cause
|
| + // dup2 to fail while trying to close stderr. It's important that we do
|
| + // not fail on trying to close stderr.
|
| + // If dup2 fails here, we will continue normally, this means that our
|
| + // parent won't cause a fatal failure if something writes to stderr in
|
| + // this child.
|
| +#if !defined(NDEBUG)
|
| + // In DEBUG builds, we still want to get a report.
|
| + WriteFailedStderrSetupMessage(fds[1]);
|
| + SANDBOX_DIE(NULL);
|
| +#endif
|
| + }
|
| + if (IGNORE_EINTR(close(fds[1]))) {
|
| + // This call to close() has been failing in strange ways. See
|
| + // crbug.com/152530. So we only fail in debug mode now.
|
| +#if !defined(NDEBUG)
|
| + // NOTE(review): fds[1] has just been handed to close(); writing the
|
| + // message to it may no longer work. Confirm whether descriptor 2 was
|
| + // intended here.
|
| + WriteFailedStderrSetupMessage(fds[1]);
|
| + SANDBOX_DIE(NULL);
|
| +#endif
|
| + }
|
| +
|
| + // Ownership of the policy moves to this object in the child.
|
| + SetSandboxPolicy(policy.release());
|
| + if (!StartSandbox(PROCESS_SINGLE_THREADED)) {
|
| + SANDBOX_DIE(NULL);
|
| + }
|
| +
|
| + // Run our code in the sandbox.
|
| + code_in_sandbox();
|
| +
|
| + // code_in_sandbox() is not supposed to return here.
|
| + SANDBOX_DIE(NULL);
|
| + }
|
| +
|
| + // In the parent process.
|
| + if (IGNORE_EINTR(close(fds[1]))) {
|
| + SANDBOX_DIE("close() failed");
|
| + }
|
| + if (sigprocmask(SIG_SETMASK, &old_mask, NULL)) {
|
| + SANDBOX_DIE("sigprocmask() failed");
|
| + }
|
| + int status;
|
| + if (HANDLE_EINTR(waitpid(pid, &status, 0)) != pid) {
|
| + SANDBOX_DIE("waitpid() failed unexpectedly");
|
| + }
|
| + bool rc = WIFEXITED(status) && WEXITSTATUS(status) == kExpectedExitCode;
|
| +
|
| + // If we fail to support sandboxing, there might be an additional
|
| + // error message. If so, this was an entirely unexpected and fatal
|
| + // failure. We should report the failure and somebody must fix
|
| + // things. This is probably a security-critical bug in the sandboxing
|
| + // code.
|
| + if (!rc) {
|
| + char buf[4096];
|
| + // One byte is held back so that the terminating NUL always fits.
|
| + ssize_t len = HANDLE_EINTR(read(fds[0], buf, sizeof(buf) - 1));
|
| + if (len > 0) {
|
| + // Strip trailing newlines, but always keep at least one character.
|
| + while (len > 1 && buf[len - 1] == '\n') {
|
| + --len;
|
| + }
|
| + buf[len] = '\000';
|
| + SANDBOX_DIE(buf);
|
| + }
|
| + }
|
| + if (IGNORE_EINTR(close(fds[0]))) {
|
| + SANDBOX_DIE("close() failed");
|
| + }
|
| +
|
| + return rc;
|
| +}
|
| +
|
| +// Runs two probes in forked children and returns true only if both succeed:
|
| +// first ProbeProcess under ProbePolicy, then TryVsyscallProcess under
|
| +// AllowAllPolicy. The second probe is skipped when the first one fails.
|
| +bool SandboxBPF::KernelSupportSeccompBPF() {
|
| +  scoped_ptr<bpf_dsl::SandboxBPFDSLPolicy> probe_policy(new ProbePolicy());
|
| +  if (!RunFunctionInPolicy(ProbeProcess, probe_policy.Pass())) {
|
| +    return false;
|
| +  }
|
| +
|
| +  scoped_ptr<bpf_dsl::SandboxBPFDSLPolicy> allow_all_policy(
|
| +      new AllowAllPolicy());
|
| +  return RunFunctionInPolicy(TryVsyscallProcess, allow_all_policy.Pass());
|
| +}
|
| +
|
| +// static
|
| +// Returns the (possibly refreshed) cached sandbox status in status_.
|
| +//
|
| +// |proc_fd| is handed to IsSingleThreaded() and, when a full probe is needed,
|
| +// to the temporary SandboxBPF object that performs the probe.
|
| +SandboxBPF::SandboxStatus SandboxBPF::SupportsSeccompSandbox(int proc_fd) {
|
| +  switch (status_) {
|
| +    case STATUS_ENABLED:
|
| +      // If the sandbox is currently active, we clearly must have support for
|
| +      // sandboxing.
|
| +      return status_;
|
| +
|
| +    case STATUS_AVAILABLE:
|
| +      // The sandbox was available before, but the run-time environment might
|
| +      // have changed since then, so the thread count is checked again.
|
| +      if (!IsSingleThreaded(proc_fd)) {
|
| +        status_ = STATUS_UNAVAILABLE;
|
| +      }
|
| +      return status_;
|
| +
|
| +    case STATUS_UNAVAILABLE:
|
| +      // Every transition into STATUS_UNAVAILABLE starts from
|
| +      // STATUS_AVAILABLE and happens if and only if the process was found to
|
| +      // be multi-threaded. A single-threaded process in this state can
|
| +      // therefore safely be treated as having sandboxing available again.
|
| +      if (IsSingleThreaded(proc_fd)) {
|
| +        status_ = STATUS_AVAILABLE;
|
| +      }
|
| +      return status_;
|
| +
|
| +    case STATUS_UNKNOWN: {
|
| +      // No usable cached value: perform the thorough check. A private
|
| +      // SandboxBPF object is used so that no previously configured policy
|
| +      // can interfere with the tests done by KernelSupportSeccompBPF().
|
| +      SandboxBPF sandbox;
|
| +
|
| +      // quiet_ suppresses messages for expected and benign failures (e.g. a
|
| +      // kernel that lacks support for BPF filters).
|
| +      sandbox.quiet_ = true;
|
| +      sandbox.set_proc_fd(proc_fd);
|
| +      status_ = sandbox.KernelSupportSeccompBPF() ? STATUS_AVAILABLE
|
| +                                                  : STATUS_UNSUPPORTED;
|
| +
|
| +      // The probes ran in a child process, which is always single-threaded.
|
| +      // The caller itself may not be; if so, the sandbox is marked as
|
| +      // temporarily unavailable.
|
| +      if (status_ == STATUS_AVAILABLE && !IsSingleThreaded(proc_fd)) {
|
| +        status_ = STATUS_UNAVAILABLE;
|
| +      }
|
| +      return status_;
|
| +    }
|
| +
|
| +    default:
|
| +      // Any other state is returned unchanged.
|
| +      return status_;
|
| +  }
|
| +}
|
| +
|
| +// static
|
| +// Probes for seccomp() with SECCOMP_FILTER_FLAG_TSYNC by issuing the call
|
| +// with a NULL filter. A failure with EFAULT is reported as STATUS_AVAILABLE;
|
| +// anything else is reported as STATUS_UNSUPPORTED after checking that the
|
| +// call failed with ENOSYS or EINVAL.
|
| +SandboxBPF::SandboxStatus
|
| +SandboxBPF::SupportsSeccompThreadFilterSynchronization() {
|
| +  // Applying NO_NEW_PRIVS, a BPF filter, and synchronizing the filter across
|
| +  // the thread group are all handled atomically by this syscall.
|
| +  const int rv = syscall(
|
| +      __NR_seccomp, SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, NULL);
|
| +
|
| +  const bool rejected_null_filter = (rv == -1 && errno == EFAULT);
|
| +  if (rejected_null_filter) {
|
| +    return STATUS_AVAILABLE;
|
| +  }
|
| +
|
| +  // TODO(jln): turn these into DCHECK after 417888 is considered fixed.
|
| +  CHECK_EQ(-1, rv);
|
| +  CHECK(ENOSYS == errno || EINVAL == errno);
|
| +  return STATUS_UNSUPPORTED;
|
| +}
|
| +
|
| +// Stores |proc_fd| as the descriptor used for later lookups under /proc.
|
| +void SandboxBPF::set_proc_fd(int proc_fd) {
|
| +  proc_fd_ = proc_fd;
|
| +}
|
| +
|
| +// Turns the sandbox on for the calling process.
|
| +//
|
| +// Checks that the actual thread state matches |thread_state|, closes
|
| +// proc_fd_, installs the filter through InstallFilter() and sets status_ to
|
| +// STATUS_ENABLED. Returns true on success. Every failure goes through
|
| +// SANDBOX_DIE; the "return false" after each one is only reached if
|
| +// SANDBOX_DIE returns (NOTE(review): confirm whether it ever does).
|
| +bool SandboxBPF::StartSandbox(SandboxThreadState thread_state) {
|
| + CHECK(thread_state == PROCESS_SINGLE_THREADED ||
|
| + thread_state == PROCESS_MULTI_THREADED);
|
| +
|
| + if (status_ == STATUS_UNSUPPORTED || status_ == STATUS_UNAVAILABLE) {
|
| + SANDBOX_DIE(
|
| + "Trying to start sandbox, even though it is known to be "
|
| + "unavailable");
|
| + return false;
|
| + } else if (sandbox_has_started_) {
|
| + SANDBOX_DIE(
|
| + "Cannot repeatedly start sandbox. Create a separate Sandbox "
|
| + "object instead.");
|
| + return false;
|
| + }
|
| + // If the caller did not supply a descriptor for /proc, try to open one.
|
| + if (proc_fd_ < 0) {
|
| + proc_fd_ = open("/proc", O_RDONLY | O_DIRECTORY);
|
| + }
|
| + if (proc_fd_ < 0) {
|
| + // For now, continue in degraded mode, if we can't access /proc.
|
| + // In the future, we might want to tighten this requirement.
|
| + }
|
| +
|
| + bool supports_tsync =
|
| + SupportsSeccompThreadFilterSynchronization() == STATUS_AVAILABLE;
|
| +
|
| + if (thread_state == PROCESS_SINGLE_THREADED) {
|
| + if (!IsSingleThreaded(proc_fd_)) {
|
| + SANDBOX_DIE("Cannot start sandbox; process is already multi-threaded");
|
| + return false;
|
| + }
|
| + } else if (thread_state == PROCESS_MULTI_THREADED) {
|
| + // Note: IsSingleThreaded() returns true for a negative descriptor, so a
|
| + // multi-threaded start also dies here when /proc could not be opened.
|
| + if (IsSingleThreaded(proc_fd_)) {
|
| + SANDBOX_DIE("Cannot start sandbox; "
|
| + "process may be single-threaded when reported as not");
|
| + return false;
|
| + }
|
| + if (!supports_tsync) {
|
| + SANDBOX_DIE("Cannot start sandbox; kernel does not support synchronizing "
|
| + "filters for a threadgroup");
|
| + return false;
|
| + }
|
| + }
|
| +
|
| + // We no longer need access to any files in /proc. We want to do this
|
| + // before installing the filters, just in case that our policy denies
|
| + // close().
|
| + if (proc_fd_ >= 0) {
|
| + if (IGNORE_EINTR(close(proc_fd_))) {
|
| + SANDBOX_DIE("Failed to close file descriptor for /proc");
|
| + return false;
|
| + }
|
| + proc_fd_ = -1;
|
| + }
|
| +
|
| + // Install the filters.
|
| + // The thread-synchronizing path is requested whenever the kernel supports
|
| + // it, even for a single-threaded process.
|
| + InstallFilter(supports_tsync || thread_state == PROCESS_MULTI_THREADED);
|
| +
|
| + // We are now inside the sandbox.
|
| + status_ = STATUS_ENABLED;
|
| +
|
| + return true;
|
| +}
|
| +
|
| +// Takes ownership of |policy| and stores it in policy_. Must be called
|
| +// before the sandbox has been started, and (checked with DCHECK) only while
|
| +// no policy is set yet.
|
| +// A raw pointer is taken rather than a scoped_ptr because polymorphism makes
|
| +// the latter awkward to use here.
|
| +void SandboxBPF::SetSandboxPolicy(bpf_dsl::SandboxBPFDSLPolicy* policy) {
|
| +  DCHECK(!policy_);
|
| +
|
| +  const bool already_running = sandbox_has_started_;
|
| +  if (already_running) {
|
| +    SANDBOX_DIE("Cannot change policy after sandbox has started");
|
| +  }
|
| +
|
| +  policy_.reset(policy);
|
| +}
|
| +
|
| +// Compiles the current policy and installs the resulting BPF program in the
|
| +// kernel.
|
| +//
|
| +// When |must_sync_threads| is true the filter is installed with the seccomp()
|
| +// system call and SECCOMP_FILTER_FLAG_TSYNC; otherwise prctl(PR_SET_SECCOMP)
|
| +// is used. Any refusal by the kernel is fatal (silently so when quiet_ is
|
| +// set). On success sandbox_has_started_ is set to true.
|
| +void SandboxBPF::InstallFilter(bool must_sync_threads) {
|
| + // We want to be very careful in not imposing any requirements on the
|
| + // policies that are set with SetSandboxPolicy(). This means, as soon as
|
| + // the sandbox is active, we shouldn't be relying on libraries that could
|
| + // be making system calls. This, for example, means we should avoid
|
| + // using the heap and we should avoid using STL functions.
|
| + // Temporarily copy the contents of the "program" vector into a
|
| + // stack-allocated array; and then explicitly destroy that object.
|
| + // This makes sure we don't ex- or implicitly call new/delete after we
|
| + // installed the BPF filter program in the kernel. Depending on the
|
| + // system memory allocator that is in effect, these operators can result
|
| + // in system calls to things like munmap() or brk().
|
| + CodeGen::Program* program = AssembleFilter(false).release();
|
| +
|
| + // NOTE(review): the instruction count is narrowed to unsigned short for
|
| + // sock_fprog; this assumes the compiler never emits a program too large to
|
| + // fit. Confirm that limit is enforced upstream.
|
| + struct sock_filter bpf[program->size()];
|
| + const struct sock_fprog prog = {static_cast<unsigned short>(program->size()),
|
| + bpf};
|
| + memcpy(bpf, &(*program)[0], sizeof(bpf));
|
| + delete program;
|
| +
|
| + // Make an attempt to release memory that is no longer needed here, rather
|
| + // than in the destructor. Try to avoid as much as possible to presume of
|
| + // what will be possible to do in the new (sandboxed) execution environment.
|
| + policy_.reset();
|
| +
|
| + if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
|
| + SANDBOX_DIE(quiet_ ? NULL : "Kernel refuses to enable no-new-privs");
|
| + }
|
| +
|
| + // Install BPF filter program. If the thread state indicates multi-threading
|
| + // support, then the kernel has the seccomp system call. Otherwise, fall
|
| + // back on prctl, which requires the process to be single-threaded.
|
| + if (must_sync_threads) {
|
| + int rv = syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER,
|
| + SECCOMP_FILTER_FLAG_TSYNC, reinterpret_cast<const char*>(&prog));
|
| + if (rv) {
|
| + SANDBOX_DIE(quiet_ ? NULL :
|
| + "Kernel refuses to turn on and synchronize threads for BPF filters");
|
| + }
|
| + } else {
|
| + if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) {
|
| + SANDBOX_DIE(quiet_ ? NULL : "Kernel refuses to turn on BPF filters");
|
| + }
|
| + }
|
| +
|
| + sandbox_has_started_ = true;
|
| +}
|
| +
|
| +// Compiles policy_ into a BPF program and hands ownership of it to the caller.
|
| +//
|
| +// When |force_verification| is true, or unconditionally in builds without
|
| +// NDEBUG, the compiled program is checked with Verifier::VerifyBPF(). A
|
| +// verification failure prints the program and is fatal.
|
| +scoped_ptr<CodeGen::Program> SandboxBPF::AssembleFilter(
|
| +    bool force_verification) {
|
| +#if !defined(NDEBUG)
|
| +  force_verification = true;
|
| +#endif
|
| +
|
| +  bpf_dsl::PolicyCompiler compiler(policy_.get(), Trap::Registry());
|
| +  scoped_ptr<CodeGen::Program> program = compiler.Compile();
|
| +
|
| +  // Verification is expensive, so it is skipped unless this is a debug build
|
| +  // or the caller explicitly asked for it.
|
| +  if (!force_verification) {
|
| +    return program.Pass();
|
| +  }
|
| +
|
| +  // A program that fails verification points at an internal error in the BPF
|
| +  // compiler; there is nothing the caller can do until that bug is fixed.
|
| +  const char* failure_reason = NULL;
|
| +  const bool verified =
|
| +      Verifier::VerifyBPF(&compiler, *program, *policy_, &failure_reason);
|
| +  if (!verified) {
|
| +    CodeGen::PrintProgram(*program);
|
| +    SANDBOX_DIE(failure_reason);
|
| +  }
|
| +
|
| +  return program.Pass();
|
| +}
|
| +
|
| +// Forwards to bpf_dsl::PolicyCompiler::IsRequiredForUnsafeTrap() for |sysno|.
|
| +bool SandboxBPF::IsRequiredForUnsafeTrap(int sysno) {
|
| +  const bool required = bpf_dsl::PolicyCompiler::IsRequiredForUnsafeTrap(sysno);
|
| +  return required;
|
| +}
|
| +
|
| +// Re-issues the system call described by |args| through Syscall::Call(),
|
| +// passing args.nr and the six arguments converted to intptr_t, and returns
|
| +// the result of that call.
|
| +intptr_t SandboxBPF::ForwardSyscall(const struct arch_seccomp_data& args) {
|
| +  intptr_t forwarded[6];
|
| +  for (int i = 0; i < 6; ++i) {
|
| +    forwarded[i] = static_cast<intptr_t>(args.args[i]);
|
| +  }
|
| +  return Syscall::Call(args.nr,
|
| +                       forwarded[0],
|
| +                       forwarded[1],
|
| +                       forwarded[2],
|
| +                       forwarded[3],
|
| +                       forwarded[4],
|
| +                       forwarded[5]);
|
| +}
|
| +
|
| +// Process-wide cached sandbox status. It starts out as STATUS_UNKNOWN and is
|
| +// updated by SupportsSeccompSandbox() and StartSandbox().
|
| +SandboxBPF::SandboxStatus SandboxBPF::status_ = STATUS_UNKNOWN;
|
| +
|
| +} // namespace sandbox
|
|
|