| Index: chrome/common/process_watcher_mac.cc
|
| ===================================================================
|
| --- chrome/common/process_watcher_mac.cc (revision 46468)
|
| +++ chrome/common/process_watcher_mac.cc (working copy)
|
| @@ -16,105 +16,155 @@
|
|
|
| namespace {
|
|
|
| -// Reap |child| process.
|
| -// This call blocks until completion.
|
| +const int kWaitBeforeKillSeconds = 2;
|
| +
|
| +// Reap |child| process. This call blocks until completion.
|
| void BlockingReap(pid_t child) {
|
| const pid_t result = HANDLE_EINTR(waitpid(child, NULL, 0));
|
| if (result == -1) {
|
| - PLOG(ERROR) << "waitpid(" << child << ")";
|
| - NOTREACHED();
|
| + PLOG(ERROR) << "waitpid(" << child << ", NULL, 0)";
|
| }
|
| }
|
|
|
| -// Waits for |timeout| seconds for the given |child| to exit and reap it.
|
| -// If the child doesn't exit within a couple of seconds, kill it.
|
| -void WaitForChildToDie(pid_t child, unsigned timeout) {
|
| +// Waits for |timeout| seconds for the given |child| to exit and reap it. If
|
| +// the child doesn't exit within the time specified, kills it.
|
| +//
|
| +// This function takes two approaches: first, it tries to use kqueue to
|
| +// observe when the process exits. kevent can monitor a kqueue with a
|
| +// timeout, so this method is preferred to wait for a specified period of
|
| +// time. Once the kqueue indicates the process has exited, waitpid will reap
|
| +// the exited child. If the kqueue doesn't provide an exit event notification,
|
| +// before the timeout expires, or if the kqueue fails or misbehaves, the
|
| +// process will be mercilessly killed and reaped.
|
| +//
|
| +// A child process passed to this function may be in one of several states:
|
| +// running, terminated and not yet reaped, and (apparently, and unfortunately)
|
| +// terminated and already reaped. Normally, a process will at least have been
|
| +// asked to exit before this function is called, but this is not required.
|
| +// If a process is terminating and unreaped, there may be a window between the
|
| +// time that kqueue will no longer recognize it and when it becomes an actual
|
| +// zombie that a non-blocking (WNOHANG) waitpid can reap. This condition is
|
| +// detected when kqueue indicates that the process is not running and a
|
| +// non-blocking waitpid fails to reap the process but indicates that it is
|
| +// still running. In this event, a blocking attempt to reap the process
|
| +// collects the known-dying child, preventing zombies from congregating.
|
| +//
|
| +// In the event that the kqueue misbehaves entirely, as it might under a
|
| +// EMFILE condition ("too many open files", or out of file descriptors), this
|
| +// function will forcibly kill and reap the child without delay. This
|
| +// eliminates another potential zombie vector. (If you're out of file
|
| +// descriptors, you're probably deep into something else, but that doesn't
|
| +// mean that zombies be allowed to kick you while you're down.)
|
| +//
|
| +// The fact that this function seemingly can be called to wait on a child
|
| +// that's not only already terminated but already reaped is a bit of a
|
| +// problem: a reaped child's pid can be reclaimed and may refer to a distinct
|
| +// process in that case. The fact that this function can seemingly be called
|
| +// to wait on a process that's not even a child is also a problem: kqueue will
|
| +// work in that case, but waitpid won't, and killing a non-child might not be
|
| +// the best approach.
|
| +void WaitForChildToDie(pid_t child, int timeout) {
|
| + DCHECK(child > 0);
|
| + DCHECK(timeout > 0);
|
| +
|
| + // DON'T ADD ANY EARLY RETURNS TO THIS FUNCTION without ensuring that
|
| + // |child| has been reaped. Specifically, even if a kqueue, kevent, or other
|
| + // call fails, this function should fall back to the last resort of trying
|
| + // to kill and reap the process. Not observing this rule will resurrect
|
| + // zombies.
|
| +
|
| + int result;
|
| +
|
| int kq = HANDLE_EINTR(kqueue());
|
| - file_util::ScopedFD auto_close(&kq);
|
| if (kq == -1) {
|
| - PLOG(ERROR) << "Failed to create kqueue";
|
| - return;
|
| - }
|
| + PLOG(ERROR) << "kqueue()";
|
| + } else {
|
| + file_util::ScopedFD auto_close_kq(&kq);
|
|
|
| - struct kevent event_to_add = {0};
|
| - EV_SET(&event_to_add, child, EVFILT_PROC, EV_ADD, NOTE_EXIT, 0, NULL);
|
| - // Register interest with kqueue.
|
| - int result = HANDLE_EINTR(kevent(kq, &event_to_add, 1, NULL, 0, NULL));
|
| - if (result == -1 && errno == ESRCH) {
|
| - // A "No Such Process" error is fine, the process may have died already
|
| - // and been reaped by someone else. But make sure that it was/is reaped.
|
| - // Don't report an error in case it was already reaped.
|
| - HANDLE_EINTR(waitpid(child, NULL, WNOHANG));
|
| - return;
|
| - }
|
| + struct kevent change = {0};
|
| + EV_SET(&change, child, EVFILT_PROC, EV_ADD, NOTE_EXIT, 0, NULL);
|
| + result = HANDLE_EINTR(kevent(kq, &change, 1, NULL, 0, NULL));
|
|
|
| - if (result == -1) {
|
| - PLOG(ERROR) << "Failed to register event to listen for death of pid "
|
| - << child;
|
| - return;
|
| - }
|
| + if (result == -1) {
|
| + if (errno != ESRCH) {
|
| + PLOG(ERROR) << "kevent (setup " << child << ")";
|
| + } else {
|
| + // At this point, one of the following has occurred:
|
| + // 1. The process has died but has not yet been reaped.
|
| + // 2. The process has died and has already been reaped.
|
| + // 3. The process is in the process of dying. It's no longer
|
| + // kqueueable, but it may not be waitable yet either. Mark calls
|
| + // this case the "zombie death race".
|
|
|
| - struct kevent event = {0};
|
| + result = HANDLE_EINTR(waitpid(child, NULL, WNOHANG));
|
|
|
| - DCHECK(timeout != 0);
|
| + if (result != 0) {
|
| + // A positive result indicates case 1. waitpid succeeded and reaped
|
| + // the child. A result of -1 indicates case 2. The child has already
|
| + // been reaped. In both of these cases, no further action is
|
| + // necessary.
|
| + return;
|
| + }
|
|
|
| - int num_processes_that_died = -1;
|
| - using base::Time;
|
| - using base::TimeDelta;
|
| - // We need to keep track of the elapsed time - if kevent() returns
|
| - // EINTR in the middle of blocking call we want to make up what's left
|
| - // of the timeout.
|
| - TimeDelta time_left = TimeDelta::FromSeconds(timeout);
|
| - Time wakeup = Time::Now() + time_left;
|
| - while(time_left.InMilliseconds() > 0) {
|
| - const struct timespec timeout = time_left.ToTimeSpec();
|
| - num_processes_that_died = kevent(kq, NULL, 0, &event, 1, &timeout);
|
| - if (num_processes_that_died >= 0)
|
| - break;
|
| - if (num_processes_that_died == -1 && errno == EINTR) {
|
| - time_left = wakeup - Time::Now();
|
| - continue;
|
| - }
|
| + // |result| is 0, indicating case 3. The process will be waitable in
|
| + // short order. Fall back out of the kqueue code to kill it (for good
|
| + // measure) and reap it.
|
| + }
|
| + } else {
|
| + // Keep track of the elapsed time to be able to restart kevent if it's
|
| + // interrupted.
|
| + base::TimeDelta remaining_delta = base::TimeDelta::FromSeconds(timeout);
|
| + base::Time deadline = base::Time::Now() + remaining_delta;
|
| + result = -1;
|
| + struct kevent event = {0};
|
| + while (remaining_delta.InMilliseconds() > 0) {
|
| + const struct timespec remaining_timespec = remaining_delta.ToTimeSpec();
|
| + result = kevent(kq, NULL, 0, &event, 1, &remaining_timespec);
|
| + if (result == -1 && errno == EINTR) {
|
| + remaining_delta = deadline - base::Time::Now();
|
| + result = 0;
|
| + } else {
|
| + break;
|
| + }
|
| + }
|
|
|
| - // If we got here, kevent() must have returned -1.
|
| - PLOG(ERROR) << "kevent() failed";
|
| - break;
|
| - }
|
| -
|
| - if (num_processes_that_died == -1) {
|
| - PLOG(ERROR) << "kevent failed";
|
| - return;
|
| - }
|
| - if (num_processes_that_died == 1) {
|
| - if (event.fflags & NOTE_EXIT &&
|
| - event.ident == static_cast<uintptr_t>(child)) {
|
| - // Process died, it's safe to make a blocking call here since the
|
| - // kqueue() notification occurs when the process is already zombified.
|
| - BlockingReap(child);
|
| - return;
|
| - } else {
|
| - PLOG(ERROR) << "kevent() returned unexpected result - ke.fflags ="
|
| - << event.fflags
|
| - << " ke.ident ="
|
| - << event.ident
|
| - << " while listening for pid="
|
| - << child;
|
| + if (result == -1) {
|
| + PLOG(ERROR) << "kevent (wait " << child << ")";
|
| + } else if (result > 1) {
|
| + LOG(ERROR) << "kevent (wait " << child << "): unexpected result "
|
| + << result;
|
| + } else if (result == 1) {
|
| + if ((event.fflags & NOTE_EXIT) &&
|
| + (event.ident == static_cast<uintptr_t>(child))) {
|
| + // The process is dead or dying. This won't block for long, if at
|
| + // all.
|
| + BlockingReap(child);
|
| + return;
|
| + } else {
|
| + LOG(ERROR) << "kevent (wait " << child
|
| + << "): unexpected event: fflags=" << event.fflags
|
| + << ", ident=" << event.ident;
|
| + }
|
| + }
|
| }
|
| }
|
|
|
| - // If we got here the child is still alive so kill it...
|
| - if (kill(child, SIGKILL) == 0) {
|
| - // SIGKILL is uncatchable. Since the signal was delivered, we can
|
| - // just wait for the process to die now in a blocking manner.
|
| + // The child is still alive, or is very freshly dead. Be sure by sending it
|
| + // a signal. This is safe even if it's freshly dead, because it will be a
|
| + // zombie (or on the way to zombiedom) and kill will return 0 even if the
|
| + // signal is not delivered to a live process.
|
| + result = kill(child, SIGKILL);
|
| + if (result == -1) {
|
| + PLOG(ERROR) << "kill(" << child << ", SIGKILL)";
|
| + } else {
|
| + // The child is definitely on the way out now. BlockingReap won't need to
|
| + // wait for long, if at all.
|
| BlockingReap(child);
|
| - } else {
|
| - PLOG(ERROR) << "While waiting for " << child << " to terminate we"
|
| - << " failed to deliver a SIGKILL signal";
|
| }
|
| }
|
|
|
| } // namespace
|
|
|
| void ProcessWatcher::EnsureProcessTerminated(base::ProcessHandle process) {
|
| - WaitForChildToDie(process, 2);
|
| + WaitForChildToDie(process, kWaitBeforeKillSeconds);
|
| }
|
|
|