chrome/common/process_watcher_mac.cc - Issue 1978001: Merge r46466 from the trunk to the 375 branch....

Unified Diff: chrome/common/process_watcher_mac.cc

Issue 1978001: Merge r46466 from the trunk to the 375 branch.... (Closed) Base URL: svn://svn.chromium.org/chrome/branches/375/src/

Patch Set: Created 10 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: chrome/common/process_watcher_mac.cc

===================================================================

--- chrome/common/process_watcher_mac.cc (revision 46468)

+++ chrome/common/process_watcher_mac.cc (working copy)

@@ -16,105 +16,155 @@

namespace {

-// Reap |child| process.

-// This call blocks until completion.

+const int kWaitBeforeKillSeconds = 2;

+// Reap |child| process. This call blocks until completion.

void BlockingReap(pid_t child) {

const pid_t result = HANDLE_EINTR(waitpid(child, NULL, 0));

if (result == -1) {

- PLOG(ERROR) << "waitpid(" << child << ")";

- NOTREACHED();

+ PLOG(ERROR) << "waitpid(" << child << ", NULL, 0)";

}

-// Waits for |timeout| seconds for the given |child| to exit and reap it.

-// If the child doesn't exit within a couple of seconds, kill it.

-void WaitForChildToDie(pid_t child, unsigned timeout) {

+// Waits for |timeout| seconds for the given |child| to exit and reap it. If

+// the child doesn't exit within the time specified, kills it.

+//

+// This function takes two approaches: first, it tries to use kqueue to

+// observe when the process exits. kevent can monitor a kqueue with a

+// timeout, so this method is preferred to wait for a specified period of

+// time. Once the kqueue indicates the process has exited, waitpid will reap

+// the exited child. If the kqueue doesn't provide an exit event notification,

+// before the timeout expires, or if the kqueue fails or misbehaves, the

+// process will be mercilessly killed and reaped.

+//

+// A child process passed to this function may be in one of several states:

+// running, terminated and not yet reaped, and (apparently, and unfortunately)

+// terminated and already reaped. Normally, a process will at least have been

+// asked to exit before this function is called, but this is not required.

+// If a process is terminating and unreaped, there may be a window between the

+// time that kqueue will no longer recognize it and when it becomes an actual

+// zombie that a non-blocking (WNOHANG) waitpid can reap. This condition is

+// detected when kqueue indicates that the process is not running and a

+// non-blocking waitpid fails to reap the process but indicates that it is

+// still running. In this event, a blocking attempt to reap the process

+// collects the known-dying child, preventing zombies from congregating.

+//

+// In the event that the kqueue misbehaves entirely, as it might under a

+// EMFILE condition ("too many open files", or out of file descriptors), this

+// function will forcibly kill and reap the child without delay. This

+// eliminates another potential zombie vector. (If you're out of file

+// descriptors, you're probably deep into something else, but that doesn't

+// mean that zombies be allowed to kick you while you're down.)

+//

+// The fact that this function seemingly can be called to wait on a child

+// that's not only already terminated but already reaped is a bit of a

+// problem: a reaped child's pid can be reclaimed and may refer to a distinct

+// process in that case. The fact that this function can seemingly be called

+// to wait on a process that's not even a child is also a problem: kqueue will

+// work in that case, but waitpid won't, and killing a non-child might not be

+// the best approach.

+void WaitForChildToDie(pid_t child, int timeout) {

+ DCHECK(child > 0);

+ DCHECK(timeout > 0);

+ // DON'T ADD ANY EARLY RETURNS TO THIS FUNCTION without ensuring that

+ // |child| has been reaped. Specifically, even if a kqueue, kevent, or other

+ // call fails, this function should fall back to the last resort of trying

+ // to kill and reap the process. Not observing this rule will resurrect

+ // zombies.

+ int result;

int kq = HANDLE_EINTR(kqueue());

- file_util::ScopedFD auto_close(&kq);

if (kq == -1) {

- PLOG(ERROR) << "Failed to create kqueue";

- return;

- }

+ PLOG(ERROR) << "kqueue()";

+ } else {

+ file_util::ScopedFD auto_close_kq(&kq);

- struct kevent event_to_add = {0};

- EV_SET(&event_to_add, child, EVFILT_PROC, EV_ADD, NOTE_EXIT, 0, NULL);

- // Register interest with kqueue.

- int result = HANDLE_EINTR(kevent(kq, &event_to_add, 1, NULL, 0, NULL));

- if (result == -1 && errno == ESRCH) {

- // A "No Such Process" error is fine, the process may have died already

- // and been reaped by someone else. But make sure that it was/is reaped.

- // Don't report an error in case it was already reaped.

- HANDLE_EINTR(waitpid(child, NULL, WNOHANG));

- return;

- }

+ struct kevent change = {0};

+ EV_SET(&change, child, EVFILT_PROC, EV_ADD, NOTE_EXIT, 0, NULL);

+ result = HANDLE_EINTR(kevent(kq, &change, 1, NULL, 0, NULL));

- if (result == -1) {

- PLOG(ERROR) << "Failed to register event to listen for death of pid "

- << child;

- return;

- }

+ if (result == -1) {

+ if (errno != ESRCH) {

+ PLOG(ERROR) << "kevent (setup " << child << ")";

+ } else {

+ // At this point, one of the following has occurred:

+ // 1. The process has died but has not yet been reaped.

+ // 2. The process has died and has already been reaped.

+ // 3. The process is in the process of dying. It's no longer

+ // kqueueable, but it may not be waitable yet either. Mark calls

+ // this case the "zombie death race".

- struct kevent event = {0};

+ result = HANDLE_EINTR(waitpid(child, NULL, WNOHANG));

- DCHECK(timeout != 0);

+ if (result != 0) {

+ // A positive result indicates case 1. waitpid succeeded and reaped

+ // the child. A result of -1 indicates case 2. The child has already

+ // been reaped. In both of these cases, no further action is

+ // necessary.

+ return;

+ }

- int num_processes_that_died = -1;

- using base::Time;

- using base::TimeDelta;

- // We need to keep track of the elapsed time - if kevent() returns

- // EINTR in the middle of blocking call we want to make up what's left

- // of the timeout.

- TimeDelta time_left = TimeDelta::FromSeconds(timeout);

- Time wakeup = Time::Now() + time_left;

- while(time_left.InMilliseconds() > 0) {

- const struct timespec timeout = time_left.ToTimeSpec();

- num_processes_that_died = kevent(kq, NULL, 0, &event, 1, &timeout);

- if (num_processes_that_died >= 0)

- break;

- if (num_processes_that_died == -1 && errno == EINTR) {

- time_left = wakeup - Time::Now();

- continue;

- }

+ // |result| is 0, indicating case 3. The process will be waitable in

+ // short order. Fall back out of the kqueue code to kill it (for good

+ // measure) and reap it.

+ }

+ } else {

+ // Keep track of the elapsed time to be able to restart kevent if it's

+ // interrupted.

+ base::TimeDelta remaining_delta = base::TimeDelta::FromSeconds(timeout);

+ base::Time deadline = base::Time::Now() + remaining_delta;

+ result = -1;

+ struct kevent event = {0};

+ while (remaining_delta.InMilliseconds() > 0) {

+ const struct timespec remaining_timespec = remaining_delta.ToTimeSpec();

+ result = kevent(kq, NULL, 0, &event, 1, &remaining_timespec);

+ if (result == -1 && errno == EINTR) {

+ remaining_delta = deadline - base::Time::Now();

+ result = 0;

+ } else {

+ break;

+ }

- // If we got here, kevent() must have returned -1.

- PLOG(ERROR) << "kevent() failed";

- break;

- }

- if (num_processes_that_died == -1) {

- PLOG(ERROR) << "kevent failed";

- return;

- }

- if (num_processes_that_died == 1) {

- if (event.fflags & NOTE_EXIT &&

- event.ident == static_cast<uintptr_t>(child)) {

- // Process died, it's safe to make a blocking call here since the

- // kqueue() notification occurs when the process is already zombified.

- BlockingReap(child);

- return;

- } else {

- PLOG(ERROR) << "kevent() returned unexpected result - ke.fflags ="

- << event.fflags

- << " ke.ident ="

- << event.ident

- << " while listening for pid="

- << child;

+ if (result == -1) {

+ PLOG(ERROR) << "kevent (wait " << child << ")";

+ } else if (result > 1) {

+ LOG(ERROR) << "kevent (wait " << child << "): unexpected result "

+ << result;

+ } else if (result == 1) {

+ if ((event.fflags & NOTE_EXIT) &&

+ (event.ident == static_cast<uintptr_t>(child))) {

+ // The process is dead or dying. This won't block for long, if at

+ // all.

+ BlockingReap(child);

+ return;

+ } else {

+ LOG(ERROR) << "kevent (wait " << child

+ << "): unexpected event: fflags=" << event.fflags

+ << ", ident=" << event.ident;

+ }

}

- // If we got here the child is still alive so kill it...

- if (kill(child, SIGKILL) == 0) {

- // SIGKILL is uncatchable. Since the signal was delivered, we can

- // just wait for the process to die now in a blocking manner.

+ // The child is still alive, or is very freshly dead. Be sure by sending it

+ // a signal. This is safe even if it's freshly dead, because it will be a

+ // zombie (or on the way to zombiedom) and kill will return 0 even if the

+ // signal is not delivered to a live process.

+ result = kill(child, SIGKILL);

+ if (result == -1) {

+ PLOG(ERROR) << "kill(" << child << ", SIGKILL)";

+ } else {

+ // The child is definitely on the way out now. BlockingReap won't need to

+ // wait for long, if at all.

BlockingReap(child);

- } else {

- PLOG(ERROR) << "While waiting for " << child << " to terminate we"

- << " failed to deliver a SIGKILL signal";

}

} // namespace

void ProcessWatcher::EnsureProcessTerminated(base::ProcessHandle process) {

- WaitForChildToDie(process, 2);

+ WaitForChildToDie(process, kWaitBeforeKillSeconds);

}

« no previous file with comments | « no previous file | no next file » | no next file with comments »