base/process_util_posix.cc - Issue 5377001: base: wait for children to terminate.

Unified Diff: base/process_util_posix.cc

Issue 5377001: base: wait for children to terminate. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Created 10 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: base/process_util_posix.cc

diff --git a/base/process_util_posix.cc b/base/process_util_posix.cc

index 8a97dcd58d95f4cce85f44415732980f26eee198..d53ab998dde35712c8997a821fbadd9d4261d04e 100644

--- a/base/process_util_posix.cc

+++ b/base/process_util_posix.cc

@@ -40,8 +40,10 @@ namespace base {

namespace {

-int WaitpidWithTimeout(ProcessHandle handle, int64 wait_milliseconds,

- bool* success) {

+// WaitpidWithTimeout performs a timed waitpid() call. The arguments and return

+// value match waitpid(2).

+pid_t WaitpidWithTimeout(ProcessHandle handle, int* status,

+ int64 wait_milliseconds) {

// This POSIX version of this function only guarantees that we wait no less

// than |wait_milliseconds| for the process to exit. The child process may

// exit sometime before the timeout has ended but we may still block for up

@@ -63,13 +65,15 @@ int WaitpidWithTimeout(ProcessHandle handle, int64 wait_milliseconds,

// has been installed. This means that when a SIGCHLD is sent, it will exit

// depending on behavior external to this function.

- // This function is used primarily for unit tests, if we want to use it in

- // the application itself it would probably be best to examine other routes.

- int status = -1;

- pid_t ret_pid = HANDLE_EINTR(waitpid(handle, &status, WNOHANG));

+ // This function is used in unit tests and by DidProcessCrash. See the

+ // comments in DidProcessCrash about its use in Chrome itself.

static const int64 kMaxSleepInMicroseconds = 1 << 18; // ~256 milliseconds.

int64 max_sleep_time_usecs = 1 << 10; // ~1 milliseconds.

- int64 double_sleep_time = 0;

+ int double_sleep_time = 0;

+ pid_t ret_pid = HANDLE_EINTR(waitpid(handle, status, WNOHANG));

+ if (ret_pid != 0)

+ return ret_pid;

// If the process hasn't exited yet, then sleep and try again.

Time wakeup_time = Time::Now() +

@@ -87,7 +91,7 @@ int WaitpidWithTimeout(ProcessHandle handle, int64 wait_milliseconds,

// usleep() will return 0 and set errno to EINTR on receipt of a signal

// such as SIGCHLD.

usleep(sleep_time_usecs);

- ret_pid = HANDLE_EINTR(waitpid(handle, &status, WNOHANG));

+ ret_pid = HANDLE_EINTR(waitpid(handle, status, WNOHANG));

if ((max_sleep_time_usecs < kMaxSleepInMicroseconds) &&

(double_sleep_time++ % 4 == 0)) {

@@ -95,10 +99,7 @@ int WaitpidWithTimeout(ProcessHandle handle, int64 wait_milliseconds,

}

- if (success)

- *success = (ret_pid != -1);

- return status;

+ return ret_pid;

}

void StackDumpSignalHandler(int signal, siginfo_t* info, ucontext_t* context) {

@@ -631,15 +632,27 @@ void RaiseProcessToHighPriority() {

}

bool DidProcessCrash(bool* child_exited, ProcessHandle handle) {

+ // We call DidProcessCrash when we see an EOF from a socket to that child.

+ // Sadly, the kernel is raceable: it will close a dead process's file

+ // descriptors before marking the process as dead. Therefore it's possible to

+ // see the EOF and still have a subsequent waitpid() return zero. (Had I

+ // known that from the beginning, child processing would have been designed

+ // differently.)

+ //

+ // Everything higher up copes with the case where waitpid fails, but we might

+ // lose some crash notifications. In order to minimise this we waitpid() in a

+ // loop with a timeout to try and catch the exit code. In the typical case,

+ // we should only be blocking for microseconds here.

+ //

+ // It's possible for a bad child to close its socket without exiting. Because

+ // we don't want to hang the browser in this case we have a timeout.

int status;

- const pid_t result = HANDLE_EINTR(waitpid(handle, &status, WNOHANG));

- if (result == -1) {

- PLOG(ERROR) << "waitpid(" << handle << ")";

- if (child_exited)

- *child_exited = false;

- return false;

- } else if (result == 0) {

- // the child hasn't exited yet.

+ pid_t pid = WaitpidWithTimeout(handle, &status, 250 /* milliseconds */);

+ if (pid == 0) {

+ // We waited for the full timeout and the child still isn't dead.

+ LOG(ERROR) << "We janked because we expected child "

+ << handle << " to have exited.";

if (child_exited)

*child_exited = false;

return false;

@@ -685,12 +698,10 @@ bool WaitForExitCode(ProcessHandle handle, int* exit_code) {

bool WaitForExitCodeWithTimeout(ProcessHandle handle, int* exit_code,

int64 timeout_milliseconds) {

- bool waitpid_success = false;

- int status = WaitpidWithTimeout(handle, timeout_milliseconds,

- &waitpid_success);

- if (status == -1)

- return false;

- if (!waitpid_success)

+ int status;

+ pid_t pid = WaitpidWithTimeout(handle, &status, timeout_milliseconds);

+ if (pid <= 0)

return false;

if (!WIFEXITED(status))

return false;

@@ -703,30 +714,31 @@ bool WaitForExitCodeWithTimeout(ProcessHandle handle, int* exit_code,

}

bool WaitForSingleProcess(ProcessHandle handle, int64 wait_milliseconds) {

- bool waitpid_success;

int status;

- if (wait_milliseconds == base::kNoTimeout)

- waitpid_success = (HANDLE_EINTR(waitpid(handle, &status, 0)) != -1);

- else

- status = WaitpidWithTimeout(handle, wait_milliseconds, &waitpid_success);

- if (status != -1) {

- DCHECK(waitpid_success);

- return WIFEXITED(status);

+ pid_t pid;

+ if (wait_milliseconds == base::kNoTimeout) {

+ pid = (HANDLE_EINTR(waitpid(handle, &status, 0)) == handle);

} else {

- return false;

+ pid = WaitpidWithTimeout(handle, &status, wait_milliseconds);

}

+ if (pid <= 0)

+ return false;

+ return WIFEXITED(status);

}

bool CrashAwareSleep(ProcessHandle handle, int64 wait_milliseconds) {

- bool waitpid_success;

- int status = WaitpidWithTimeout(handle, wait_milliseconds, &waitpid_success);

- if (status != -1) {

- DCHECK(waitpid_success);

- return !(WIFEXITED(status) || WIFSIGNALED(status));

+ int status;

+ pid_t pid = WaitpidWithTimeout(handle, &status, wait_milliseconds);

+ if (pid < 0) {

+ // If waitpid failed then the process probably didn't exist prior to the

+ // call.

+ return true;

+ } else if (pid == 0) {

+ return false;

} else {

- // If waitpid returned with an error, then the process doesn't exist

- // (which most probably means it didn't exist before our call).

- return waitpid_success;

+ return !(WIFEXITED(status) || WIFSIGNALED(status));

}

« no previous file with comments | « no previous file | no next file » | no next file with comments »