Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(996)

Unified Diff: base/process_util_posix.cc

Issue 5377001: base: wait for children to terminate. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Created 10 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: base/process_util_posix.cc
diff --git a/base/process_util_posix.cc b/base/process_util_posix.cc
index 8a97dcd58d95f4cce85f44415732980f26eee198..d53ab998dde35712c8997a821fbadd9d4261d04e 100644
--- a/base/process_util_posix.cc
+++ b/base/process_util_posix.cc
@@ -40,8 +40,10 @@ namespace base {
namespace {
-int WaitpidWithTimeout(ProcessHandle handle, int64 wait_milliseconds,
- bool* success) {
+// WaitpidWithTimeout performs a timed waitpid() call. The arguments and return
+// value match waitpid(2).
+pid_t WaitpidWithTimeout(ProcessHandle handle, int* status,
+ int64 wait_milliseconds) {
// This POSIX version of this function only guarantees that we wait no less
// than |wait_milliseconds| for the process to exit. The child process may
// exit sometime before the timeout has ended but we may still block for up
@@ -63,13 +65,15 @@ int WaitpidWithTimeout(ProcessHandle handle, int64 wait_milliseconds,
// has been installed. This means that when a SIGCHLD is sent, it will exit
// depending on behavior external to this function.
//
- // This function is used primarily for unit tests, if we want to use it in
- // the application itself it would probably be best to examine other routes.
- int status = -1;
- pid_t ret_pid = HANDLE_EINTR(waitpid(handle, &status, WNOHANG));
+ // This function is used in unit tests and by DidProcessCrash. See the
+ // comments in DidProcessCrash about its use in Chrome itself.
static const int64 kMaxSleepInMicroseconds = 1 << 18; // ~256 milliseconds.
int64 max_sleep_time_usecs = 1 << 10; // ~1 milliseconds.
- int64 double_sleep_time = 0;
+ int double_sleep_time = 0;
+
+ pid_t ret_pid = HANDLE_EINTR(waitpid(handle, status, WNOHANG));
+ if (ret_pid != 0)
+ return ret_pid;
// If the process hasn't exited yet, then sleep and try again.
Time wakeup_time = Time::Now() +
@@ -87,7 +91,7 @@ int WaitpidWithTimeout(ProcessHandle handle, int64 wait_milliseconds,
// usleep() will return 0 and set errno to EINTR on receipt of a signal
// such as SIGCHLD.
usleep(sleep_time_usecs);
- ret_pid = HANDLE_EINTR(waitpid(handle, &status, WNOHANG));
+ ret_pid = HANDLE_EINTR(waitpid(handle, status, WNOHANG));
if ((max_sleep_time_usecs < kMaxSleepInMicroseconds) &&
(double_sleep_time++ % 4 == 0)) {
@@ -95,10 +99,7 @@ int WaitpidWithTimeout(ProcessHandle handle, int64 wait_milliseconds,
}
}
- if (success)
- *success = (ret_pid != -1);
-
- return status;
+ return ret_pid;
}
void StackDumpSignalHandler(int signal, siginfo_t* info, ucontext_t* context) {
@@ -631,15 +632,27 @@ void RaiseProcessToHighPriority() {
}
bool DidProcessCrash(bool* child_exited, ProcessHandle handle) {
+ // We call DidProcessCrash when we see an EOF from a socket to that child.
+ // Sadly, the kernel is raceable: it will close a dead process's file
+ // descriptors before marking the process as dead. Therefore it's possible to
+ // see the EOF and still have a subsequent waitpid() return zero. (Had I
+ // known that from the beginning, child processing would have been designed
+ // differently.)
+ //
+ // Everything higher up copes with the case where waitpid fails, but we might
+ // lose some crash notifications. In order to minimise this we waitpid() in a
+ // loop with a timeout to try and catch the exit code. In the typical case,
+ // we should only be blocking for microseconds here.
+ //
+ // It's possible for a bad child to close its socket without exiting. Because
+ // we don't want to hang the browser in this case we have a timeout.
int status;
- const pid_t result = HANDLE_EINTR(waitpid(handle, &status, WNOHANG));
- if (result == -1) {
- PLOG(ERROR) << "waitpid(" << handle << ")";
- if (child_exited)
- *child_exited = false;
- return false;
- } else if (result == 0) {
- // the child hasn't exited yet.
+ pid_t pid = WaitpidWithTimeout(handle, &status, 250 /* milliseconds */);
+
+ if (pid == 0) {
+ // We waited for the full timeout and the child still isn't dead.
+ LOG(ERROR) << "We janked because we expected child "
+ << handle << " to have exited.";
if (child_exited)
*child_exited = false;
return false;
@@ -685,12 +698,10 @@ bool WaitForExitCode(ProcessHandle handle, int* exit_code) {
bool WaitForExitCodeWithTimeout(ProcessHandle handle, int* exit_code,
int64 timeout_milliseconds) {
- bool waitpid_success = false;
- int status = WaitpidWithTimeout(handle, timeout_milliseconds,
- &waitpid_success);
- if (status == -1)
- return false;
- if (!waitpid_success)
+ int status;
+ pid_t pid = WaitpidWithTimeout(handle, &status, timeout_milliseconds);
+
+ if (pid <= 0)
return false;
if (!WIFEXITED(status))
return false;
@@ -703,30 +714,31 @@ bool WaitForExitCodeWithTimeout(ProcessHandle handle, int* exit_code,
}
bool WaitForSingleProcess(ProcessHandle handle, int64 wait_milliseconds) {
- bool waitpid_success;
int status;
- if (wait_milliseconds == base::kNoTimeout)
- waitpid_success = (HANDLE_EINTR(waitpid(handle, &status, 0)) != -1);
- else
- status = WaitpidWithTimeout(handle, wait_milliseconds, &waitpid_success);
- if (status != -1) {
- DCHECK(waitpid_success);
- return WIFEXITED(status);
+ pid_t pid;
+ if (wait_milliseconds == base::kNoTimeout) {
+ pid = (HANDLE_EINTR(waitpid(handle, &status, 0)) == handle);
} else {
- return false;
+ pid = WaitpidWithTimeout(handle, &status, wait_milliseconds);
}
+
+ if (pid <= 0)
+ return false;
+
+ return WIFEXITED(status);
}
bool CrashAwareSleep(ProcessHandle handle, int64 wait_milliseconds) {
- bool waitpid_success;
- int status = WaitpidWithTimeout(handle, wait_milliseconds, &waitpid_success);
- if (status != -1) {
- DCHECK(waitpid_success);
- return !(WIFEXITED(status) || WIFSIGNALED(status));
+ int status;
+ pid_t pid = WaitpidWithTimeout(handle, &status, wait_milliseconds);
+ if (pid < 0) {
+ // If waitpid failed then the process probably didn't exist prior to the
+ // call.
+ return true;
+ } else if (pid == 0) {
+ return false;
} else {
- // If waitpid returned with an error, then the process doesn't exist
- // (which most probably means it didn't exist before our call).
- return waitpid_success;
+ return !(WIFEXITED(status) || WIFSIGNALED(status));
}
}
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698