Index: base/process_util_posix.cc |
diff --git a/base/process_util_posix.cc b/base/process_util_posix.cc |
index 8a97dcd58d95f4cce85f44415732980f26eee198..d53ab998dde35712c8997a821fbadd9d4261d04e 100644 |
--- a/base/process_util_posix.cc |
+++ b/base/process_util_posix.cc |
@@ -40,8 +40,10 @@ namespace base { |
namespace { |
-int WaitpidWithTimeout(ProcessHandle handle, int64 wait_milliseconds, |
- bool* success) { |
+// WaitpidWithTimeout performs a timed waitpid() call. The arguments and return |
+// value match waitpid(2). |
+pid_t WaitpidWithTimeout(ProcessHandle handle, int* status, |
+ int64 wait_milliseconds) { |
// This POSIX version of this function only guarantees that we wait no less |
// than |wait_milliseconds| for the process to exit. The child process may |
// exit sometime before the timeout has ended but we may still block for up |
@@ -63,13 +65,15 @@ int WaitpidWithTimeout(ProcessHandle handle, int64 wait_milliseconds, |
// has been installed. This means that when a SIGCHLD is sent, it will exit |
// depending on behavior external to this function. |
// |
- // This function is used primarily for unit tests, if we want to use it in |
- // the application itself it would probably be best to examine other routes. |
- int status = -1; |
- pid_t ret_pid = HANDLE_EINTR(waitpid(handle, &status, WNOHANG)); |
+ // This function is used in unit tests and by DidProcessCrash. See the |
+ // comments in DidProcessCrash about its use in Chrome itself. |
static const int64 kMaxSleepInMicroseconds = 1 << 18; // ~256 milliseconds. |
int64 max_sleep_time_usecs = 1 << 10; // ~1 milliseconds. |
- int64 double_sleep_time = 0; |
+ int double_sleep_time = 0; |
+ |
+ pid_t ret_pid = HANDLE_EINTR(waitpid(handle, status, WNOHANG)); |
+ if (ret_pid != 0) |
+ return ret_pid; |
// If the process hasn't exited yet, then sleep and try again. |
Time wakeup_time = Time::Now() + |
@@ -87,7 +91,7 @@ int WaitpidWithTimeout(ProcessHandle handle, int64 wait_milliseconds, |
// usleep() will return 0 and set errno to EINTR on receipt of a signal |
// such as SIGCHLD. |
usleep(sleep_time_usecs); |
- ret_pid = HANDLE_EINTR(waitpid(handle, &status, WNOHANG)); |
+ ret_pid = HANDLE_EINTR(waitpid(handle, status, WNOHANG)); |
if ((max_sleep_time_usecs < kMaxSleepInMicroseconds) && |
(double_sleep_time++ % 4 == 0)) { |
@@ -95,10 +99,7 @@ int WaitpidWithTimeout(ProcessHandle handle, int64 wait_milliseconds, |
} |
} |
- if (success) |
- *success = (ret_pid != -1); |
- |
- return status; |
+ return ret_pid; |
} |
void StackDumpSignalHandler(int signal, siginfo_t* info, ucontext_t* context) { |
@@ -631,15 +632,27 @@ void RaiseProcessToHighPriority() { |
} |
bool DidProcessCrash(bool* child_exited, ProcessHandle handle) { |
+ // We call DidProcessCrash when we see an EOF from a socket to that child. |
+ // Sadly, the kernel is raceable: it will close a dead process's file |
+ // descriptors before marking the process as dead. Therefore it's possible to |
+ // see the EOF and still have a subsequent waitpid() return zero. (Had I |
+ // known that from the beginning, child processing would have been designed |
+ // differently.) |
+ // |
+ // Everything higher up copes with the case where waitpid fails, but we might |
+ // lose some crash notifications. In order to minimise this we waitpid() in a |
+ // loop with a timeout to try and catch the exit code. In the typical case, |
+ // we should only be blocking for microseconds here. |
+ // |
+ // It's possible for a bad child to close its socket without exiting. Because |
+ // we don't want to hang the browser in this case we have a timeout. |
int status; |
- const pid_t result = HANDLE_EINTR(waitpid(handle, &status, WNOHANG)); |
- if (result == -1) { |
- PLOG(ERROR) << "waitpid(" << handle << ")"; |
- if (child_exited) |
- *child_exited = false; |
- return false; |
- } else if (result == 0) { |
- // the child hasn't exited yet. |
+ pid_t pid = WaitpidWithTimeout(handle, &status, 250 /* milliseconds */); |
+ |
+ if (pid == 0) { |
+ // We waited for the full timeout and the child still isn't dead. |
+ LOG(ERROR) << "We janked because we expected child " |
+ << handle << " to have exited."; |
if (child_exited) |
*child_exited = false; |
return false; |
@@ -685,12 +698,10 @@ bool WaitForExitCode(ProcessHandle handle, int* exit_code) { |
bool WaitForExitCodeWithTimeout(ProcessHandle handle, int* exit_code, |
int64 timeout_milliseconds) { |
- bool waitpid_success = false; |
- int status = WaitpidWithTimeout(handle, timeout_milliseconds, |
- &waitpid_success); |
- if (status == -1) |
- return false; |
- if (!waitpid_success) |
+ int status; |
+ pid_t pid = WaitpidWithTimeout(handle, &status, timeout_milliseconds); |
+ |
+ if (pid <= 0) |
return false; |
if (!WIFEXITED(status)) |
return false; |
@@ -703,30 +714,31 @@ bool WaitForExitCodeWithTimeout(ProcessHandle handle, int* exit_code, |
} |
bool WaitForSingleProcess(ProcessHandle handle, int64 wait_milliseconds) { |
- bool waitpid_success; |
int status; |
- if (wait_milliseconds == base::kNoTimeout) |
- waitpid_success = (HANDLE_EINTR(waitpid(handle, &status, 0)) != -1); |
- else |
- status = WaitpidWithTimeout(handle, wait_milliseconds, &waitpid_success); |
- if (status != -1) { |
- DCHECK(waitpid_success); |
- return WIFEXITED(status); |
+ pid_t pid; |
+ if (wait_milliseconds == base::kNoTimeout) { |
+ pid = (HANDLE_EINTR(waitpid(handle, &status, 0)) == handle); |
} else { |
- return false; |
+ pid = WaitpidWithTimeout(handle, &status, wait_milliseconds); |
} |
+ |
+ if (pid <= 0) |
+ return false; |
+ |
+ return WIFEXITED(status); |
} |
bool CrashAwareSleep(ProcessHandle handle, int64 wait_milliseconds) { |
- bool waitpid_success; |
- int status = WaitpidWithTimeout(handle, wait_milliseconds, &waitpid_success); |
- if (status != -1) { |
- DCHECK(waitpid_success); |
- return !(WIFEXITED(status) || WIFSIGNALED(status)); |
+ int status; |
+ pid_t pid = WaitpidWithTimeout(handle, &status, wait_milliseconds); |
+ if (pid < 0) { |
+ // If waitpid failed then the process probably didn't exist prior to the |
+ // call. |
+ return true; |
+ } else if (pid == 0) { |
+ return false; |
} else { |
- // If waitpid returned with an error, then the process doesn't exist |
- // (which most probably means it didn't exist before our call). |
- return waitpid_success; |
+ return !(WIFEXITED(status) || WIFSIGNALED(status)); |
} |
} |