OLD | NEW |
1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/common/process_watcher.h" | 5 #include "chrome/common/process_watcher.h" |
6 | 6 |
7 #include <errno.h> | 7 #include <errno.h> |
8 #include <signal.h> | 8 #include <signal.h> |
9 #include <sys/event.h> | 9 #include <sys/event.h> |
10 #include <sys/types.h> | 10 #include <sys/types.h> |
11 #include <sys/wait.h> | 11 #include <sys/wait.h> |
12 | 12 |
13 #include "base/eintr_wrapper.h" | 13 #include "base/eintr_wrapper.h" |
14 #include "base/file_util.h" | 14 #include "base/file_util.h" |
15 #include "base/time.h" | 15 #include "base/time.h" |
16 | 16 |
17 namespace { | 17 namespace { |
18 | 18 |
19 // Reap |child| process. | 19 const int kWaitBeforeKillSeconds = 2; |
20 // This call blocks until completion. | 20 |
| 21 // Reap |child| process. This call blocks until completion. |
21 void BlockingReap(pid_t child) { | 22 void BlockingReap(pid_t child) { |
22 const pid_t result = HANDLE_EINTR(waitpid(child, NULL, 0)); | 23 const pid_t result = HANDLE_EINTR(waitpid(child, NULL, 0)); |
23 if (result == -1) { | 24 if (result == -1) { |
24 PLOG(ERROR) << "waitpid(" << child << ")"; | 25 PLOG(ERROR) << "waitpid(" << child << ", NULL, 0)"; |
25 NOTREACHED(); | |
26 } | 26 } |
27 } | 27 } |
28 | 28 |
29 // Waits for |timeout| seconds for the given |child| to exit and reap it. | 29 // Waits for |timeout| seconds for the given |child| to exit and reap it. If |
30 // If the child doesn't exit within a couple of seconds, kill it. | 30 // the child doesn't exit within the time specified, kills it. |
31 void WaitForChildToDie(pid_t child, unsigned timeout) { | 31 // |
| 32 // This function takes two approaches: first, it tries to use kqueue to |
| 33 // observe when the process exits. kevent can monitor a kqueue with a |
| 34 // timeout, so this method is preferred to wait for a specified period of |
| 35 // time. Once the kqueue indicates the process has exited, waitpid will reap |
| 36 // the exited child. If the kqueue doesn't provide an exit event notification |
| 37 // before the timeout expires, or if the kqueue fails or misbehaves, the |
| 38 // process will be mercilessly killed and reaped. |
| 39 // |
| 40 // A child process passed to this function may be in one of several states: |
| 41 // running, terminated and not yet reaped, and (apparently, and unfortunately) |
| 42 // terminated and already reaped. Normally, a process will at least have been |
| 43 // asked to exit before this function is called, but this is not required. |
| 44 // If a process is terminating and unreaped, there may be a window between the |
| 45 // time that kqueue will no longer recognize it and when it becomes an actual |
| 46 // zombie that a non-blocking (WNOHANG) waitpid can reap. This condition is |
| 47 // detected when kqueue indicates that the process is not running and a |
| 48 // non-blocking waitpid fails to reap the process but indicates that it is |
| 49 // still running. In this event, a blocking attempt to reap the process |
| 50 // collects the known-dying child, preventing zombies from congregating. |
| 51 // |
| 52 // In the event that the kqueue misbehaves entirely, as it might under an |
| 53 // EMFILE condition ("too many open files", or out of file descriptors), this |
| 54 // function will forcibly kill and reap the child without delay. This |
| 55 // eliminates another potential zombie vector. (If you're out of file |
| 56 // descriptors, you're probably deep into something else, but that doesn't |
| 57 // mean that zombies should be allowed to kick you while you're down.) |
| 58 // |
| 59 // The fact that this function seemingly can be called to wait on a child |
| 60 // that's not only already terminated but already reaped is a bit of a |
| 61 // problem: a reaped child's pid can be reclaimed and may refer to a distinct |
| 62 // process in that case. The fact that this function can seemingly be called |
| 63 // to wait on a process that's not even a child is also a problem: kqueue will |
| 64 // work in that case, but waitpid won't, and killing a non-child might not be |
| 65 // the best approach. |
| 66 void WaitForChildToDie(pid_t child, int timeout) { |
| 67 DCHECK(child > 0); |
| 68 DCHECK(timeout > 0); |
| 69 |
| 70 // DON'T ADD ANY EARLY RETURNS TO THIS FUNCTION without ensuring that |
| 71 // |child| has been reaped. Specifically, even if a kqueue, kevent, or other |
| 72 // call fails, this function should fall back to the last resort of trying |
| 73 // to kill and reap the process. Not observing this rule will resurrect |
| 74 // zombies. |
| 75 |
| 76 int result; |
| 77 |
32 int kq = HANDLE_EINTR(kqueue()); | 78 int kq = HANDLE_EINTR(kqueue()); |
33 file_util::ScopedFD auto_close(&kq); | |
34 if (kq == -1) { | 79 if (kq == -1) { |
35 PLOG(ERROR) << "Failed to create kqueue"; | 80 PLOG(ERROR) << "kqueue()"; |
36 return; | 81 } else { |
37 } | 82 file_util::ScopedFD auto_close_kq(&kq); |
38 | 83 |
39 struct kevent event_to_add = {0}; | 84 struct kevent change = {0}; |
40 EV_SET(&event_to_add, child, EVFILT_PROC, EV_ADD, NOTE_EXIT, 0, NULL); | 85 EV_SET(&change, child, EVFILT_PROC, EV_ADD, NOTE_EXIT, 0, NULL); |
41 // Register interest with kqueue. | 86 result = HANDLE_EINTR(kevent(kq, &change, 1, NULL, 0, NULL)); |
42 int result = HANDLE_EINTR(kevent(kq, &event_to_add, 1, NULL, 0, NULL)); | |
43 if (result == -1 && errno == ESRCH) { | |
44 // A "No Such Process" error is fine, the process may have died already | |
45 // and been reaped by someone else. But make sure that it was/is reaped. | |
46 // Don't report an error in case it was already reaped. | |
47 HANDLE_EINTR(waitpid(child, NULL, WNOHANG)); | |
48 return; | |
49 } | |
50 | 87 |
51 if (result == -1) { | 88 if (result == -1) { |
52 PLOG(ERROR) << "Failed to register event to listen for death of pid " | 89 if (errno != ESRCH) { |
53 << child; | 90 PLOG(ERROR) << "kevent (setup " << child << ")"; |
54 return; | 91 } else { |
55 } | 92 // At this point, one of the following has occurred: |
| 93 // 1. The process has died but has not yet been reaped. |
| 94 // 2. The process has died and has already been reaped. |
| 95 // 3. The process is in the process of dying. It's no longer |
| 96 // kqueueable, but it may not be waitable yet either. Mark calls |
| 97 // this case the "zombie death race". |
56 | 98 |
57 struct kevent event = {0}; | 99 result = HANDLE_EINTR(waitpid(child, NULL, WNOHANG)); |
58 | 100 |
59 DCHECK(timeout != 0); | 101 if (result != 0) { |
| 102 // A positive result indicates case 1. waitpid succeeded and reaped |
| 103 // the child. A result of -1 indicates case 2. The child has already |
| 104 // been reaped. In both of these cases, no further action is |
| 105 // necessary. |
| 106 return; |
| 107 } |
60 | 108 |
61 int num_processes_that_died = -1; | 109 // |result| is 0, indicating case 3. The process will be waitable in |
62 using base::Time; | 110 // short order. Fall back out of the kqueue code to kill it (for good |
63 using base::TimeDelta; | 111 // measure) and reap it. |
64 // We need to keep track of the elapsed time - if kevent() returns | 112 } |
65 // EINTR in the middle of blocking call we want to make up what's left | 113 } else { |
66 // of the timeout. | 114 // Keep track of the elapsed time to be able to restart kevent if it's |
67 TimeDelta time_left = TimeDelta::FromSeconds(timeout); | 115 // interrupted. |
68 Time wakeup = Time::Now() + time_left; | 116 base::TimeDelta remaining_delta = base::TimeDelta::FromSeconds(timeout); |
69 while(time_left.InMilliseconds() > 0) { | 117 base::Time deadline = base::Time::Now() + remaining_delta; |
70 const struct timespec timeout = time_left.ToTimeSpec(); | 118 result = -1; |
71 num_processes_that_died = kevent(kq, NULL, 0, &event, 1, &timeout); | 119 struct kevent event = {0}; |
72 if (num_processes_that_died >= 0) | 120 while (remaining_delta.InMilliseconds() > 0) { |
73 break; | 121 const struct timespec remaining_timespec = remaining_delta.ToTimeSpec(); |
74 if (num_processes_that_died == -1 && errno == EINTR) { | 122 result = kevent(kq, NULL, 0, &event, 1, &remaining_timespec); |
75 time_left = wakeup - Time::Now(); | 123 if (result == -1 && errno == EINTR) { |
76 continue; | 124 remaining_delta = deadline - base::Time::Now(); |
77 } | 125 result = 0; |
| 126 } else { |
| 127 break; |
| 128 } |
| 129 } |
78 | 130 |
79 // If we got here, kevent() must have returned -1. | 131 if (result == -1) { |
80 PLOG(ERROR) << "kevent() failed"; | 132 PLOG(ERROR) << "kevent (wait " << child << ")"; |
81 break; | 133 } else if (result > 1) { |
82 } | 134 LOG(ERROR) << "kevent (wait " << child << "): unexpected result " |
83 | 135 << result; |
84 if (num_processes_that_died == -1) { | 136 } else if (result == 1) { |
85 PLOG(ERROR) << "kevent failed"; | 137 if ((event.fflags & NOTE_EXIT) && |
86 return; | 138 (event.ident == static_cast<uintptr_t>(child))) { |
87 } | 139 // The process is dead or dying. This won't block for long, if at |
88 if (num_processes_that_died == 1) { | 140 // all. |
89 if (event.fflags & NOTE_EXIT && | 141 BlockingReap(child); |
90 event.ident == static_cast<uintptr_t>(child)) { | 142 return; |
91 // Process died, it's safe to make a blocking call here since the | 143 } else { |
92 // kqueue() notification occurs when the process is already zombified. | 144 LOG(ERROR) << "kevent (wait " << child |
93 BlockingReap(child); | 145 << "): unexpected event: fflags=" << event.fflags |
94 return; | 146 << ", ident=" << event.ident; |
95 } else { | 147 } |
96 PLOG(ERROR) << "kevent() returned unexpected result - ke.fflags =" | 148 } |
97 << event.fflags | |
98 << " ke.ident =" | |
99 << event.ident | |
100 << " while listening for pid=" | |
101 << child; | |
102 } | 149 } |
103 } | 150 } |
104 | 151 |
105 // If we got here the child is still alive so kill it... | 152 // The child is still alive, or is very freshly dead. Be sure by sending it |
106 if (kill(child, SIGKILL) == 0) { | 153 // a signal. This is safe even if it's freshly dead, because it will be a |
107 // SIGKILL is uncatchable. Since the signal was delivered, we can | 154 // zombie (or on the way to zombiedom) and kill will return 0 even if the |
108 // just wait for the process to die now in a blocking manner. | 155 // signal is not delivered to a live process. |
| 156 result = kill(child, SIGKILL); |
| 157 if (result == -1) { |
| 158 PLOG(ERROR) << "kill(" << child << ", SIGKILL)"; |
| 159 } else { |
| 160 // The child is definitely on the way out now. BlockingReap won't need to |
| 161 // wait for long, if at all. |
109 BlockingReap(child); | 162 BlockingReap(child); |
110 } else { | |
111 PLOG(ERROR) << "While waiting for " << child << " to terminate we" | |
112 << " failed to deliver a SIGKILL signal"; | |
113 } | 163 } |
114 } | 164 } |
115 | 165 |
116 } // namespace | 166 } // namespace |
117 | 167 |
118 void ProcessWatcher::EnsureProcessTerminated(base::ProcessHandle process) { | 168 void ProcessWatcher::EnsureProcessTerminated(base::ProcessHandle process) { |
119 WaitForChildToDie(process, 2); | 169 WaitForChildToDie(process, kWaitBeforeKillSeconds); |
120 } | 170 } |
OLD | NEW |