OLD | NEW |
| (Empty) |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "content/gpu/gpu_watchdog_thread.h" | |
6 | |
7 #include <errno.h> | |
8 #include <stdint.h> | |
9 | |
10 #include "base/bind.h" | |
11 #include "base/bind_helpers.h" | |
12 #include "base/command_line.h" | |
13 #include "base/compiler_specific.h" | |
14 #include "base/debug/alias.h" | |
15 #include "base/files/file_util.h" | |
16 #include "base/location.h" | |
17 #include "base/macros.h" | |
18 #include "base/power_monitor/power_monitor.h" | |
19 #include "base/process/process.h" | |
20 #include "base/single_thread_task_runner.h" | |
21 #include "base/threading/platform_thread.h" | |
22 #include "build/build_config.h" | |
23 #include "content/public/common/content_switches.h" | |
24 #include "content/public/common/result_codes.h" | |
25 | |
26 #if defined(OS_WIN) | |
27 #include <windows.h> | |
28 #endif | |
29 | |
30 namespace content { | |
namespace {
#if defined(USE_X11)
// File read by GetActiveTTY(); its contents are parsed as "tty<N>" to learn
// which virtual terminal is currently active.
const base::FilePath::CharType kTtyFilePath[] =
    FILE_PATH_LITERAL("/sys/class/tty/tty0/active");

// Bytes written to the watchdog window's property by SetupXChangeProp() in
// order to provoke a PropertyNotify event from the X server.
const unsigned char text[20] = "check";
#endif
}  // namespace
38 | |
39 GpuWatchdogThread::GpuWatchdogThread(int timeout) | |
40 : base::Thread("Watchdog"), | |
41 watched_message_loop_(base::MessageLoop::current()), | |
42 timeout_(base::TimeDelta::FromMilliseconds(timeout)), | |
43 armed_(false), | |
44 task_observer_(this), | |
45 use_thread_cpu_time_(true), | |
46 responsive_acknowledge_count_(0), | |
47 #if defined(OS_WIN) | |
48 watched_thread_handle_(0), | |
49 arm_cpu_time_(), | |
50 #endif | |
51 suspended_(false), | |
52 #if defined(USE_X11) | |
53 display_(NULL), | |
54 window_(0), | |
55 atom_(None), | |
56 host_tty_(-1), | |
57 #endif | |
58 weak_factory_(this) { | |
59 DCHECK(timeout >= 0); | |
60 | |
61 #if defined(OS_WIN) | |
62 // GetCurrentThread returns a pseudo-handle that cannot be used by one thread | |
63 // to identify another. DuplicateHandle creates a "real" handle that can be | |
64 // used for this purpose. | |
65 BOOL result = DuplicateHandle(GetCurrentProcess(), | |
66 GetCurrentThread(), | |
67 GetCurrentProcess(), | |
68 &watched_thread_handle_, | |
69 THREAD_QUERY_INFORMATION, | |
70 FALSE, | |
71 0); | |
72 DCHECK(result); | |
73 #endif | |
74 | |
75 #if defined(USE_X11) | |
76 tty_file_ = base::OpenFile(base::FilePath(kTtyFilePath), "r"); | |
77 SetupXServer(); | |
78 #endif | |
79 watched_message_loop_->AddTaskObserver(&task_observer_); | |
80 } | |
81 | |
82 void GpuWatchdogThread::PostAcknowledge() { | |
83 // Called on the monitored thread. Responds with OnAcknowledge. Cannot use | |
84 // the method factory. Rely on reference counting instead. | |
85 task_runner()->PostTask(FROM_HERE, | |
86 base::Bind(&GpuWatchdogThread::OnAcknowledge, this)); | |
87 } | |
88 | |
89 void GpuWatchdogThread::CheckArmed() { | |
90 // Acknowledge the watchdog if it has armed itself. The watchdog will not | |
91 // change its armed state until it is acknowledged. | |
92 if (armed()) { | |
93 PostAcknowledge(); | |
94 } | |
95 } | |
96 | |
97 void GpuWatchdogThread::Init() { | |
98 // Schedule the first check. | |
99 OnCheck(false); | |
100 } | |
101 | |
102 void GpuWatchdogThread::CleanUp() { | |
103 weak_factory_.InvalidateWeakPtrs(); | |
104 } | |
105 | |
106 GpuWatchdogThread::GpuWatchdogTaskObserver::GpuWatchdogTaskObserver( | |
107 GpuWatchdogThread* watchdog) | |
108 : watchdog_(watchdog) { | |
109 } | |
110 | |
111 GpuWatchdogThread::GpuWatchdogTaskObserver::~GpuWatchdogTaskObserver() { | |
112 } | |
113 | |
114 void GpuWatchdogThread::GpuWatchdogTaskObserver::WillProcessTask( | |
115 const base::PendingTask& pending_task) { | |
116 watchdog_->CheckArmed(); | |
117 } | |
118 | |
119 void GpuWatchdogThread::GpuWatchdogTaskObserver::DidProcessTask( | |
120 const base::PendingTask& pending_task) { | |
121 } | |
122 | |
123 GpuWatchdogThread::~GpuWatchdogThread() { | |
124 // Verify that the thread was explicitly stopped. If the thread is stopped | |
125 // implicitly by the destructor, CleanUp() will not be called. | |
126 DCHECK(!weak_factory_.HasWeakPtrs()); | |
127 | |
128 #if defined(OS_WIN) | |
129 CloseHandle(watched_thread_handle_); | |
130 #endif | |
131 | |
132 base::PowerMonitor* power_monitor = base::PowerMonitor::Get(); | |
133 if (power_monitor) | |
134 power_monitor->RemoveObserver(this); | |
135 | |
136 #if defined(USE_X11) | |
137 if (tty_file_) | |
138 fclose(tty_file_); | |
139 XDestroyWindow(display_, window_); | |
140 XCloseDisplay(display_); | |
141 #endif | |
142 | |
143 watched_message_loop_->RemoveTaskObserver(&task_observer_); | |
144 } | |
145 | |
146 void GpuWatchdogThread::OnAcknowledge() { | |
147 CHECK(base::PlatformThread::CurrentId() == GetThreadId()); | |
148 | |
149 // The check has already been acknowledged and another has already been | |
150 // scheduled by a previous call to OnAcknowledge. It is normal for a | |
151 // watched thread to see armed_ being true multiple times before | |
152 // the OnAcknowledge task is run on the watchdog thread. | |
153 if (!armed_) | |
154 return; | |
155 | |
156 // Revoke any pending hang termination. | |
157 weak_factory_.InvalidateWeakPtrs(); | |
158 armed_ = false; | |
159 | |
160 if (suspended_) { | |
161 responsive_acknowledge_count_ = 0; | |
162 return; | |
163 } | |
164 | |
165 base::Time current_time = base::Time::Now(); | |
166 | |
167 // The watchdog waits until at least 6 consecutive checks have returned in | |
168 // less than 50 ms before it will start ignoring the CPU time in determining | |
169 // whether to timeout. This is a compromise to allow startups that are slow | |
170 // due to disk contention to avoid timing out, but once the GPU process is | |
171 // running smoothly the watchdog will be able to detect hangs that don't use | |
172 // the CPU. | |
173 if ((current_time - check_time_) < base::TimeDelta::FromMilliseconds(50)) | |
174 responsive_acknowledge_count_++; | |
175 else | |
176 responsive_acknowledge_count_ = 0; | |
177 | |
178 if (responsive_acknowledge_count_ >= 6) | |
179 use_thread_cpu_time_ = false; | |
180 | |
181 // If it took a long time for the acknowledgement, assume the computer was | |
182 // recently suspended. | |
183 bool was_suspended = (current_time > suspension_timeout_); | |
184 | |
185 // The monitored thread has responded. Post a task to check it again. | |
186 task_runner()->PostDelayedTask( | |
187 FROM_HERE, base::Bind(&GpuWatchdogThread::OnCheck, | |
188 weak_factory_.GetWeakPtr(), was_suspended), | |
189 0.5 * timeout_); | |
190 } | |
191 | |
192 void GpuWatchdogThread::OnCheck(bool after_suspend) { | |
193 CHECK(base::PlatformThread::CurrentId() == GetThreadId()); | |
194 | |
195 // Do not create any new termination tasks if one has already been created | |
196 // or the system is suspended. | |
197 if (armed_ || suspended_) | |
198 return; | |
199 | |
200 // Must set armed before posting the task. This task might be the only task | |
201 // that will activate the TaskObserver on the watched thread and it must not | |
202 // miss the false -> true transition. | |
203 armed_ = true; | |
204 | |
205 #if defined(OS_WIN) | |
206 arm_cpu_time_ = GetWatchedThreadTime(); | |
207 | |
208 QueryUnbiasedInterruptTime(&arm_interrupt_time_); | |
209 #endif | |
210 | |
211 check_time_ = base::Time::Now(); | |
212 check_timeticks_ = base::TimeTicks::Now(); | |
213 // Immediately after the computer is woken up from being suspended it might | |
214 // be pretty sluggish, so allow some extra time before the next timeout. | |
215 base::TimeDelta timeout = timeout_ * (after_suspend ? 3 : 1); | |
216 suspension_timeout_ = check_time_ + timeout * 2; | |
217 | |
218 // Post a task to the monitored thread that does nothing but wake up the | |
219 // TaskObserver. Any other tasks that are pending on the watched thread will | |
220 // also wake up the observer. This simply ensures there is at least one. | |
221 watched_message_loop_->task_runner()->PostTask(FROM_HERE, | |
222 base::Bind(&base::DoNothing)); | |
223 | |
224 // Post a task to the watchdog thread to exit if the monitored thread does | |
225 // not respond in time. | |
226 task_runner()->PostDelayedTask( | |
227 FROM_HERE, | |
228 base::Bind(&GpuWatchdogThread::DeliberatelyTerminateToRecoverFromHang, | |
229 weak_factory_.GetWeakPtr()), | |
230 timeout); | |
231 } | |
232 | |
233 // Use the --disable-gpu-watchdog command line switch to disable this. | |
234 void GpuWatchdogThread::DeliberatelyTerminateToRecoverFromHang() { | |
235 // Should not get here while the system is suspended. | |
236 DCHECK(!suspended_); | |
237 | |
238 #if defined(OS_WIN) | |
239 // Defer termination until a certain amount of CPU time has elapsed on the | |
240 // watched thread. | |
241 base::ThreadTicks current_cpu_time = GetWatchedThreadTime(); | |
242 base::TimeDelta time_since_arm = current_cpu_time - arm_cpu_time_; | |
243 if (use_thread_cpu_time_ && (time_since_arm < timeout_)) { | |
244 task_runner()->PostDelayedTask( | |
245 FROM_HERE, | |
246 base::Bind(&GpuWatchdogThread::DeliberatelyTerminateToRecoverFromHang, | |
247 weak_factory_.GetWeakPtr()), | |
248 timeout_ - time_since_arm); | |
249 return; | |
250 } | |
251 #endif | |
252 | |
253 // If the watchdog woke up significantly behind schedule, disarm and reset | |
254 // the watchdog check. This is to prevent the watchdog thread from terminating | |
255 // when a machine wakes up from sleep or hibernation, which would otherwise | |
256 // appear to be a hang. | |
257 if (base::Time::Now() > suspension_timeout_) { | |
258 armed_ = false; | |
259 OnCheck(true); | |
260 return; | |
261 } | |
262 | |
263 #if defined(USE_X11) | |
264 XWindowAttributes attributes; | |
265 XGetWindowAttributes(display_, window_, &attributes); | |
266 | |
267 XSelectInput(display_, window_, PropertyChangeMask); | |
268 SetupXChangeProp(); | |
269 | |
270 XFlush(display_); | |
271 | |
272 // We wait for the property change event with a timeout. If it arrives we know | |
273 // that X is responsive and is not the cause of the watchdog trigger, so we | |
274 // should | |
275 // terminate. If it times out, it may be due to X taking a long time, but | |
276 // terminating won't help, so ignore the watchdog trigger. | |
277 XEvent event_return; | |
278 base::TimeTicks deadline = base::TimeTicks::Now() + timeout_; | |
279 while (true) { | |
280 base::TimeDelta delta = deadline - base::TimeTicks::Now(); | |
281 if (delta < base::TimeDelta()) { | |
282 return; | |
283 } else { | |
284 while (XCheckWindowEvent(display_, window_, PropertyChangeMask, | |
285 &event_return)) { | |
286 if (MatchXEventAtom(&event_return)) | |
287 break; | |
288 } | |
289 struct pollfd fds[1]; | |
290 fds[0].fd = XConnectionNumber(display_); | |
291 fds[0].events = POLLIN; | |
292 int status = poll(fds, 1, delta.InMilliseconds()); | |
293 if (status == -1) { | |
294 if (errno == EINTR) { | |
295 continue; | |
296 } else { | |
297 LOG(FATAL) << "Lost X connection, aborting."; | |
298 break; | |
299 } | |
300 } else if (status == 0) { | |
301 return; | |
302 } else { | |
303 continue; | |
304 } | |
305 } | |
306 } | |
307 #endif | |
308 | |
309 // For minimal developer annoyance, don't keep terminating. You need to skip | |
310 // the call to base::Process::Terminate below in a debugger for this to be | |
311 // useful. | |
312 static bool terminated = false; | |
313 if (terminated) | |
314 return; | |
315 | |
316 #if defined(OS_WIN) | |
317 if (IsDebuggerPresent()) | |
318 return; | |
319 #endif | |
320 | |
321 #if defined(USE_X11) | |
322 // Don't crash if we're not on the TTY of our host X11 server. | |
323 int active_tty = GetActiveTTY(); | |
324 if(host_tty_ != -1 && active_tty != -1 && host_tty_ != active_tty) { | |
325 return; | |
326 } | |
327 #endif | |
328 | |
329 // Store variables so they're available in crash dumps to help determine the | |
330 // cause of any hang. | |
331 #if defined(OS_WIN) | |
332 ULONGLONG fire_interrupt_time; | |
333 QueryUnbiasedInterruptTime(&fire_interrupt_time); | |
334 | |
335 // This is the time since the watchdog was armed, in 100ns intervals, | |
336 // ignoring time where the computer is suspended. | |
337 ULONGLONG interrupt_delay = fire_interrupt_time - arm_interrupt_time_; | |
338 | |
339 base::debug::Alias(&interrupt_delay); | |
340 base::debug::Alias(¤t_cpu_time); | |
341 base::debug::Alias(&time_since_arm); | |
342 | |
343 bool using_thread_ticks = base::ThreadTicks::IsSupported(); | |
344 base::debug::Alias(&using_thread_ticks); | |
345 | |
346 bool using_high_res_timer = base::Time::IsHighResolutionTimerInUse(); | |
347 base::debug::Alias(&using_high_res_timer); | |
348 | |
349 bool message_pump_is_signaled = | |
350 watched_message_loop_->MessagePumpWasSignaled(); | |
351 base::debug::Alias(&message_pump_is_signaled); | |
352 #endif | |
353 | |
354 base::Time current_time = base::Time::Now(); | |
355 base::TimeTicks current_timeticks = base::TimeTicks::Now(); | |
356 base::debug::Alias(¤t_time); | |
357 base::debug::Alias(¤t_timeticks); | |
358 | |
359 LOG(ERROR) << "The GPU process hung. Terminating after " | |
360 << timeout_.InMilliseconds() << " ms."; | |
361 | |
362 // Deliberately crash the process to create a crash dump. | |
363 *((volatile int*)0) = 0x1337; | |
364 | |
365 terminated = true; | |
366 } | |
367 | |
368 #if defined(USE_X11) | |
369 void GpuWatchdogThread::SetupXServer() { | |
370 display_ = XOpenDisplay(NULL); | |
371 window_ = XCreateWindow(display_, DefaultRootWindow(display_), 0, 0, 1, 1, 0, | |
372 CopyFromParent, InputOutput, CopyFromParent, 0, NULL); | |
373 atom_ = XInternAtom(display_, "CHECK", False); | |
374 host_tty_ = GetActiveTTY(); | |
375 } | |
376 | |
377 void GpuWatchdogThread::SetupXChangeProp() { | |
378 XChangeProperty(display_, window_, atom_, XA_STRING, 8, PropModeReplace, text, | |
379 (arraysize(text) - 1)); | |
380 } | |
381 | |
382 bool GpuWatchdogThread::MatchXEventAtom(XEvent* event) { | |
383 if (event->xproperty.window == window_ && event->type == PropertyNotify && | |
384 event->xproperty.atom == atom_) | |
385 return true; | |
386 | |
387 return false; | |
388 } | |
389 | |
390 #endif | |
391 void GpuWatchdogThread::AddPowerObserver() { | |
392 task_runner()->PostTask( | |
393 FROM_HERE, base::Bind(&GpuWatchdogThread::OnAddPowerObserver, this)); | |
394 } | |
395 | |
396 void GpuWatchdogThread::OnAddPowerObserver() { | |
397 base::PowerMonitor* power_monitor = base::PowerMonitor::Get(); | |
398 DCHECK(power_monitor); | |
399 power_monitor->AddObserver(this); | |
400 } | |
401 | |
402 void GpuWatchdogThread::OnSuspend() { | |
403 suspended_ = true; | |
404 suspend_time_ = base::Time::Now(); | |
405 | |
406 // When suspending force an acknowledgement to cancel any pending termination | |
407 // tasks. | |
408 OnAcknowledge(); | |
409 } | |
410 | |
411 void GpuWatchdogThread::OnResume() { | |
412 suspended_ = false; | |
413 resume_time_ = base::Time::Now(); | |
414 | |
415 // After resuming jump-start the watchdog again. | |
416 armed_ = false; | |
417 OnCheck(true); | |
418 } | |
419 | |
420 #if defined(OS_WIN) | |
421 base::ThreadTicks GpuWatchdogThread::GetWatchedThreadTime() { | |
422 if (base::ThreadTicks::IsSupported()) { | |
423 // Convert ThreadTicks::Now() to TimeDelta. | |
424 return base::ThreadTicks::GetForThread( | |
425 base::PlatformThreadHandle(watched_thread_handle_)); | |
426 } else { | |
427 // Use GetThreadTimes as a backup mechanism. | |
428 FILETIME creation_time; | |
429 FILETIME exit_time; | |
430 FILETIME user_time; | |
431 FILETIME kernel_time; | |
432 BOOL result = GetThreadTimes(watched_thread_handle_, &creation_time, | |
433 &exit_time, &kernel_time, &user_time); | |
434 DCHECK(result); | |
435 | |
436 ULARGE_INTEGER user_time64; | |
437 user_time64.HighPart = user_time.dwHighDateTime; | |
438 user_time64.LowPart = user_time.dwLowDateTime; | |
439 | |
440 ULARGE_INTEGER kernel_time64; | |
441 kernel_time64.HighPart = kernel_time.dwHighDateTime; | |
442 kernel_time64.LowPart = kernel_time.dwLowDateTime; | |
443 | |
444 // Time is reported in units of 100 nanoseconds. Kernel and user time are | |
445 // summed to deal with to kinds of hangs. One is where the GPU process is | |
446 // stuck in user level, never calling into the kernel and kernel time is | |
447 // not increasing. The other is where either the kernel hangs and never | |
448 // returns to user level or where user level code | |
449 // calls into kernel level repeatedly, giving up its quanta before it is | |
450 // tracked, for example a loop that repeatedly Sleeps. | |
451 return base::ThreadTicks() + | |
452 base::TimeDelta::FromMilliseconds(static_cast<int64_t>( | |
453 (user_time64.QuadPart + kernel_time64.QuadPart) / 10000)); | |
454 } | |
455 } | |
456 #endif | |
457 | |
458 #if defined(USE_X11) | |
459 int GpuWatchdogThread::GetActiveTTY() const { | |
460 char tty_string[8] = {0}; | |
461 if (tty_file_ && !fseek(tty_file_, 0, SEEK_SET) && | |
462 fread(tty_string, 1, 7, tty_file_)) { | |
463 int tty_number; | |
464 size_t num_res = sscanf(tty_string, "tty%d\n", &tty_number); | |
465 if (num_res == 1) | |
466 return tty_number; | |
467 } | |
468 return -1; | |
469 } | |
470 #endif | |
471 | |
472 } // namespace content | |
OLD | NEW |