Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(105)

Side by Side Diff: content/gpu/gpu_watchdog_thread.cc

Issue 2286063003: gpu: Move GpuWatchdogThread into //gpu/ipc/service from content. (Closed)
Patch Set: tot merge Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « content/gpu/gpu_watchdog_thread.h ('k') | gpu/gpu_ipc_service.gypi » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "content/gpu/gpu_watchdog_thread.h"
6
7 #include <errno.h>
8 #include <stdint.h>
9
10 #include "base/bind.h"
11 #include "base/bind_helpers.h"
12 #include "base/command_line.h"
13 #include "base/compiler_specific.h"
14 #include "base/debug/alias.h"
15 #include "base/files/file_util.h"
16 #include "base/location.h"
17 #include "base/macros.h"
18 #include "base/power_monitor/power_monitor.h"
19 #include "base/process/process.h"
20 #include "base/single_thread_task_runner.h"
21 #include "base/threading/platform_thread.h"
22 #include "build/build_config.h"
23 #include "content/public/common/content_switches.h"
24 #include "content/public/common/result_codes.h"
25
26 #if defined(OS_WIN)
27 #include <windows.h>
28 #endif
29
30 namespace content {
namespace {

#if defined(USE_X11)
// File read by GetActiveTTY(); its contents are parsed as "tty<N>".
const base::FilePath::CharType kTtyFilePath[] =
    FILE_PATH_LITERAL("/sys/class/tty/tty0/active");

// Bytes written to the probe window's property by SetupXChangeProp().
const unsigned char text[20] = "check";
#endif

}  // namespace
38
39 GpuWatchdogThread::GpuWatchdogThread(int timeout)
40 : base::Thread("Watchdog"),
41 watched_message_loop_(base::MessageLoop::current()),
42 timeout_(base::TimeDelta::FromMilliseconds(timeout)),
43 armed_(false),
44 task_observer_(this),
45 use_thread_cpu_time_(true),
46 responsive_acknowledge_count_(0),
47 #if defined(OS_WIN)
48 watched_thread_handle_(0),
49 arm_cpu_time_(),
50 #endif
51 suspended_(false),
52 #if defined(USE_X11)
53 display_(NULL),
54 window_(0),
55 atom_(None),
56 host_tty_(-1),
57 #endif
58 weak_factory_(this) {
59 DCHECK(timeout >= 0);
60
61 #if defined(OS_WIN)
62 // GetCurrentThread returns a pseudo-handle that cannot be used by one thread
63 // to identify another. DuplicateHandle creates a "real" handle that can be
64 // used for this purpose.
65 BOOL result = DuplicateHandle(GetCurrentProcess(),
66 GetCurrentThread(),
67 GetCurrentProcess(),
68 &watched_thread_handle_,
69 THREAD_QUERY_INFORMATION,
70 FALSE,
71 0);
72 DCHECK(result);
73 #endif
74
75 #if defined(USE_X11)
76 tty_file_ = base::OpenFile(base::FilePath(kTtyFilePath), "r");
77 SetupXServer();
78 #endif
79 watched_message_loop_->AddTaskObserver(&task_observer_);
80 }
81
82 void GpuWatchdogThread::PostAcknowledge() {
83 // Called on the monitored thread. Responds with OnAcknowledge. Cannot use
84 // the method factory. Rely on reference counting instead.
85 task_runner()->PostTask(FROM_HERE,
86 base::Bind(&GpuWatchdogThread::OnAcknowledge, this));
87 }
88
89 void GpuWatchdogThread::CheckArmed() {
90 // Acknowledge the watchdog if it has armed itself. The watchdog will not
91 // change its armed state until it is acknowledged.
92 if (armed()) {
93 PostAcknowledge();
94 }
95 }
96
97 void GpuWatchdogThread::Init() {
98 // Schedule the first check.
99 OnCheck(false);
100 }
101
102 void GpuWatchdogThread::CleanUp() {
103 weak_factory_.InvalidateWeakPtrs();
104 }
105
106 GpuWatchdogThread::GpuWatchdogTaskObserver::GpuWatchdogTaskObserver(
107 GpuWatchdogThread* watchdog)
108 : watchdog_(watchdog) {
109 }
110
111 GpuWatchdogThread::GpuWatchdogTaskObserver::~GpuWatchdogTaskObserver() {
112 }
113
114 void GpuWatchdogThread::GpuWatchdogTaskObserver::WillProcessTask(
115 const base::PendingTask& pending_task) {
116 watchdog_->CheckArmed();
117 }
118
119 void GpuWatchdogThread::GpuWatchdogTaskObserver::DidProcessTask(
120 const base::PendingTask& pending_task) {
121 }
122
123 GpuWatchdogThread::~GpuWatchdogThread() {
124 // Verify that the thread was explicitly stopped. If the thread is stopped
125 // implicitly by the destructor, CleanUp() will not be called.
126 DCHECK(!weak_factory_.HasWeakPtrs());
127
128 #if defined(OS_WIN)
129 CloseHandle(watched_thread_handle_);
130 #endif
131
132 base::PowerMonitor* power_monitor = base::PowerMonitor::Get();
133 if (power_monitor)
134 power_monitor->RemoveObserver(this);
135
136 #if defined(USE_X11)
137 if (tty_file_)
138 fclose(tty_file_);
139 XDestroyWindow(display_, window_);
140 XCloseDisplay(display_);
141 #endif
142
143 watched_message_loop_->RemoveTaskObserver(&task_observer_);
144 }
145
146 void GpuWatchdogThread::OnAcknowledge() {
147 CHECK(base::PlatformThread::CurrentId() == GetThreadId());
148
149 // The check has already been acknowledged and another has already been
150 // scheduled by a previous call to OnAcknowledge. It is normal for a
151 // watched thread to see armed_ being true multiple times before
152 // the OnAcknowledge task is run on the watchdog thread.
153 if (!armed_)
154 return;
155
156 // Revoke any pending hang termination.
157 weak_factory_.InvalidateWeakPtrs();
158 armed_ = false;
159
160 if (suspended_) {
161 responsive_acknowledge_count_ = 0;
162 return;
163 }
164
165 base::Time current_time = base::Time::Now();
166
167 // The watchdog waits until at least 6 consecutive checks have returned in
168 // less than 50 ms before it will start ignoring the CPU time in determining
169 // whether to timeout. This is a compromise to allow startups that are slow
170 // due to disk contention to avoid timing out, but once the GPU process is
171 // running smoothly the watchdog will be able to detect hangs that don't use
172 // the CPU.
173 if ((current_time - check_time_) < base::TimeDelta::FromMilliseconds(50))
174 responsive_acknowledge_count_++;
175 else
176 responsive_acknowledge_count_ = 0;
177
178 if (responsive_acknowledge_count_ >= 6)
179 use_thread_cpu_time_ = false;
180
181 // If it took a long time for the acknowledgement, assume the computer was
182 // recently suspended.
183 bool was_suspended = (current_time > suspension_timeout_);
184
185 // The monitored thread has responded. Post a task to check it again.
186 task_runner()->PostDelayedTask(
187 FROM_HERE, base::Bind(&GpuWatchdogThread::OnCheck,
188 weak_factory_.GetWeakPtr(), was_suspended),
189 0.5 * timeout_);
190 }
191
192 void GpuWatchdogThread::OnCheck(bool after_suspend) {
193 CHECK(base::PlatformThread::CurrentId() == GetThreadId());
194
195 // Do not create any new termination tasks if one has already been created
196 // or the system is suspended.
197 if (armed_ || suspended_)
198 return;
199
200 // Must set armed before posting the task. This task might be the only task
201 // that will activate the TaskObserver on the watched thread and it must not
202 // miss the false -> true transition.
203 armed_ = true;
204
205 #if defined(OS_WIN)
206 arm_cpu_time_ = GetWatchedThreadTime();
207
208 QueryUnbiasedInterruptTime(&arm_interrupt_time_);
209 #endif
210
211 check_time_ = base::Time::Now();
212 check_timeticks_ = base::TimeTicks::Now();
213 // Immediately after the computer is woken up from being suspended it might
214 // be pretty sluggish, so allow some extra time before the next timeout.
215 base::TimeDelta timeout = timeout_ * (after_suspend ? 3 : 1);
216 suspension_timeout_ = check_time_ + timeout * 2;
217
218 // Post a task to the monitored thread that does nothing but wake up the
219 // TaskObserver. Any other tasks that are pending on the watched thread will
220 // also wake up the observer. This simply ensures there is at least one.
221 watched_message_loop_->task_runner()->PostTask(FROM_HERE,
222 base::Bind(&base::DoNothing));
223
224 // Post a task to the watchdog thread to exit if the monitored thread does
225 // not respond in time.
226 task_runner()->PostDelayedTask(
227 FROM_HERE,
228 base::Bind(&GpuWatchdogThread::DeliberatelyTerminateToRecoverFromHang,
229 weak_factory_.GetWeakPtr()),
230 timeout);
231 }
232
233 // Use the --disable-gpu-watchdog command line switch to disable this.
234 void GpuWatchdogThread::DeliberatelyTerminateToRecoverFromHang() {
235 // Should not get here while the system is suspended.
236 DCHECK(!suspended_);
237
238 #if defined(OS_WIN)
239 // Defer termination until a certain amount of CPU time has elapsed on the
240 // watched thread.
241 base::ThreadTicks current_cpu_time = GetWatchedThreadTime();
242 base::TimeDelta time_since_arm = current_cpu_time - arm_cpu_time_;
243 if (use_thread_cpu_time_ && (time_since_arm < timeout_)) {
244 task_runner()->PostDelayedTask(
245 FROM_HERE,
246 base::Bind(&GpuWatchdogThread::DeliberatelyTerminateToRecoverFromHang,
247 weak_factory_.GetWeakPtr()),
248 timeout_ - time_since_arm);
249 return;
250 }
251 #endif
252
253 // If the watchdog woke up significantly behind schedule, disarm and reset
254 // the watchdog check. This is to prevent the watchdog thread from terminating
255 // when a machine wakes up from sleep or hibernation, which would otherwise
256 // appear to be a hang.
257 if (base::Time::Now() > suspension_timeout_) {
258 armed_ = false;
259 OnCheck(true);
260 return;
261 }
262
263 #if defined(USE_X11)
264 XWindowAttributes attributes;
265 XGetWindowAttributes(display_, window_, &attributes);
266
267 XSelectInput(display_, window_, PropertyChangeMask);
268 SetupXChangeProp();
269
270 XFlush(display_);
271
272 // We wait for the property change event with a timeout. If it arrives we know
273 // that X is responsive and is not the cause of the watchdog trigger, so we
274 // should
275 // terminate. If it times out, it may be due to X taking a long time, but
276 // terminating won't help, so ignore the watchdog trigger.
277 XEvent event_return;
278 base::TimeTicks deadline = base::TimeTicks::Now() + timeout_;
279 while (true) {
280 base::TimeDelta delta = deadline - base::TimeTicks::Now();
281 if (delta < base::TimeDelta()) {
282 return;
283 } else {
284 while (XCheckWindowEvent(display_, window_, PropertyChangeMask,
285 &event_return)) {
286 if (MatchXEventAtom(&event_return))
287 break;
288 }
289 struct pollfd fds[1];
290 fds[0].fd = XConnectionNumber(display_);
291 fds[0].events = POLLIN;
292 int status = poll(fds, 1, delta.InMilliseconds());
293 if (status == -1) {
294 if (errno == EINTR) {
295 continue;
296 } else {
297 LOG(FATAL) << "Lost X connection, aborting.";
298 break;
299 }
300 } else if (status == 0) {
301 return;
302 } else {
303 continue;
304 }
305 }
306 }
307 #endif
308
309 // For minimal developer annoyance, don't keep terminating. You need to skip
310 // the call to base::Process::Terminate below in a debugger for this to be
311 // useful.
312 static bool terminated = false;
313 if (terminated)
314 return;
315
316 #if defined(OS_WIN)
317 if (IsDebuggerPresent())
318 return;
319 #endif
320
321 #if defined(USE_X11)
322 // Don't crash if we're not on the TTY of our host X11 server.
323 int active_tty = GetActiveTTY();
324 if(host_tty_ != -1 && active_tty != -1 && host_tty_ != active_tty) {
325 return;
326 }
327 #endif
328
329 // Store variables so they're available in crash dumps to help determine the
330 // cause of any hang.
331 #if defined(OS_WIN)
332 ULONGLONG fire_interrupt_time;
333 QueryUnbiasedInterruptTime(&fire_interrupt_time);
334
335 // This is the time since the watchdog was armed, in 100ns intervals,
336 // ignoring time where the computer is suspended.
337 ULONGLONG interrupt_delay = fire_interrupt_time - arm_interrupt_time_;
338
339 base::debug::Alias(&interrupt_delay);
340 base::debug::Alias(&current_cpu_time);
341 base::debug::Alias(&time_since_arm);
342
343 bool using_thread_ticks = base::ThreadTicks::IsSupported();
344 base::debug::Alias(&using_thread_ticks);
345
346 bool using_high_res_timer = base::Time::IsHighResolutionTimerInUse();
347 base::debug::Alias(&using_high_res_timer);
348
349 bool message_pump_is_signaled =
350 watched_message_loop_->MessagePumpWasSignaled();
351 base::debug::Alias(&message_pump_is_signaled);
352 #endif
353
354 base::Time current_time = base::Time::Now();
355 base::TimeTicks current_timeticks = base::TimeTicks::Now();
356 base::debug::Alias(&current_time);
357 base::debug::Alias(&current_timeticks);
358
359 LOG(ERROR) << "The GPU process hung. Terminating after "
360 << timeout_.InMilliseconds() << " ms.";
361
362 // Deliberately crash the process to create a crash dump.
363 *((volatile int*)0) = 0x1337;
364
365 terminated = true;
366 }
367
368 #if defined(USE_X11)
369 void GpuWatchdogThread::SetupXServer() {
370 display_ = XOpenDisplay(NULL);
371 window_ = XCreateWindow(display_, DefaultRootWindow(display_), 0, 0, 1, 1, 0,
372 CopyFromParent, InputOutput, CopyFromParent, 0, NULL);
373 atom_ = XInternAtom(display_, "CHECK", False);
374 host_tty_ = GetActiveTTY();
375 }
376
377 void GpuWatchdogThread::SetupXChangeProp() {
378 XChangeProperty(display_, window_, atom_, XA_STRING, 8, PropModeReplace, text,
379 (arraysize(text) - 1));
380 }
381
382 bool GpuWatchdogThread::MatchXEventAtom(XEvent* event) {
383 if (event->xproperty.window == window_ && event->type == PropertyNotify &&
384 event->xproperty.atom == atom_)
385 return true;
386
387 return false;
388 }
389
390 #endif
391 void GpuWatchdogThread::AddPowerObserver() {
392 task_runner()->PostTask(
393 FROM_HERE, base::Bind(&GpuWatchdogThread::OnAddPowerObserver, this));
394 }
395
396 void GpuWatchdogThread::OnAddPowerObserver() {
397 base::PowerMonitor* power_monitor = base::PowerMonitor::Get();
398 DCHECK(power_monitor);
399 power_monitor->AddObserver(this);
400 }
401
402 void GpuWatchdogThread::OnSuspend() {
403 suspended_ = true;
404 suspend_time_ = base::Time::Now();
405
406 // When suspending force an acknowledgement to cancel any pending termination
407 // tasks.
408 OnAcknowledge();
409 }
410
411 void GpuWatchdogThread::OnResume() {
412 suspended_ = false;
413 resume_time_ = base::Time::Now();
414
415 // After resuming jump-start the watchdog again.
416 armed_ = false;
417 OnCheck(true);
418 }
419
420 #if defined(OS_WIN)
421 base::ThreadTicks GpuWatchdogThread::GetWatchedThreadTime() {
422 if (base::ThreadTicks::IsSupported()) {
423 // Convert ThreadTicks::Now() to TimeDelta.
424 return base::ThreadTicks::GetForThread(
425 base::PlatformThreadHandle(watched_thread_handle_));
426 } else {
427 // Use GetThreadTimes as a backup mechanism.
428 FILETIME creation_time;
429 FILETIME exit_time;
430 FILETIME user_time;
431 FILETIME kernel_time;
432 BOOL result = GetThreadTimes(watched_thread_handle_, &creation_time,
433 &exit_time, &kernel_time, &user_time);
434 DCHECK(result);
435
436 ULARGE_INTEGER user_time64;
437 user_time64.HighPart = user_time.dwHighDateTime;
438 user_time64.LowPart = user_time.dwLowDateTime;
439
440 ULARGE_INTEGER kernel_time64;
441 kernel_time64.HighPart = kernel_time.dwHighDateTime;
442 kernel_time64.LowPart = kernel_time.dwLowDateTime;
443
444 // Time is reported in units of 100 nanoseconds. Kernel and user time are
445 // summed to deal with to kinds of hangs. One is where the GPU process is
446 // stuck in user level, never calling into the kernel and kernel time is
447 // not increasing. The other is where either the kernel hangs and never
448 // returns to user level or where user level code
449 // calls into kernel level repeatedly, giving up its quanta before it is
450 // tracked, for example a loop that repeatedly Sleeps.
451 return base::ThreadTicks() +
452 base::TimeDelta::FromMilliseconds(static_cast<int64_t>(
453 (user_time64.QuadPart + kernel_time64.QuadPart) / 10000));
454 }
455 }
456 #endif
457
458 #if defined(USE_X11)
459 int GpuWatchdogThread::GetActiveTTY() const {
460 char tty_string[8] = {0};
461 if (tty_file_ && !fseek(tty_file_, 0, SEEK_SET) &&
462 fread(tty_string, 1, 7, tty_file_)) {
463 int tty_number;
464 size_t num_res = sscanf(tty_string, "tty%d\n", &tty_number);
465 if (num_res == 1)
466 return tty_number;
467 }
468 return -1;
469 }
470 #endif
471
472 } // namespace content
OLDNEW
« no previous file with comments | « content/gpu/gpu_watchdog_thread.h ('k') | gpu/gpu_ipc_service.gypi » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698