| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "content/gpu/gpu_watchdog_thread.h" | |
| 6 | |
| 7 #include <errno.h> | |
| 8 #include <stdint.h> | |
| 9 | |
| 10 #include "base/bind.h" | |
| 11 #include "base/bind_helpers.h" | |
| 12 #include "base/command_line.h" | |
| 13 #include "base/compiler_specific.h" | |
| 14 #include "base/debug/alias.h" | |
| 15 #include "base/files/file_util.h" | |
| 16 #include "base/location.h" | |
| 17 #include "base/macros.h" | |
| 18 #include "base/power_monitor/power_monitor.h" | |
| 19 #include "base/process/process.h" | |
| 20 #include "base/single_thread_task_runner.h" | |
| 21 #include "base/threading/platform_thread.h" | |
| 22 #include "build/build_config.h" | |
| 23 #include "content/public/common/content_switches.h" | |
| 24 #include "content/public/common/result_codes.h" | |
| 25 | |
| 26 #if defined(OS_WIN) | |
| 27 #include <windows.h> | |
| 28 #endif | |
| 29 | |
| 30 namespace content { | |
namespace {
#if defined(USE_X11)
// File read by GetActiveTTY() to find out which "ttyN" is currently active.
const base::FilePath::CharType kTtyFilePath[] =
    FILE_PATH_LITERAL("/sys/class/tty/tty0/active");

// Property payload written by SetupXChangeProp(). Only the first five bytes
// hold characters; the rest of the array is zero-initialized.
const unsigned char text[20] = "check";
#endif
}  // namespace
| 38 | |
| 39 GpuWatchdogThread::GpuWatchdogThread(int timeout) | |
| 40 : base::Thread("Watchdog"), | |
| 41 watched_message_loop_(base::MessageLoop::current()), | |
| 42 timeout_(base::TimeDelta::FromMilliseconds(timeout)), | |
| 43 armed_(false), | |
| 44 task_observer_(this), | |
| 45 use_thread_cpu_time_(true), | |
| 46 responsive_acknowledge_count_(0), | |
| 47 #if defined(OS_WIN) | |
| 48 watched_thread_handle_(0), | |
| 49 arm_cpu_time_(), | |
| 50 #endif | |
| 51 suspended_(false), | |
| 52 #if defined(USE_X11) | |
| 53 display_(NULL), | |
| 54 window_(0), | |
| 55 atom_(None), | |
| 56 host_tty_(-1), | |
| 57 #endif | |
| 58 weak_factory_(this) { | |
| 59 DCHECK(timeout >= 0); | |
| 60 | |
| 61 #if defined(OS_WIN) | |
| 62 // GetCurrentThread returns a pseudo-handle that cannot be used by one thread | |
| 63 // to identify another. DuplicateHandle creates a "real" handle that can be | |
| 64 // used for this purpose. | |
| 65 BOOL result = DuplicateHandle(GetCurrentProcess(), | |
| 66 GetCurrentThread(), | |
| 67 GetCurrentProcess(), | |
| 68 &watched_thread_handle_, | |
| 69 THREAD_QUERY_INFORMATION, | |
| 70 FALSE, | |
| 71 0); | |
| 72 DCHECK(result); | |
| 73 #endif | |
| 74 | |
| 75 #if defined(USE_X11) | |
| 76 tty_file_ = base::OpenFile(base::FilePath(kTtyFilePath), "r"); | |
| 77 SetupXServer(); | |
| 78 #endif | |
| 79 watched_message_loop_->AddTaskObserver(&task_observer_); | |
| 80 } | |
| 81 | |
| 82 void GpuWatchdogThread::PostAcknowledge() { | |
| 83 // Called on the monitored thread. Responds with OnAcknowledge. Cannot use | |
| 84 // the method factory. Rely on reference counting instead. | |
| 85 task_runner()->PostTask(FROM_HERE, | |
| 86 base::Bind(&GpuWatchdogThread::OnAcknowledge, this)); | |
| 87 } | |
| 88 | |
| 89 void GpuWatchdogThread::CheckArmed() { | |
| 90 // Acknowledge the watchdog if it has armed itself. The watchdog will not | |
| 91 // change its armed state until it is acknowledged. | |
| 92 if (armed()) { | |
| 93 PostAcknowledge(); | |
| 94 } | |
| 95 } | |
| 96 | |
| 97 void GpuWatchdogThread::Init() { | |
| 98 // Schedule the first check. | |
| 99 OnCheck(false); | |
| 100 } | |
| 101 | |
| 102 void GpuWatchdogThread::CleanUp() { | |
| 103 weak_factory_.InvalidateWeakPtrs(); | |
| 104 } | |
| 105 | |
| 106 GpuWatchdogThread::GpuWatchdogTaskObserver::GpuWatchdogTaskObserver( | |
| 107 GpuWatchdogThread* watchdog) | |
| 108 : watchdog_(watchdog) { | |
| 109 } | |
| 110 | |
| 111 GpuWatchdogThread::GpuWatchdogTaskObserver::~GpuWatchdogTaskObserver() { | |
| 112 } | |
| 113 | |
| 114 void GpuWatchdogThread::GpuWatchdogTaskObserver::WillProcessTask( | |
| 115 const base::PendingTask& pending_task) { | |
| 116 watchdog_->CheckArmed(); | |
| 117 } | |
| 118 | |
| 119 void GpuWatchdogThread::GpuWatchdogTaskObserver::DidProcessTask( | |
| 120 const base::PendingTask& pending_task) { | |
| 121 } | |
| 122 | |
| 123 GpuWatchdogThread::~GpuWatchdogThread() { | |
| 124 // Verify that the thread was explicitly stopped. If the thread is stopped | |
| 125 // implicitly by the destructor, CleanUp() will not be called. | |
| 126 DCHECK(!weak_factory_.HasWeakPtrs()); | |
| 127 | |
| 128 #if defined(OS_WIN) | |
| 129 CloseHandle(watched_thread_handle_); | |
| 130 #endif | |
| 131 | |
| 132 base::PowerMonitor* power_monitor = base::PowerMonitor::Get(); | |
| 133 if (power_monitor) | |
| 134 power_monitor->RemoveObserver(this); | |
| 135 | |
| 136 #if defined(USE_X11) | |
| 137 if (tty_file_) | |
| 138 fclose(tty_file_); | |
| 139 XDestroyWindow(display_, window_); | |
| 140 XCloseDisplay(display_); | |
| 141 #endif | |
| 142 | |
| 143 watched_message_loop_->RemoveTaskObserver(&task_observer_); | |
| 144 } | |
| 145 | |
| 146 void GpuWatchdogThread::OnAcknowledge() { | |
| 147 CHECK(base::PlatformThread::CurrentId() == GetThreadId()); | |
| 148 | |
| 149 // The check has already been acknowledged and another has already been | |
| 150 // scheduled by a previous call to OnAcknowledge. It is normal for a | |
| 151 // watched thread to see armed_ being true multiple times before | |
| 152 // the OnAcknowledge task is run on the watchdog thread. | |
| 153 if (!armed_) | |
| 154 return; | |
| 155 | |
| 156 // Revoke any pending hang termination. | |
| 157 weak_factory_.InvalidateWeakPtrs(); | |
| 158 armed_ = false; | |
| 159 | |
| 160 if (suspended_) { | |
| 161 responsive_acknowledge_count_ = 0; | |
| 162 return; | |
| 163 } | |
| 164 | |
| 165 base::Time current_time = base::Time::Now(); | |
| 166 | |
| 167 // The watchdog waits until at least 6 consecutive checks have returned in | |
| 168 // less than 50 ms before it will start ignoring the CPU time in determining | |
| 169 // whether to timeout. This is a compromise to allow startups that are slow | |
| 170 // due to disk contention to avoid timing out, but once the GPU process is | |
| 171 // running smoothly the watchdog will be able to detect hangs that don't use | |
| 172 // the CPU. | |
| 173 if ((current_time - check_time_) < base::TimeDelta::FromMilliseconds(50)) | |
| 174 responsive_acknowledge_count_++; | |
| 175 else | |
| 176 responsive_acknowledge_count_ = 0; | |
| 177 | |
| 178 if (responsive_acknowledge_count_ >= 6) | |
| 179 use_thread_cpu_time_ = false; | |
| 180 | |
| 181 // If it took a long time for the acknowledgement, assume the computer was | |
| 182 // recently suspended. | |
| 183 bool was_suspended = (current_time > suspension_timeout_); | |
| 184 | |
| 185 // The monitored thread has responded. Post a task to check it again. | |
| 186 task_runner()->PostDelayedTask( | |
| 187 FROM_HERE, base::Bind(&GpuWatchdogThread::OnCheck, | |
| 188 weak_factory_.GetWeakPtr(), was_suspended), | |
| 189 0.5 * timeout_); | |
| 190 } | |
| 191 | |
| 192 void GpuWatchdogThread::OnCheck(bool after_suspend) { | |
| 193 CHECK(base::PlatformThread::CurrentId() == GetThreadId()); | |
| 194 | |
| 195 // Do not create any new termination tasks if one has already been created | |
| 196 // or the system is suspended. | |
| 197 if (armed_ || suspended_) | |
| 198 return; | |
| 199 | |
| 200 // Must set armed before posting the task. This task might be the only task | |
| 201 // that will activate the TaskObserver on the watched thread and it must not | |
| 202 // miss the false -> true transition. | |
| 203 armed_ = true; | |
| 204 | |
| 205 #if defined(OS_WIN) | |
| 206 arm_cpu_time_ = GetWatchedThreadTime(); | |
| 207 | |
| 208 QueryUnbiasedInterruptTime(&arm_interrupt_time_); | |
| 209 #endif | |
| 210 | |
| 211 check_time_ = base::Time::Now(); | |
| 212 check_timeticks_ = base::TimeTicks::Now(); | |
| 213 // Immediately after the computer is woken up from being suspended it might | |
| 214 // be pretty sluggish, so allow some extra time before the next timeout. | |
| 215 base::TimeDelta timeout = timeout_ * (after_suspend ? 3 : 1); | |
| 216 suspension_timeout_ = check_time_ + timeout * 2; | |
| 217 | |
| 218 // Post a task to the monitored thread that does nothing but wake up the | |
| 219 // TaskObserver. Any other tasks that are pending on the watched thread will | |
| 220 // also wake up the observer. This simply ensures there is at least one. | |
| 221 watched_message_loop_->task_runner()->PostTask(FROM_HERE, | |
| 222 base::Bind(&base::DoNothing)); | |
| 223 | |
| 224 // Post a task to the watchdog thread to exit if the monitored thread does | |
| 225 // not respond in time. | |
| 226 task_runner()->PostDelayedTask( | |
| 227 FROM_HERE, | |
| 228 base::Bind(&GpuWatchdogThread::DeliberatelyTerminateToRecoverFromHang, | |
| 229 weak_factory_.GetWeakPtr()), | |
| 230 timeout); | |
| 231 } | |
| 232 | |
| 233 // Use the --disable-gpu-watchdog command line switch to disable this. | |
| 234 void GpuWatchdogThread::DeliberatelyTerminateToRecoverFromHang() { | |
| 235 // Should not get here while the system is suspended. | |
| 236 DCHECK(!suspended_); | |
| 237 | |
| 238 #if defined(OS_WIN) | |
| 239 // Defer termination until a certain amount of CPU time has elapsed on the | |
| 240 // watched thread. | |
| 241 base::ThreadTicks current_cpu_time = GetWatchedThreadTime(); | |
| 242 base::TimeDelta time_since_arm = current_cpu_time - arm_cpu_time_; | |
| 243 if (use_thread_cpu_time_ && (time_since_arm < timeout_)) { | |
| 244 task_runner()->PostDelayedTask( | |
| 245 FROM_HERE, | |
| 246 base::Bind(&GpuWatchdogThread::DeliberatelyTerminateToRecoverFromHang, | |
| 247 weak_factory_.GetWeakPtr()), | |
| 248 timeout_ - time_since_arm); | |
| 249 return; | |
| 250 } | |
| 251 #endif | |
| 252 | |
| 253 // If the watchdog woke up significantly behind schedule, disarm and reset | |
| 254 // the watchdog check. This is to prevent the watchdog thread from terminating | |
| 255 // when a machine wakes up from sleep or hibernation, which would otherwise | |
| 256 // appear to be a hang. | |
| 257 if (base::Time::Now() > suspension_timeout_) { | |
| 258 armed_ = false; | |
| 259 OnCheck(true); | |
| 260 return; | |
| 261 } | |
| 262 | |
| 263 #if defined(USE_X11) | |
| 264 XWindowAttributes attributes; | |
| 265 XGetWindowAttributes(display_, window_, &attributes); | |
| 266 | |
| 267 XSelectInput(display_, window_, PropertyChangeMask); | |
| 268 SetupXChangeProp(); | |
| 269 | |
| 270 XFlush(display_); | |
| 271 | |
| 272 // We wait for the property change event with a timeout. If it arrives we know | |
| 273 // that X is responsive and is not the cause of the watchdog trigger, so we | |
| 274 // should | |
| 275 // terminate. If it times out, it may be due to X taking a long time, but | |
| 276 // terminating won't help, so ignore the watchdog trigger. | |
| 277 XEvent event_return; | |
| 278 base::TimeTicks deadline = base::TimeTicks::Now() + timeout_; | |
| 279 while (true) { | |
| 280 base::TimeDelta delta = deadline - base::TimeTicks::Now(); | |
| 281 if (delta < base::TimeDelta()) { | |
| 282 return; | |
| 283 } else { | |
| 284 while (XCheckWindowEvent(display_, window_, PropertyChangeMask, | |
| 285 &event_return)) { | |
| 286 if (MatchXEventAtom(&event_return)) | |
| 287 break; | |
| 288 } | |
| 289 struct pollfd fds[1]; | |
| 290 fds[0].fd = XConnectionNumber(display_); | |
| 291 fds[0].events = POLLIN; | |
| 292 int status = poll(fds, 1, delta.InMilliseconds()); | |
| 293 if (status == -1) { | |
| 294 if (errno == EINTR) { | |
| 295 continue; | |
| 296 } else { | |
| 297 LOG(FATAL) << "Lost X connection, aborting."; | |
| 298 break; | |
| 299 } | |
| 300 } else if (status == 0) { | |
| 301 return; | |
| 302 } else { | |
| 303 continue; | |
| 304 } | |
| 305 } | |
| 306 } | |
| 307 #endif | |
| 308 | |
| 309 // For minimal developer annoyance, don't keep terminating. You need to skip | |
| 310 // the call to base::Process::Terminate below in a debugger for this to be | |
| 311 // useful. | |
| 312 static bool terminated = false; | |
| 313 if (terminated) | |
| 314 return; | |
| 315 | |
| 316 #if defined(OS_WIN) | |
| 317 if (IsDebuggerPresent()) | |
| 318 return; | |
| 319 #endif | |
| 320 | |
| 321 #if defined(USE_X11) | |
| 322 // Don't crash if we're not on the TTY of our host X11 server. | |
| 323 int active_tty = GetActiveTTY(); | |
| 324 if(host_tty_ != -1 && active_tty != -1 && host_tty_ != active_tty) { | |
| 325 return; | |
| 326 } | |
| 327 #endif | |
| 328 | |
| 329 // Store variables so they're available in crash dumps to help determine the | |
| 330 // cause of any hang. | |
| 331 #if defined(OS_WIN) | |
| 332 ULONGLONG fire_interrupt_time; | |
| 333 QueryUnbiasedInterruptTime(&fire_interrupt_time); | |
| 334 | |
| 335 // This is the time since the watchdog was armed, in 100ns intervals, | |
| 336 // ignoring time where the computer is suspended. | |
| 337 ULONGLONG interrupt_delay = fire_interrupt_time - arm_interrupt_time_; | |
| 338 | |
| 339 base::debug::Alias(&interrupt_delay); | |
| 340 base::debug::Alias(¤t_cpu_time); | |
| 341 base::debug::Alias(&time_since_arm); | |
| 342 | |
| 343 bool using_thread_ticks = base::ThreadTicks::IsSupported(); | |
| 344 base::debug::Alias(&using_thread_ticks); | |
| 345 | |
| 346 bool using_high_res_timer = base::Time::IsHighResolutionTimerInUse(); | |
| 347 base::debug::Alias(&using_high_res_timer); | |
| 348 | |
| 349 bool message_pump_is_signaled = | |
| 350 watched_message_loop_->MessagePumpWasSignaled(); | |
| 351 base::debug::Alias(&message_pump_is_signaled); | |
| 352 #endif | |
| 353 | |
| 354 base::Time current_time = base::Time::Now(); | |
| 355 base::TimeTicks current_timeticks = base::TimeTicks::Now(); | |
| 356 base::debug::Alias(¤t_time); | |
| 357 base::debug::Alias(¤t_timeticks); | |
| 358 | |
| 359 LOG(ERROR) << "The GPU process hung. Terminating after " | |
| 360 << timeout_.InMilliseconds() << " ms."; | |
| 361 | |
| 362 // Deliberately crash the process to create a crash dump. | |
| 363 *((volatile int*)0) = 0x1337; | |
| 364 | |
| 365 terminated = true; | |
| 366 } | |
| 367 | |
| 368 #if defined(USE_X11) | |
| 369 void GpuWatchdogThread::SetupXServer() { | |
| 370 display_ = XOpenDisplay(NULL); | |
| 371 window_ = XCreateWindow(display_, DefaultRootWindow(display_), 0, 0, 1, 1, 0, | |
| 372 CopyFromParent, InputOutput, CopyFromParent, 0, NULL); | |
| 373 atom_ = XInternAtom(display_, "CHECK", False); | |
| 374 host_tty_ = GetActiveTTY(); | |
| 375 } | |
| 376 | |
| 377 void GpuWatchdogThread::SetupXChangeProp() { | |
| 378 XChangeProperty(display_, window_, atom_, XA_STRING, 8, PropModeReplace, text, | |
| 379 (arraysize(text) - 1)); | |
| 380 } | |
| 381 | |
| 382 bool GpuWatchdogThread::MatchXEventAtom(XEvent* event) { | |
| 383 if (event->xproperty.window == window_ && event->type == PropertyNotify && | |
| 384 event->xproperty.atom == atom_) | |
| 385 return true; | |
| 386 | |
| 387 return false; | |
| 388 } | |
| 389 | |
| 390 #endif | |
| 391 void GpuWatchdogThread::AddPowerObserver() { | |
| 392 task_runner()->PostTask( | |
| 393 FROM_HERE, base::Bind(&GpuWatchdogThread::OnAddPowerObserver, this)); | |
| 394 } | |
| 395 | |
| 396 void GpuWatchdogThread::OnAddPowerObserver() { | |
| 397 base::PowerMonitor* power_monitor = base::PowerMonitor::Get(); | |
| 398 DCHECK(power_monitor); | |
| 399 power_monitor->AddObserver(this); | |
| 400 } | |
| 401 | |
| 402 void GpuWatchdogThread::OnSuspend() { | |
| 403 suspended_ = true; | |
| 404 suspend_time_ = base::Time::Now(); | |
| 405 | |
| 406 // When suspending force an acknowledgement to cancel any pending termination | |
| 407 // tasks. | |
| 408 OnAcknowledge(); | |
| 409 } | |
| 410 | |
| 411 void GpuWatchdogThread::OnResume() { | |
| 412 suspended_ = false; | |
| 413 resume_time_ = base::Time::Now(); | |
| 414 | |
| 415 // After resuming jump-start the watchdog again. | |
| 416 armed_ = false; | |
| 417 OnCheck(true); | |
| 418 } | |
| 419 | |
| 420 #if defined(OS_WIN) | |
| 421 base::ThreadTicks GpuWatchdogThread::GetWatchedThreadTime() { | |
| 422 if (base::ThreadTicks::IsSupported()) { | |
| 423 // Convert ThreadTicks::Now() to TimeDelta. | |
| 424 return base::ThreadTicks::GetForThread( | |
| 425 base::PlatformThreadHandle(watched_thread_handle_)); | |
| 426 } else { | |
| 427 // Use GetThreadTimes as a backup mechanism. | |
| 428 FILETIME creation_time; | |
| 429 FILETIME exit_time; | |
| 430 FILETIME user_time; | |
| 431 FILETIME kernel_time; | |
| 432 BOOL result = GetThreadTimes(watched_thread_handle_, &creation_time, | |
| 433 &exit_time, &kernel_time, &user_time); | |
| 434 DCHECK(result); | |
| 435 | |
| 436 ULARGE_INTEGER user_time64; | |
| 437 user_time64.HighPart = user_time.dwHighDateTime; | |
| 438 user_time64.LowPart = user_time.dwLowDateTime; | |
| 439 | |
| 440 ULARGE_INTEGER kernel_time64; | |
| 441 kernel_time64.HighPart = kernel_time.dwHighDateTime; | |
| 442 kernel_time64.LowPart = kernel_time.dwLowDateTime; | |
| 443 | |
| 444 // Time is reported in units of 100 nanoseconds. Kernel and user time are | |
| 445 // summed to deal with to kinds of hangs. One is where the GPU process is | |
| 446 // stuck in user level, never calling into the kernel and kernel time is | |
| 447 // not increasing. The other is where either the kernel hangs and never | |
| 448 // returns to user level or where user level code | |
| 449 // calls into kernel level repeatedly, giving up its quanta before it is | |
| 450 // tracked, for example a loop that repeatedly Sleeps. | |
| 451 return base::ThreadTicks() + | |
| 452 base::TimeDelta::FromMilliseconds(static_cast<int64_t>( | |
| 453 (user_time64.QuadPart + kernel_time64.QuadPart) / 10000)); | |
| 454 } | |
| 455 } | |
| 456 #endif | |
| 457 | |
| 458 #if defined(USE_X11) | |
| 459 int GpuWatchdogThread::GetActiveTTY() const { | |
| 460 char tty_string[8] = {0}; | |
| 461 if (tty_file_ && !fseek(tty_file_, 0, SEEK_SET) && | |
| 462 fread(tty_string, 1, 7, tty_file_)) { | |
| 463 int tty_number; | |
| 464 size_t num_res = sscanf(tty_string, "tty%d\n", &tty_number); | |
| 465 if (num_res == 1) | |
| 466 return tty_number; | |
| 467 } | |
| 468 return -1; | |
| 469 } | |
| 470 #endif | |
| 471 | |
| 472 } // namespace content | |
| OLD | NEW |