Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "base/string_tokenizer.h" | |
| 5 #include "base/threading/thread_restrictions.h" | 6 #include "base/threading/thread_restrictions.h" |
| 6 #include "build/build_config.h" | 7 #include "build/build_config.h" |
| 7 #include "chrome/browser/metrics/metrics_service.h" | 8 #include "chrome/browser/metrics/metrics_service.h" |
| 8 #include "chrome/browser/metrics/thread_watcher.h" | 9 #include "chrome/browser/metrics/thread_watcher.h" |
| 10 #include "chrome/common/chrome_switches.h" | |
| 9 #include "content/common/notification_service.h" | 11 #include "content/common/notification_service.h" |
| 10 | 12 |
| 11 #if defined(OS_WIN) | 13 #if defined(OS_WIN) |
| 12 #include <Objbase.h> | 14 #include <Objbase.h> |
| 13 #endif | 15 #endif |
| 14 | 16 |
| 15 // static | 17 // static |
| 16 const int ThreadWatcher::kPingCount = 6; | 18 const int ThreadWatcher::kPingCount = 6; |
| 17 | 19 |
| 18 // static | |
| 19 const int ThreadWatcher::kUnresponsiveCount = 6; | |
| 20 | |
| 21 // ThreadWatcher methods and members. | 20 // ThreadWatcher methods and members. |
| 22 ThreadWatcher::ThreadWatcher(const BrowserThread::ID& thread_id, | 21 ThreadWatcher::ThreadWatcher(const BrowserThread::ID& thread_id, |
| 23 const std::string& thread_name, | 22 const std::string& thread_name, |
| 24 const base::TimeDelta& sleep_time, | 23 const base::TimeDelta& sleep_time, |
| 25 const base::TimeDelta& unresponsive_time) | 24 const base::TimeDelta& unresponsive_time) |
| 26 : thread_id_(thread_id), | 25 : thread_id_(thread_id), |
| 27 thread_name_(thread_name), | 26 thread_name_(thread_name), |
| 28 sleep_time_(sleep_time), | 27 sleep_time_(sleep_time), |
| 29 unresponsive_time_(unresponsive_time), | 28 unresponsive_time_(unresponsive_time), |
| 30 ping_time_(base::TimeTicks::Now()), | 29 ping_time_(base::TimeTicks::Now()), |
| (...skipping 206 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 237 | 236 |
| 238 void ThreadWatcher::ResetHangCounters() { | 237 void ThreadWatcher::ResetHangCounters() { |
| 239 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); | 238 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); |
| 240 unresponsive_count_ = 0; | 239 unresponsive_count_ = 0; |
| 241 hung_processing_complete_ = false; | 240 hung_processing_complete_ = false; |
| 242 } | 241 } |
| 243 | 242 |
| 244 void ThreadWatcher::GotNoResponse() { | 243 void ThreadWatcher::GotNoResponse() { |
| 245 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); | 244 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); |
| 246 | 245 |
| 247 // Record how other threads are responding when we don't get a response for | 246 ++unresponsive_count_; |
| 248 // ping message atleast kUnresponsiveCount times. | 247 |
| 249 if (++unresponsive_count_ < kUnresponsiveCount) | 248 // Check if the watched thread's unresponsiveness has gone over the limit. |
| 249 if (ThreadWatcherList::IsResponsive(this)) | |
|
jar (doing other things)
2011/06/10 00:38:33
This interface surprises me. We are asking a ques
ramant (doing other things)
2011/06/13 03:26:02
Done.
| |
| 250 return; | 250 return; |
| 251 | 251 |
| 252 // Record total unresponsive_time since last pong message. | 252 // Record total unresponsive_time since last pong message. |
| 253 base::TimeDelta unresponse_time = base::TimeTicks::Now() - pong_time_; | 253 base::TimeDelta unresponse_time = base::TimeTicks::Now() - pong_time_; |
| 254 unresponsive_time_histogram_->AddTime(unresponse_time); | 254 unresponsive_time_histogram_->AddTime(unresponse_time); |
| 255 | 255 |
| 256 // We have already collected stats for the non-responding watched thread. | 256 // We have already collected stats for the non-responding watched thread. |
| 257 if (hung_processing_complete_) | 257 if (hung_processing_complete_) |
| 258 return; | 258 return; |
| 259 | 259 |
| 260 int no_of_responding_threads = 0; | 260 int no_of_responding_threads = 0; |
| 261 int no_of_unresponding_threads = 0; | 261 int no_of_unresponding_threads = 0; |
| 262 ThreadWatcherList::GetStatusOfThreads(&no_of_responding_threads, | 262 ThreadWatcherList::GetStatusOfThreads(&no_of_responding_threads, |
| 263 &no_of_unresponding_threads); | 263 &no_of_unresponding_threads); |
| 264 | 264 |
| 265 // Record how many watched threads are responding. | 265 // Record how many watched threads are responding. |
| 266 responsive_count_histogram_->Add(no_of_responding_threads); | 266 responsive_count_histogram_->Add(no_of_responding_threads); |
| 267 | 267 |
| 268 // Record how many watched threads are not responding. | 268 // Record how many watched threads are not responding. |
| 269 unresponsive_count_histogram_->Add(no_of_unresponding_threads); | 269 unresponsive_count_histogram_->Add(no_of_unresponding_threads); |
| 270 | 270 |
| 271 // Crash the browser if IO thread hasn't responded atleast kUnresponsiveCount | 271 // Crash the browser if watched thread is in "--crash-on-hang-threads" command |
|
jar (doing other things)
2011/06/10 00:38:33
This sentence seems to focus on the command line s
ramant (doing other things)
2011/06/13 03:26:02
Done.
| |
| 272 // times and if the number of other threads is equal to 1. We picked 1 to | 272 // line switch. We crash if the number of threads responding is equal to 1. We |
| 273 // reduce the number of crashes and to get some sample data. | 273 // picked 1 to reduce the number of crashes and to get some sample data. |
| 274 if (thread_id_ == BrowserThread::IO && no_of_responding_threads == 1) { | 274 if (no_of_responding_threads == 1 && ThreadWatcherList::CrashOnHang(this)) { |
|
jar (doing other things)
2011/06/10 00:38:33
I'm not sure which is better.... but I suspect we
ramant (doing other things)
2011/06/13 03:26:02
Done.
| |
| 275 int* crash = NULL; | 275 int* crash = NULL; |
| 276 CHECK(crash++); | 276 CHECK(crash+thread_id_); |
| 277 } | 277 } |
| 278 | 278 |
| 279 hung_processing_complete_ = true; | 279 hung_processing_complete_ = true; |
| 280 } | 280 } |
| 281 | 281 |
| 282 // ThreadWatcherList methods and members. | 282 // ThreadWatcherList methods and members. |
| 283 // | 283 // |
| 284 // static | 284 // static |
| 285 ThreadWatcherList* ThreadWatcherList::global_ = NULL; | 285 ThreadWatcherList* ThreadWatcherList::global_ = NULL; |
| 286 // static | 286 // static |
| 287 const int ThreadWatcherList::kSleepSeconds = 1; | 287 const int ThreadWatcherList::kSleepSeconds = 1; |
| 288 // static | 288 // static |
| 289 const int ThreadWatcherList::kUnresponsiveSeconds = 2; | 289 const int ThreadWatcherList::kUnresponsiveSeconds = 2; |
| 290 // static | |
| 291 const int ThreadWatcherList::kUnresponsiveCount = 6; | |
| 290 | 292 |
| 291 ThreadWatcherList::ThreadWatcherList() | 293 ThreadWatcherList::ThreadWatcherList(const CommandLine& command_line) |
| 292 : last_wakeup_time_(base::TimeTicks::Now()) { | 294 : last_wakeup_time_(base::TimeTicks::Now()) { |
| 293 // Assert we are not running on WATCHDOG thread. Would be ideal to assert we | 295 // Assert we are not running on WATCHDOG thread. Would be ideal to assert we |
| 294 // are on UI thread, but Unit tests are not running on UI thread. | 296 // are on UI thread, but Unit tests are not running on UI thread. |
| 295 DCHECK(!WatchDogThread::CurrentlyOnWatchDogThread()); | 297 DCHECK(!WatchDogThread::CurrentlyOnWatchDogThread()); |
| 296 CHECK(!global_); | 298 CHECK(!global_); |
| 297 global_ = this; | 299 global_ = this; |
| 298 // Register Notifications observer. | 300 // Register Notifications observer. |
| 299 MetricsService::SetUpNotifications(&registrar_, this); | 301 MetricsService::SetUpNotifications(&registrar_, this); |
| 302 | |
| 303 crash_on_unresponsive_count_ = kUnresponsiveCount; | |
| 304 std::string crash_on_hang_seconds = | |
| 305 command_line.GetSwitchValueASCII(switches::kCrashOnHangSeconds); | |
| 306 if (!crash_on_hang_seconds.empty()) { | |
| 307 int crash_seconds = atoi(crash_on_hang_seconds.c_str()); | |
| 308 if (crash_seconds > 0) | |
| 309 crash_on_unresponsive_count_ = crash_seconds / kUnresponsiveSeconds; | |
|
jar (doing other things)
2011/06/10 00:38:33
I don't think you meant to scale it down. If you
ramant (doing other things)
2011/06/13 03:26:02
Done.
| |
| 310 } | |
| 311 | |
| 312 std::string crash_on_hang_threads = | |
| 313 command_line.GetSwitchValueASCII(switches::kCrashOnHangThreads); | |
| 314 if (crash_on_hang_threads.empty()) { | |
| 315 // Crash the browser if UI or IO threads are not responsive. | |
| 316 crash_on_hang_threads = "UI,IO"; | |
|
jar (doing other things)
2011/06/10 00:38:33
You probably could use an early return here. Perh
ramant (doing other things)
2011/06/13 03:26:02
Wanted to add UI and IO threads (which we wanted t
| |
| 317 } | |
| 318 StringTokenizer t(crash_on_hang_threads, ","); | |
| 319 while (t.GetNext()) { | |
| 320 std::string thread_name = t.token(); | |
| 321 // We will ignore empty and duplicate thread_names. | |
|
jar (doing other things)
2011/06/10 00:38:33
You probably don't need to worry about dups (which
ramant (doing other things)
2011/06/13 03:26:02
Done.
| |
| 322 if (!thread_name.empty()) | |
| 323 crash_on_hang_thread_names_.insert(thread_name); | |
| 324 } | |
| 300 } | 325 } |
| 301 | 326 |
| 302 ThreadWatcherList::~ThreadWatcherList() { | 327 ThreadWatcherList::~ThreadWatcherList() { |
| 303 base::AutoLock auto_lock(lock_); | 328 base::AutoLock auto_lock(lock_); |
| 304 DCHECK(this == global_); | 329 DCHECK(this == global_); |
| 330 global_->crash_on_hang_thread_names_.clear(); | |
|
jar (doing other things)
2011/06/10 00:38:33
I don't think you need to waste time clear()ing.
ramant (doing other things)
2011/06/13 03:26:02
Done.
| |
| 305 global_ = NULL; | 331 global_ = NULL; |
| 306 } | 332 } |
| 307 | 333 |
| 308 // static | 334 // static |
| 309 void ThreadWatcherList::Register(ThreadWatcher* watcher) { | 335 void ThreadWatcherList::Register(ThreadWatcher* watcher) { |
| 310 if (!global_) | 336 if (!global_) |
| 311 return; | 337 return; |
| 312 base::AutoLock auto_lock(global_->lock_); | 338 base::AutoLock auto_lock(global_->lock_); |
| 313 DCHECK(!global_->PreLockedFind(watcher->thread_id())); | 339 DCHECK(!global_->PreLockedFind(watcher->thread_id())); |
| 314 global_->registered_[watcher->thread_id()] = watcher; | 340 global_->registered_[watcher->thread_id()] = watcher; |
| (...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 377 // Assert we are not running on WATCHDOG thread. Would be ideal to assert we | 403 // Assert we are not running on WATCHDOG thread. Would be ideal to assert we |
| 378 // are on UI thread, but Unit tests are not running on UI thread. | 404 // are on UI thread, but Unit tests are not running on UI thread. |
| 379 DCHECK(!WatchDogThread::CurrentlyOnWatchDogThread()); | 405 DCHECK(!WatchDogThread::CurrentlyOnWatchDogThread()); |
| 380 if (!global_) | 406 if (!global_) |
| 381 return; | 407 return; |
| 382 base::AutoLock auto_lock(global_->lock_); | 408 base::AutoLock auto_lock(global_->lock_); |
| 383 global_->registrar_.RemoveAll(); | 409 global_->registrar_.RemoveAll(); |
| 384 } | 410 } |
| 385 | 411 |
| 386 // static | 412 // static |
| 413 bool ThreadWatcherList::IsResponsive(ThreadWatcher* watcher) { | |
|
jar (doing other things)
2011/06/10 00:38:33
I'd rather see just the unresponsive_count() passe
ramant (doing other things)
2011/06/13 03:26:02
Moved this method into ThreadWatcher.
Done.
| |
| 414 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); | |
| 415 if (!global_) | |
| 416 return true; | |
| 417 if (watcher->unresponsive_count() < global_->crash_on_unresponsive_count_) | |
| 418 return true; | |
| 419 return false; | |
|
jar (doing other things)
2011/06/10 00:38:33
Except for unusual circumstances (where we plan to
ramant (doing other things)
2011/06/13 03:26:02
Moved this method into ThreadWatcher.
Done.
| |
| 420 } | |
| 421 | |
| 422 // static | |
| 423 bool ThreadWatcherList::CrashOnHang(ThreadWatcher* watcher) { | |
|
jar (doing other things)
2011/06/10 00:38:33
I'd rather see this take a std::string thread_name
ramant (doing other things)
2011/06/13 03:26:02
Done.
| |
| 424 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); | |
| 425 if (!global_) | |
| 426 return false; | |
| 427 if (IsResponsive(watcher)) | |
| 428 return false; | |
| 429 std::set<std::string>::iterator it = | |
| 430 global_->crash_on_hang_thread_names_.find(watcher->thread_name()); | |
| 431 if (it != global_->crash_on_hang_thread_names_.end()) | |
| 432 return true; | |
| 433 return false; | |
|
jar (doing other things)
2011/06/10 00:38:33
return it != global_->crash_on_hang_thread_names_.
ramant (doing other things)
2011/06/13 03:26:02
Done.
| |
| 434 } | |
| 435 | |
| 436 // static | |
| 387 void ThreadWatcherList::GetStatusOfThreads(int* no_of_responding_threads, | 437 void ThreadWatcherList::GetStatusOfThreads(int* no_of_responding_threads, |
| 388 int* no_of_unresponding_threads) { | 438 int* no_of_unresponding_threads) { |
| 389 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); | 439 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); |
| 390 *no_of_responding_threads = 0; | 440 *no_of_responding_threads = 0; |
| 391 *no_of_unresponding_threads = 0; | 441 *no_of_unresponding_threads = 0; |
| 392 if (!global_) | 442 if (!global_) |
| 393 return; | 443 return; |
| 394 | 444 |
| 395 base::AutoLock auto_lock(global_->lock_); | 445 base::AutoLock auto_lock(global_->lock_); |
| 396 for (RegistrationList::iterator it = global_->registered_.begin(); | 446 for (RegistrationList::iterator it = global_->registered_.begin(); |
| 397 global_->registered_.end() != it; | 447 global_->registered_.end() != it; |
| 398 ++it) { | 448 ++it) { |
| 399 if (it->second->unresponsive_count_ < ThreadWatcher::kUnresponsiveCount) | 449 if (it->second->unresponsive_count() < |
| 450 global_->crash_on_unresponsive_count_) | |
|
jar (doing other things)
2011/06/10 00:38:33
FWIW: IF you restructured this (pushing the thresh
ramant (doing other things)
2011/06/13 03:26:02
Done.
| |
| 400 ++(*no_of_responding_threads); | 451 ++(*no_of_responding_threads); |
| 401 else | 452 else |
| 402 ++(*no_of_unresponding_threads); | 453 ++(*no_of_unresponding_threads); |
| 403 } | 454 } |
| 404 } | 455 } |
| 405 | 456 |
| 406 void ThreadWatcherList::DeleteAll() { | 457 void ThreadWatcherList::DeleteAll() { |
| 407 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); | 458 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); |
| 408 base::AutoLock auto_lock(lock_); | 459 base::AutoLock auto_lock(lock_); |
| 409 while (!registered_.empty()) { | 460 while (!registered_.empty()) { |
| (...skipping 131 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 541 watchdog_thread_ = NULL; | 592 watchdog_thread_ = NULL; |
| 542 } | 593 } |
| 543 | 594 |
| 544 void WatchDogThread::CleanUpAfterMessageLoopDestruction() { | 595 void WatchDogThread::CleanUpAfterMessageLoopDestruction() { |
| 545 #if defined(OS_WIN) | 596 #if defined(OS_WIN) |
| 546 // Closes the COM library on the current thread. CoInitialize must | 597 // Closes the COM library on the current thread. CoInitialize must |
| 547 // be balanced by a corresponding call to CoUninitialize. | 598 // be balanced by a corresponding call to CoUninitialize. |
| 548 CoUninitialize(); | 599 CoUninitialize(); |
| 549 #endif | 600 #endif |
| 550 } | 601 } |
| OLD | NEW |