Chromium Code Reviews| Index: chrome/browser/metrics/thread_watcher.cc |
| =================================================================== |
| --- chrome/browser/metrics/thread_watcher.cc (revision 88502) |
| +++ chrome/browser/metrics/thread_watcher.cc (working copy) |
| @@ -2,10 +2,12 @@ |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| +#include "base/string_tokenizer.h" |
| #include "base/threading/thread_restrictions.h" |
| #include "build/build_config.h" |
| #include "chrome/browser/metrics/metrics_service.h" |
| #include "chrome/browser/metrics/thread_watcher.h" |
| +#include "chrome/common/chrome_switches.h" |
| #include "content/common/notification_service.h" |
| #if defined(OS_WIN) |
| @@ -15,9 +17,6 @@ |
| // static |
| const int ThreadWatcher::kPingCount = 6; |
| -// static |
| -const int ThreadWatcher::kUnresponsiveCount = 6; |
| - |
| // ThreadWatcher methods and members. |
| ThreadWatcher::ThreadWatcher(const BrowserThread::ID& thread_id, |
| const std::string& thread_name, |
| @@ -244,9 +243,10 @@ |
| void ThreadWatcher::GotNoResponse() { |
| DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); |
| - // Record how other threads are responding when we don't get a response for |
| - // ping message atleast kUnresponsiveCount times. |
| - if (++unresponsive_count_ < kUnresponsiveCount) |
| + ++unresponsive_count_; |
| + |
| + // Check if the watched thread's unresponsiveness has gone over the limit. |
| + if (ThreadWatcherList::IsResponsive(this)) |
|
jar (doing other things)
2011/06/10 00:38:33
This interface surprises me. We are asking a ques
ramant (doing other things)
2011/06/13 03:26:02
Done.
|
| return; |
| // Record total unresponsive_time since last pong message. |
| @@ -268,12 +268,12 @@ |
| // Record how many watched threads are not responding. |
| unresponsive_count_histogram_->Add(no_of_unresponding_threads); |
| - // Crash the browser if IO thread hasn't responded atleast kUnresponsiveCount |
| - // times and if the number of other threads is equal to 1. We picked 1 to |
| - // reduce the number of crashes and to get some sample data. |
| - if (thread_id_ == BrowserThread::IO && no_of_responding_threads == 1) { |
| + // Crash the browser if watched thread is in "--crash-on-hang-threads" command |
|
jar (doing other things)
2011/06/10 00:38:33
This sentence seems to focus on the command line s
ramant (doing other things)
2011/06/13 03:26:02
Done.
|
| + // line switch. We crash if the number of threads responding is equal to 1. We |
| + // picked 1 to reduce the number of crashes and to get some sample data. |
| + if (no_of_responding_threads == 1 && ThreadWatcherList::CrashOnHang(this)) { |
|
jar (doing other things)
2011/06/10 00:38:33
I'm not sure which is better.... but I suspect we
ramant (doing other things)
2011/06/13 03:26:02
Done.
|
| int* crash = NULL; |
| - CHECK(crash++); |
| + CHECK(crash+thread_id_); |
| } |
| hung_processing_complete_ = true; |
| @@ -287,8 +287,10 @@ |
| const int ThreadWatcherList::kSleepSeconds = 1; |
| // static |
| const int ThreadWatcherList::kUnresponsiveSeconds = 2; |
| +// static |
| +const int ThreadWatcherList::kUnresponsiveCount = 6; |
| -ThreadWatcherList::ThreadWatcherList() |
| +ThreadWatcherList::ThreadWatcherList(const CommandLine& command_line) |
| : last_wakeup_time_(base::TimeTicks::Now()) { |
| // Assert we are not running on WATCHDOG thread. Would be ideal to assert we |
| // are on UI thread, but Unit tests are not running on UI thread. |
| @@ -297,11 +299,35 @@ |
| global_ = this; |
| // Register Notifications observer. |
| MetricsService::SetUpNotifications(®istrar_, this); |
| + |
| + crash_on_unresponsive_count_ = kUnresponsiveCount; |
| + std::string crash_on_hang_seconds = |
| + command_line.GetSwitchValueASCII(switches::kCrashOnHangSeconds); |
| + if (!crash_on_hang_seconds.empty()) { |
| + int crash_seconds = atoi(crash_on_hang_seconds.c_str()); |
| + if (crash_seconds > 0) |
| + crash_on_unresponsive_count_ = crash_seconds / kUnresponsiveSeconds; |
|
jar (doing other things)
2011/06/10 00:38:33
I don't think you meant to scale it down. If you
ramant (doing other things)
2011/06/13 03:26:02
Done.
|
| + } |
| + |
| + std::string crash_on_hang_threads = |
| + command_line.GetSwitchValueASCII(switches::kCrashOnHangThreads); |
| + if (crash_on_hang_threads.empty()) { |
| + // Crash the browser if UI or IO threads are not responsive. |
| + crash_on_hang_threads = "UI,IO"; |
|
jar (doing other things)
2011/06/10 00:38:33
You probably could use an early return here. Perh
ramant (doing other things)
2011/06/13 03:26:02
Wanted to add UI and IO threads (which we wanted t
|
| + } |
| + StringTokenizer t(crash_on_hang_threads, ","); |
| + while (t.GetNext()) { |
| + std::string thread_name = t.token(); |
| + // We will ignore empty and duplicate thread_names. |
|
jar (doing other things)
2011/06/10 00:38:33
You probably don't need to worry about dups (which
ramant (doing other things)
2011/06/13 03:26:02
Done.
|
| + if (!thread_name.empty()) |
| + crash_on_hang_thread_names_.insert(thread_name); |
| + } |
| } |
| ThreadWatcherList::~ThreadWatcherList() { |
| base::AutoLock auto_lock(lock_); |
| DCHECK(this == global_); |
| + global_->crash_on_hang_thread_names_.clear(); |
|
jar (doing other things)
2011/06/10 00:38:33
I don't think you need to waste time clear()ing.
ramant (doing other things)
2011/06/13 03:26:02
Done.
|
| global_ = NULL; |
| } |
| @@ -384,6 +410,30 @@ |
| } |
| // static |
| +bool ThreadWatcherList::IsResponsive(ThreadWatcher* watcher) { |
|
jar (doing other things)
2011/06/10 00:38:33
I'd rather see just the unresponsive_count() passe
ramant (doing other things)
2011/06/13 03:26:02
Moved this method into ThreadWatcher.
Done.
|
| + DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); |
| + if (!global_) |
| + return true; |
| + if (watcher->unresponsive_count() < global_->crash_on_unresponsive_count_) |
| + return true; |
| + return false; |
|
jar (doing other things)
2011/06/10 00:38:33
Except for unusual circumstances (where we plan to
ramant (doing other things)
2011/06/13 03:26:02
Moved this method into ThreadWatcher.
Done.
|
| +} |
| + |
| +// static |
| +bool ThreadWatcherList::CrashOnHang(ThreadWatcher* watcher) { |
|
jar (doing other things)
2011/06/10 00:38:33
I'd rather see this take a std::string thread_name
ramant (doing other things)
2011/06/13 03:26:02
Done.
|
| + DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); |
| + if (!global_) |
| + return false; |
| + if (IsResponsive(watcher)) |
| + return false; |
| + std::set<std::string>::iterator it = |
| + global_->crash_on_hang_thread_names_.find(watcher->thread_name()); |
| + if (it != global_->crash_on_hang_thread_names_.end()) |
| + return true; |
| + return false; |
|
jar (doing other things)
2011/06/10 00:38:33
return it != global_->crash_on_hang_thread_names_.
ramant (doing other things)
2011/06/13 03:26:02
Done.
|
| +} |
| + |
| +// static |
| void ThreadWatcherList::GetStatusOfThreads(int* no_of_responding_threads, |
| int* no_of_unresponding_threads) { |
| DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); |
| @@ -396,7 +446,8 @@ |
| for (RegistrationList::iterator it = global_->registered_.begin(); |
| global_->registered_.end() != it; |
| ++it) { |
| - if (it->second->unresponsive_count_ < ThreadWatcher::kUnresponsiveCount) |
| + if (it->second->unresponsive_count() < |
| + global_->crash_on_unresponsive_count_) |
|
jar (doing other things)
2011/06/10 00:38:33
FWIW: IF you restructured this (pushing the thresh
ramant (doing other things)
2011/06/13 03:26:02
Done.
|
| ++(*no_of_responding_threads); |
| else |
| ++(*no_of_unresponding_threads); |