Chromium Code Reviews| Index: chrome/browser/metrics/thread_watcher.cc |
| =================================================================== |
| --- chrome/browser/metrics/thread_watcher.cc (revision 88502) |
| +++ chrome/browser/metrics/thread_watcher.cc (working copy) |
| @@ -2,10 +2,14 @@ |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| +#include <math.h> // ceil |
| + |
| +#include "base/string_tokenizer.h" |
| #include "base/threading/thread_restrictions.h" |
| #include "build/build_config.h" |
| #include "chrome/browser/metrics/metrics_service.h" |
| #include "chrome/browser/metrics/thread_watcher.h" |
| +#include "chrome/common/chrome_switches.h" |
| #include "content/common/notification_service.h" |
| #if defined(OS_WIN) |
| @@ -15,14 +19,13 @@ |
| // static |
| const int ThreadWatcher::kPingCount = 6; |
| -// static |
| -const int ThreadWatcher::kUnresponsiveCount = 6; |
| - |
| // ThreadWatcher methods and members. |
| ThreadWatcher::ThreadWatcher(const BrowserThread::ID& thread_id, |
| const std::string& thread_name, |
| const base::TimeDelta& sleep_time, |
| - const base::TimeDelta& unresponsive_time) |
| + const base::TimeDelta& unresponsive_time, |
| + uint32 crash_on_unresponsive_count, |
| + bool crash_on_hang) |
| : thread_id_(thread_id), |
| thread_name_(thread_name), |
| sleep_time_(sleep_time), |
| @@ -36,6 +39,8 @@ |
| unresponsive_time_histogram_(NULL), |
| unresponsive_count_(0), |
| hung_processing_complete_(false), |
| + crash_on_unresponsive_count_(crash_on_unresponsive_count), |
| + crash_on_hang_(crash_on_hang), |
| ALLOW_THIS_IN_INITIALIZER_LIST(method_factory_(this)) { |
| Initialize(); |
| } |
| @@ -46,7 +51,9 @@ |
| void ThreadWatcher::StartWatching(const BrowserThread::ID& thread_id, |
| const std::string& thread_name, |
| const base::TimeDelta& sleep_time, |
| - const base::TimeDelta& unresponsive_time) { |
| + const base::TimeDelta& unresponsive_time, |
| + uint32 crash_on_unresponsive_count, |
| + bool crash_on_hang) { |
| DCHECK_GE(sleep_time.InMilliseconds(), 0); |
| DCHECK_GE(unresponsive_time.InMilliseconds(), sleep_time.InMilliseconds()); |
| @@ -55,17 +62,25 @@ |
| if (!WatchDogThread::CurrentlyOnWatchDogThread()) { |
| WatchDogThread::PostTask( |
| FROM_HERE, |
| - NewRunnableFunction( |
| - &ThreadWatcher::StartWatching, |
| - thread_id, thread_name, sleep_time, unresponsive_time)); |
| + NewRunnableFunction(&ThreadWatcher::StartWatching, |
| + thread_id, |
| + thread_name, |
| + sleep_time, |
| + unresponsive_time, |
| + crash_on_unresponsive_count, |
| + crash_on_hang)); |
| return; |
| } |
| DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); |
| // Create a new thread watcher object for the given thread and activate it. |
| - ThreadWatcher* watcher = |
| - new ThreadWatcher(thread_id, thread_name, sleep_time, unresponsive_time); |
| + ThreadWatcher* watcher = new ThreadWatcher(thread_id, |
| + thread_name, |
| + sleep_time, |
| + unresponsive_time, |
| + crash_on_unresponsive_count, |
| + crash_on_hang); |
| DCHECK(watcher); |
| // If we couldn't register the thread watcher object, we are shutting down, |
| // then don't activate thread watching. |
| @@ -244,9 +259,8 @@ |
| void ThreadWatcher::GotNoResponse() { |
| DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); |
| - // Record how other threads are responding when we don't get a response for |
| - // ping message atleast kUnresponsiveCount times. |
| - if (++unresponsive_count_ < kUnresponsiveCount) |
| + ++unresponsive_count_; |
| + if (!CrashOnUnresponsiveness()) |
| return; |
| // Record total unresponsive_time since last pong message. |
| @@ -257,6 +271,7 @@ |
| if (hung_processing_complete_) |
| return; |
| + // Record how other threads are responding. |
| int no_of_responding_threads = 0; |
| int no_of_unresponding_threads = 0; |
| ThreadWatcherList::GetStatusOfThreads(&no_of_responding_threads, |
| @@ -268,17 +283,22 @@ |
| // Record how many watched threads are not responding. |
| unresponsive_count_histogram_->Add(no_of_unresponding_threads); |
| - // Crash the browser if IO thread hasn't responded atleast kUnresponsiveCount |
| - // times and if the number of other threads is equal to 1. We picked 1 to |
| - // reduce the number of crashes and to get some sample data. |
| - if (thread_id_ == BrowserThread::IO && no_of_responding_threads == 1) { |
| + // Crash the browser if the watched thread is to be crashed on hang and if the |
| + // number of other threads responding is equal to 1. We picked 1 to reduce the |
|
jar (doing other things)
2011/06/14 00:56:27
We probably need to parameterize this value "1" as
ramant (doing other things)
2011/06/16 22:26:45
Done.
|
| + // number of crashes and to get some sample data. |
| + if (crash_on_hang_ && no_of_responding_threads == 1) { |
| int* crash = NULL; |
| - CHECK(crash++); |
| + CHECK(crash+thread_id_); |
| } |
| hung_processing_complete_ = true; |
| } |
| +bool ThreadWatcher::CrashOnUnresponsiveness() { |
| + DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); |
| + return unresponsive_count_ >= crash_on_unresponsive_count_; |
| +} |
| + |
| // ThreadWatcherList methods and members. |
| // |
| // static |
| @@ -287,16 +307,48 @@ |
| const int ThreadWatcherList::kSleepSeconds = 1; |
| // static |
| const int ThreadWatcherList::kUnresponsiveSeconds = 2; |
| +// static |
| +const int ThreadWatcherList::kUnresponsiveCount = 6; |
| -ThreadWatcherList::ThreadWatcherList() |
| +ThreadWatcherList::ThreadWatcherList(const CommandLine& command_line) |
| : last_wakeup_time_(base::TimeTicks::Now()) { |
| // Assert we are not running on WATCHDOG thread. Would be ideal to assert we |
| // are on UI thread, but Unit tests are not running on UI thread. |
| DCHECK(!WatchDogThread::CurrentlyOnWatchDogThread()); |
|
jar (doing other things)
2011/06/14 00:56:27
Is this the only reason we have a lock, perhaps
ramant (doing other things)
2011/06/16 22:26:45
Done.
|
| CHECK(!global_); |
| global_ = this; |
| + |
| // Register Notifications observer. |
| MetricsService::SetUpNotifications(&registrar_, this); |
| + |
| + // Determine the crash_on_unresponsive_count_ based on |
| + // switches::kCrashOnHangSeconds. |
| + crash_on_unresponsive_count_ = kUnresponsiveCount; |
| + |
| + std::string crash_on_hang_seconds = |
| + command_line.GetSwitchValueASCII(switches::kCrashOnHangSeconds); |
| + if (!crash_on_hang_seconds.empty()) { |
| + int crash_seconds = atoi(crash_on_hang_seconds.c_str()); |
| + if (crash_seconds > 0) { |
| + crash_on_unresponsive_count_ = static_cast<int>( |
| + ceil(static_cast<float>(crash_seconds) / kUnresponsiveSeconds)); |
| + } |
| + } |
| + |
| + // Default to crashing the browser if UI or IO threads are not responsive. |
| + std::string crash_on_hang_threads = "UI,IO"; |
| + |
| + // Get the list of unresponsive threads to crash from |
| + // switches::kCrashOnHangThreads. |
| + if (command_line.HasSwitch(switches::kCrashOnHangThreads)) { |
| + crash_on_hang_threads = |
| + command_line.GetSwitchValueASCII(switches::kCrashOnHangThreads); |
| + } |
| + |
| + // Save the thread names in a set. |
| + StringTokenizer t(crash_on_hang_threads, ","); |
|
jar (doing other things)
2011/06/14 00:56:27
Unless this is really existing practice, it is bet
ramant (doing other things)
2011/06/16 22:26:45
Done.
|
| + while (t.GetNext()) |
| + crash_on_hang_thread_names_.insert(t.token()); |
| } |
| ThreadWatcherList::~ThreadWatcherList() { |
| @@ -333,26 +385,12 @@ |
| base::TimeDelta::FromSeconds(kSleepSeconds); |
| const base::TimeDelta kUnresponsiveTime = |
| base::TimeDelta::FromSeconds(kUnresponsiveSeconds); |
| - if (BrowserThread::IsMessageLoopValid(BrowserThread::UI)) { |
| - ThreadWatcher::StartWatching(BrowserThread::UI, "UI", kSleepTime, |
| - kUnresponsiveTime); |
| - } |
| - if (BrowserThread::IsMessageLoopValid(BrowserThread::IO)) { |
| - ThreadWatcher::StartWatching(BrowserThread::IO, "IO", kSleepTime, |
| - kUnresponsiveTime); |
| - } |
| - if (BrowserThread::IsMessageLoopValid(BrowserThread::DB)) { |
| - ThreadWatcher::StartWatching(BrowserThread::DB, "DB", kSleepTime, |
| - kUnresponsiveTime); |
| - } |
| - if (BrowserThread::IsMessageLoopValid(BrowserThread::FILE)) { |
| - ThreadWatcher::StartWatching(BrowserThread::FILE, "FILE", kSleepTime, |
| - kUnresponsiveTime); |
| - } |
| - if (BrowserThread::IsMessageLoopValid(BrowserThread::CACHE)) { |
| - ThreadWatcher::StartWatching(BrowserThread::CACHE, "CACHE", kSleepTime, |
| - kUnresponsiveTime); |
| - } |
| + |
| + StartWatching(BrowserThread::UI, "UI", kSleepTime, kUnresponsiveTime); |
| + StartWatching(BrowserThread::IO, "IO", kSleepTime, kUnresponsiveTime); |
| + StartWatching(BrowserThread::DB, "DB", kSleepTime, kUnresponsiveTime); |
| + StartWatching(BrowserThread::FILE, "FILE", kSleepTime, kUnresponsiveTime); |
| + StartWatching(BrowserThread::CACHE, "CACHE", kSleepTime, kUnresponsiveTime); |
| } |
| // static |
| @@ -396,13 +434,61 @@ |
| for (RegistrationList::iterator it = global_->registered_.begin(); |
| global_->registered_.end() != it; |
| ++it) { |
| - if (it->second->unresponsive_count_ < ThreadWatcher::kUnresponsiveCount) |
| + if (it->second->CrashOnUnresponsiveness()) |
| + ++(*no_of_unresponding_threads); |
| + else |
| ++(*no_of_responding_threads); |
| - else |
| - ++(*no_of_unresponding_threads); |
| } |
| } |
| +// static |
| +void ThreadWatcherList::StartWatching( |
| + const BrowserThread::ID& thread_id, |
| + const std::string& thread_name, |
| + const base::TimeDelta& sleep_time, |
| + const base::TimeDelta& unresponsive_time) { |
| + DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); |
| + |
| + if (!BrowserThread::IsMessageLoopValid(thread_id)) |
| + return; |
| + |
| + bool crash_on_hang; |
| + uint32 crash_on_unresponsive_count; |
| + { |
| + if (!global_) |
| + return; |
| + base::AutoLock auto_lock(global_->lock_); |
| + |
| + // Hold the lock on global_ so that it is not deleted. |
| + crash_on_hang = global_->PreLockedCrashOnHang(thread_name); |
| + crash_on_unresponsive_count = global_->PreLockedCrashOnUnresponsiveCount(); |
| + } |
| + |
| + ThreadWatcher::StartWatching(thread_id, |
| + thread_name, |
| + sleep_time, |
| + unresponsive_time, |
| + crash_on_unresponsive_count, |
| + crash_on_hang); |
| +} |
| + |
| +uint32 ThreadWatcherList::CrashOnUnresponsiveCount() { |
| + base::AutoLock auto_lock(lock_); |
| + return PreLockedCrashOnUnresponsiveCount(); |
| +} |
| + |
| +bool ThreadWatcherList::CrashOnHang(const std::string& thread_name) { |
| + base::AutoLock auto_lock(lock_); |
| + return PreLockedCrashOnHang(thread_name); |
| +} |
| + |
| +bool ThreadWatcherList::PreLockedCrashOnHang( |
| + const std::string& thread_name) const { |
| + std::set<std::string>::const_iterator it = |
| + crash_on_hang_thread_names_.find(thread_name); |
| + return (it != crash_on_hang_thread_names_.end()); |
| +} |
| + |
| void ThreadWatcherList::DeleteAll() { |
| DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); |
| base::AutoLock auto_lock(lock_); |
| @@ -421,7 +507,7 @@ |
| bool need_to_awaken = false; |
| base::TimeTicks now = base::TimeTicks::Now(); |
| { |
| - base::AutoLock lock(lock_); |
| + base::AutoLock auto_lock(lock_); |
| if (now - last_wakeup_time_ > base::TimeDelta::FromSeconds(kSleepSeconds)) { |
| need_to_awaken = true; |
| last_wakeup_time_ = now; |