Index: chrome/browser/metrics/thread_watcher.cc |
=================================================================== |
--- chrome/browser/metrics/thread_watcher.cc (revision 88502) |
+++ chrome/browser/metrics/thread_watcher.cc (working copy) |
@@ -2,10 +2,14 @@ |
// Use of this source code is governed by a BSD-style license that can be |
// found in the LICENSE file. |
+#include <math.h> // ceil |
+ |
+#include "base/string_tokenizer.h" |
#include "base/threading/thread_restrictions.h" |
#include "build/build_config.h" |
#include "chrome/browser/metrics/metrics_service.h" |
#include "chrome/browser/metrics/thread_watcher.h" |
+#include "chrome/common/chrome_switches.h" |
#include "content/common/notification_service.h" |
#if defined(OS_WIN) |
@@ -15,14 +19,13 @@ |
// static |
const int ThreadWatcher::kPingCount = 6; |
-// static |
-const int ThreadWatcher::kUnresponsiveCount = 6; |
- |
// ThreadWatcher methods and members. |
ThreadWatcher::ThreadWatcher(const BrowserThread::ID& thread_id, |
const std::string& thread_name, |
const base::TimeDelta& sleep_time, |
- const base::TimeDelta& unresponsive_time) |
+ const base::TimeDelta& unresponsive_time, |
+ uint32 crash_on_unresponsive_count, |
+ bool crash_on_hang) |
: thread_id_(thread_id), |
thread_name_(thread_name), |
sleep_time_(sleep_time), |
@@ -36,6 +39,8 @@ |
unresponsive_time_histogram_(NULL), |
unresponsive_count_(0), |
hung_processing_complete_(false), |
+ crash_on_unresponsive_count_(crash_on_unresponsive_count), |
+ crash_on_hang_(crash_on_hang), |
ALLOW_THIS_IN_INITIALIZER_LIST(method_factory_(this)) { |
Initialize(); |
} |
@@ -46,7 +51,9 @@ |
void ThreadWatcher::StartWatching(const BrowserThread::ID& thread_id, |
const std::string& thread_name, |
const base::TimeDelta& sleep_time, |
- const base::TimeDelta& unresponsive_time) { |
+ const base::TimeDelta& unresponsive_time, |
+ uint32 crash_on_unresponsive_count, |
+ bool crash_on_hang) { |
DCHECK_GE(sleep_time.InMilliseconds(), 0); |
DCHECK_GE(unresponsive_time.InMilliseconds(), sleep_time.InMilliseconds()); |
@@ -55,17 +62,25 @@ |
if (!WatchDogThread::CurrentlyOnWatchDogThread()) { |
WatchDogThread::PostTask( |
FROM_HERE, |
- NewRunnableFunction( |
- &ThreadWatcher::StartWatching, |
- thread_id, thread_name, sleep_time, unresponsive_time)); |
+ NewRunnableFunction(&ThreadWatcher::StartWatching, |
+ thread_id, |
+ thread_name, |
+ sleep_time, |
+ unresponsive_time, |
+ crash_on_unresponsive_count, |
+ crash_on_hang)); |
return; |
} |
DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); |
// Create a new thread watcher object for the given thread and activate it. |
- ThreadWatcher* watcher = |
- new ThreadWatcher(thread_id, thread_name, sleep_time, unresponsive_time); |
+ ThreadWatcher* watcher = new ThreadWatcher(thread_id, |
+ thread_name, |
+ sleep_time, |
+ unresponsive_time, |
+ crash_on_unresponsive_count, |
+ crash_on_hang); |
DCHECK(watcher); |
// If we couldn't register the thread watcher object, we are shutting down, |
// then don't activate thread watching. |
@@ -244,9 +259,8 @@ |
void ThreadWatcher::GotNoResponse() { |
DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); |
- // Record how other threads are responding when we don't get a response for |
- // ping message atleast kUnresponsiveCount times. |
- if (++unresponsive_count_ < kUnresponsiveCount) |
+ ++unresponsive_count_; |
+ if (!CrashOnUnresponsiveness()) |
return; |
// Record total unresponsive_time since last pong message. |
@@ -257,6 +271,7 @@ |
if (hung_processing_complete_) |
return; |
+ // Record how other threads are responding. |
int no_of_responding_threads = 0; |
int no_of_unresponding_threads = 0; |
ThreadWatcherList::GetStatusOfThreads(&no_of_responding_threads, |
@@ -268,17 +283,22 @@ |
// Record how many watched threads are not responding. |
unresponsive_count_histogram_->Add(no_of_unresponding_threads); |
- // Crash the browser if IO thread hasn't responded atleast kUnresponsiveCount |
- // times and if the number of other threads is equal to 1. We picked 1 to |
- // reduce the number of crashes and to get some sample data. |
- if (thread_id_ == BrowserThread::IO && no_of_responding_threads == 1) { |
+ // Crash the browser if the watched thread is to be crashed on hang and if the |
+ // number of other threads responding is equal to 1. We picked 1 to reduce the |
jar (doing other things)
2011/06/14 00:56:27
We probably need to parameterize this value "1" as
ramant (doing other things)
2011/06/16 22:26:45
Done.
|
+ // number of crashes and to get some sample data. |
+ if (crash_on_hang_ && no_of_responding_threads == 1) { |
int* crash = NULL; |
- CHECK(crash++); |
+ CHECK(crash+thread_id_); |
} |
hung_processing_complete_ = true; |
} |
+bool ThreadWatcher::CrashOnUnresponsiveness() { |
+ DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); |
+ return unresponsive_count_ >= crash_on_unresponsive_count_; |
+} |
+ |
// ThreadWatcherList methods and members. |
// |
// static |
@@ -287,16 +307,48 @@ |
const int ThreadWatcherList::kSleepSeconds = 1; |
// static |
const int ThreadWatcherList::kUnresponsiveSeconds = 2; |
+// static |
+const int ThreadWatcherList::kUnresponsiveCount = 6; |
-ThreadWatcherList::ThreadWatcherList() |
+ThreadWatcherList::ThreadWatcherList(const CommandLine& command_line) |
: last_wakeup_time_(base::TimeTicks::Now()) { |
// Assert we are not running on WATCHDOG thread. Would be ideal to assert we |
// are on UI thread, but Unit tests are not running on UI thread. |
DCHECK(!WatchDogThread::CurrentlyOnWatchDogThread()); |
jar (doing other things)
2011/06/14 00:56:27
If this is the only reason we have a lock, perhaps
ramant (doing other things)
2011/06/16 22:26:45
Done.
|
CHECK(!global_); |
global_ = this; |
+ |
// Register Notifications observer. |
MetricsService::SetUpNotifications(&registrar_, this); |
+ |
+ // Determine the crash_on_unresponsive_count_ based on |
+ // switches::kCrashOnHangSeconds. |
+ crash_on_unresponsive_count_ = kUnresponsiveCount; |
+ |
+ std::string crash_on_hang_seconds = |
+ command_line.GetSwitchValueASCII(switches::kCrashOnHangSeconds); |
+ if (!crash_on_hang_seconds.empty()) { |
+ int crash_seconds = atoi(crash_on_hang_seconds.c_str()); |
+ if (crash_seconds > 0) { |
+ crash_on_unresponsive_count_ = static_cast<int>( |
+ ceil(static_cast<float>(crash_seconds) / kUnresponsiveSeconds)); |
+ } |
+ } |
+ |
+ // Default to crashing the browser if UI or IO threads are not responsive. |
+ std::string crash_on_hang_threads = "UI,IO"; |
+ |
+ // Get the list of unresponsive threads to crash from |
+ // switches::kCrashOnHangThreads. |
+ if (command_line.HasSwitch(switches::kCrashOnHangThreads)) { |
+ crash_on_hang_threads = |
+ command_line.GetSwitchValueASCII(switches::kCrashOnHangThreads); |
+ } |
+ |
+ // Save the thread names in a set. |
+ StringTokenizer t(crash_on_hang_threads, ","); |
jar (doing other things)
2011/06/14 00:56:27
Unless this is really existing practice, it is bet
ramant (doing other things)
2011/06/16 22:26:45
Done.
|
+ while (t.GetNext()) |
+ crash_on_hang_thread_names_.insert(t.token()); |
} |
ThreadWatcherList::~ThreadWatcherList() { |
@@ -333,26 +385,12 @@ |
base::TimeDelta::FromSeconds(kSleepSeconds); |
const base::TimeDelta kUnresponsiveTime = |
base::TimeDelta::FromSeconds(kUnresponsiveSeconds); |
- if (BrowserThread::IsMessageLoopValid(BrowserThread::UI)) { |
- ThreadWatcher::StartWatching(BrowserThread::UI, "UI", kSleepTime, |
- kUnresponsiveTime); |
- } |
- if (BrowserThread::IsMessageLoopValid(BrowserThread::IO)) { |
- ThreadWatcher::StartWatching(BrowserThread::IO, "IO", kSleepTime, |
- kUnresponsiveTime); |
- } |
- if (BrowserThread::IsMessageLoopValid(BrowserThread::DB)) { |
- ThreadWatcher::StartWatching(BrowserThread::DB, "DB", kSleepTime, |
- kUnresponsiveTime); |
- } |
- if (BrowserThread::IsMessageLoopValid(BrowserThread::FILE)) { |
- ThreadWatcher::StartWatching(BrowserThread::FILE, "FILE", kSleepTime, |
- kUnresponsiveTime); |
- } |
- if (BrowserThread::IsMessageLoopValid(BrowserThread::CACHE)) { |
- ThreadWatcher::StartWatching(BrowserThread::CACHE, "CACHE", kSleepTime, |
- kUnresponsiveTime); |
- } |
+ |
+ StartWatching(BrowserThread::UI, "UI", kSleepTime, kUnresponsiveTime); |
+ StartWatching(BrowserThread::IO, "IO", kSleepTime, kUnresponsiveTime); |
+ StartWatching(BrowserThread::DB, "DB", kSleepTime, kUnresponsiveTime); |
+ StartWatching(BrowserThread::FILE, "FILE", kSleepTime, kUnresponsiveTime); |
+ StartWatching(BrowserThread::CACHE, "CACHE", kSleepTime, kUnresponsiveTime); |
} |
// static |
@@ -396,13 +434,61 @@ |
for (RegistrationList::iterator it = global_->registered_.begin(); |
global_->registered_.end() != it; |
++it) { |
- if (it->second->unresponsive_count_ < ThreadWatcher::kUnresponsiveCount) |
+ if (it->second->CrashOnUnresponsiveness()) |
+ ++(*no_of_unresponding_threads); |
+ else |
++(*no_of_responding_threads); |
- else |
- ++(*no_of_unresponding_threads); |
} |
} |
+// static |
+void ThreadWatcherList::StartWatching( |
+ const BrowserThread::ID& thread_id, |
+ const std::string& thread_name, |
+ const base::TimeDelta& sleep_time, |
+ const base::TimeDelta& unresponsive_time) { |
+ DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); |
+ |
+ if (!BrowserThread::IsMessageLoopValid(thread_id)) |
+ return; |
+ |
+ bool crash_on_hang; |
+ uint32 crash_on_unresponsive_count; |
+ { |
+ if (!global_) |
+ return; |
+ base::AutoLock auto_lock(global_->lock_); |
+ |
+ // Hold the lock on global_ so that it is not deleted. |
+ crash_on_hang = global_->PreLockedCrashOnHang(thread_name); |
+ crash_on_unresponsive_count = global_->PreLockedCrashOnUnresponsiveCount(); |
+ } |
+ |
+ ThreadWatcher::StartWatching(thread_id, |
+ thread_name, |
+ sleep_time, |
+ unresponsive_time, |
+ crash_on_unresponsive_count, |
+ crash_on_hang); |
+} |
+ |
+uint32 ThreadWatcherList::CrashOnUnresponsiveCount() { |
+ base::AutoLock auto_lock(lock_); |
+ return PreLockedCrashOnUnresponsiveCount(); |
+} |
+ |
+bool ThreadWatcherList::CrashOnHang(const std::string& thread_name) { |
+ base::AutoLock auto_lock(lock_); |
+ return PreLockedCrashOnHang(thread_name); |
+} |
+ |
+bool ThreadWatcherList::PreLockedCrashOnHang( |
+ const std::string& thread_name) const { |
+ std::set<std::string>::const_iterator it = |
+ crash_on_hang_thread_names_.find(thread_name); |
+ return (it != crash_on_hang_thread_names_.end()); |
+} |
+ |
void ThreadWatcherList::DeleteAll() { |
DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); |
base::AutoLock auto_lock(lock_); |
@@ -421,7 +507,7 @@ |
bool need_to_awaken = false; |
base::TimeTicks now = base::TimeTicks::Now(); |
{ |
- base::AutoLock lock(lock_); |
+ base::AutoLock auto_lock(lock_); |
if (now - last_wakeup_time_ > base::TimeDelta::FromSeconds(kSleepSeconds)) { |
need_to_awaken = true; |
last_wakeup_time_ = now; |