Index: chrome/browser/metrics/thread_watcher.cc |
=================================================================== |
--- chrome/browser/metrics/thread_watcher.cc (revision 88502) |
+++ chrome/browser/metrics/thread_watcher.cc (working copy) |
@@ -2,10 +2,12 @@ |
// Use of this source code is governed by a BSD-style license that can be |
// found in the LICENSE file. |
+#include "base/string_tokenizer.h" |
#include "base/threading/thread_restrictions.h" |
#include "build/build_config.h" |
#include "chrome/browser/metrics/metrics_service.h" |
#include "chrome/browser/metrics/thread_watcher.h" |
+#include "chrome/common/chrome_switches.h" |
#include "content/common/notification_service.h" |
#if defined(OS_WIN) |
@@ -15,9 +17,6 @@ |
// static |
const int ThreadWatcher::kPingCount = 6; |
-// static |
-const int ThreadWatcher::kUnresponsiveCount = 6; |
- |
// ThreadWatcher methods and members. |
ThreadWatcher::ThreadWatcher(const BrowserThread::ID& thread_id, |
const std::string& thread_name, |
@@ -244,9 +243,10 @@ |
void ThreadWatcher::GotNoResponse() { |
DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); |
- // Record how other threads are responding when we don't get a response for |
- // ping message atleast kUnresponsiveCount times. |
- if (++unresponsive_count_ < kUnresponsiveCount) |
+ ++unresponsive_count_; |
+ |
+ // Check if the watched thread's unresponsiveness has gone over the limit. |
+ if (ThreadWatcherList::IsResponsive(this)) |
jar (doing other things)
2011/06/10 00:38:33
This interface surprises me. We are asking a ques
ramant (doing other things)
2011/06/13 03:26:02
Done.
|
return; |
// Record total unresponsive_time since last pong message. |
@@ -268,12 +268,12 @@ |
// Record how many watched threads are not responding. |
unresponsive_count_histogram_->Add(no_of_unresponding_threads); |
- // Crash the browser if IO thread hasn't responded atleast kUnresponsiveCount |
- // times and if the number of other threads is equal to 1. We picked 1 to |
- // reduce the number of crashes and to get some sample data. |
- if (thread_id_ == BrowserThread::IO && no_of_responding_threads == 1) { |
+ // Crash the browser if watched thread is in "--crash-on-hang-threads" command |
jar (doing other things)
2011/06/10 00:38:33
This sentence seems to focus on the command line s
ramant (doing other things)
2011/06/13 03:26:02
Done.
|
+ // line switch. We crash if the number of threads responding is equal to 1. We |
+ // picked 1 to reduce the number of crashes and to get some sample data. |
+ if (no_of_responding_threads == 1 && ThreadWatcherList::CrashOnHang(this)) { |
jar (doing other things)
2011/06/10 00:38:33
I'm not sure which is better.... but I suspect we
ramant (doing other things)
2011/06/13 03:26:02
Done.
|
int* crash = NULL; |
- CHECK(crash++); |
+ CHECK(crash+thread_id_); |
} |
hung_processing_complete_ = true; |
@@ -287,8 +287,10 @@ |
const int ThreadWatcherList::kSleepSeconds = 1; |
// static |
const int ThreadWatcherList::kUnresponsiveSeconds = 2; |
+// static |
+const int ThreadWatcherList::kUnresponsiveCount = 6; |
-ThreadWatcherList::ThreadWatcherList() |
+ThreadWatcherList::ThreadWatcherList(const CommandLine& command_line) |
: last_wakeup_time_(base::TimeTicks::Now()) { |
// Assert we are not running on WATCHDOG thread. Would be ideal to assert we |
// are on UI thread, but Unit tests are not running on UI thread. |
@@ -297,11 +299,35 @@ |
global_ = this; |
// Register Notifications observer. |
MetricsService::SetUpNotifications(®istrar_, this); |
+ |
+ crash_on_unresponsive_count_ = kUnresponsiveCount; |
+ std::string crash_on_hang_seconds = |
+ command_line.GetSwitchValueASCII(switches::kCrashOnHangSeconds); |
+ if (!crash_on_hang_seconds.empty()) { |
+ int crash_seconds = atoi(crash_on_hang_seconds.c_str()); |
+ if (crash_seconds > 0) |
+ crash_on_unresponsive_count_ = crash_seconds / kUnresponsiveSeconds; |
jar (doing other things)
2011/06/10 00:38:33
I don't think you meant to scale it down. If you
ramant (doing other things)
2011/06/13 03:26:02
Done.
|
+ } |
+ |
+ std::string crash_on_hang_threads = |
+ command_line.GetSwitchValueASCII(switches::kCrashOnHangThreads); |
+ if (crash_on_hang_threads.empty()) { |
+ // Crash the browser if UI or IO threads are not responsive. |
+ crash_on_hang_threads = "UI,IO"; |
jar (doing other things)
2011/06/10 00:38:33
You probably could use an early return here. Perh
ramant (doing other things)
2011/06/13 03:26:02
Wanted to add UI and IO threads (which we wanted t
|
+ } |
+ StringTokenizer t(crash_on_hang_threads, ","); |
+ while (t.GetNext()) { |
+ std::string thread_name = t.token(); |
+ // We will ignore empty and duplicate thread_names. |
jar (doing other things)
2011/06/10 00:38:33
You probably don't need to worry about dups (which
ramant (doing other things)
2011/06/13 03:26:02
Done.
|
+ if (!thread_name.empty()) |
+ crash_on_hang_thread_names_.insert(thread_name); |
+ } |
} |
ThreadWatcherList::~ThreadWatcherList() { |
base::AutoLock auto_lock(lock_); |
DCHECK(this == global_); |
+ global_->crash_on_hang_thread_names_.clear(); |
jar (doing other things)
2011/06/10 00:38:33
I don't think you need to waste time clear()ing.
ramant (doing other things)
2011/06/13 03:26:02
Done.
|
global_ = NULL; |
} |
@@ -384,6 +410,30 @@ |
} |
// static |
+bool ThreadWatcherList::IsResponsive(ThreadWatcher* watcher) { |
jar (doing other things)
2011/06/10 00:38:33
I'd rather see just the unresponsive_count() passe
ramant (doing other things)
2011/06/13 03:26:02
Moved this method into ThreadWatcher.
Done.
|
+ DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); |
+ if (!global_) |
+ return true; |
+ if (watcher->unresponsive_count() < global_->crash_on_unresponsive_count_) |
+ return true; |
+ return false; |
jar (doing other things)
2011/06/10 00:38:33
Except for unusual circumstances (where we plan to
ramant (doing other things)
2011/06/13 03:26:02
Moved this method into ThreadWatcher.
Done.
|
+} |
+ |
+// static |
+bool ThreadWatcherList::CrashOnHang(ThreadWatcher* watcher) { |
jar (doing other things)
2011/06/10 00:38:33
I'd rather see this take a std::string thread_name
ramant (doing other things)
2011/06/13 03:26:02
Done.
|
+ DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); |
+ if (!global_) |
+ return false; |
+ if (IsResponsive(watcher)) |
+ return false; |
+ std::set<std::string>::iterator it = |
+ global_->crash_on_hang_thread_names_.find(watcher->thread_name()); |
+ if (it != global_->crash_on_hang_thread_names_.end()) |
+ return true; |
+ return false; |
jar (doing other things)
2011/06/10 00:38:33
return it != global_->crash_on_hang_thread_names_.
ramant (doing other things)
2011/06/13 03:26:02
Done.
|
+} |
+ |
+// static |
void ThreadWatcherList::GetStatusOfThreads(int* no_of_responding_threads, |
int* no_of_unresponding_threads) { |
DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); |
@@ -396,7 +446,8 @@ |
for (RegistrationList::iterator it = global_->registered_.begin(); |
global_->registered_.end() != it; |
++it) { |
- if (it->second->unresponsive_count_ < ThreadWatcher::kUnresponsiveCount) |
+ if (it->second->unresponsive_count() < |
+ global_->crash_on_unresponsive_count_) |
jar (doing other things)
2011/06/10 00:38:33
FWIW: IF you restructured this (pushing the thresh
ramant (doing other things)
2011/06/13 03:26:02
Done.
|
++(*no_of_responding_threads); |
else |
++(*no_of_unresponding_threads); |