Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2961)

Unified Diff: chrome/browser/metrics/thread_watcher.cc

Issue 7134007: Added command line switches "crash-on-hang-threads" and "crash-on-hang-seconds" (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: '' Created 9 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: chrome/browser/metrics/thread_watcher.cc
===================================================================
--- chrome/browser/metrics/thread_watcher.cc (revision 88502)
+++ chrome/browser/metrics/thread_watcher.cc (working copy)
@@ -2,10 +2,12 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
+#include "base/string_tokenizer.h"
#include "base/threading/thread_restrictions.h"
#include "build/build_config.h"
#include "chrome/browser/metrics/metrics_service.h"
#include "chrome/browser/metrics/thread_watcher.h"
+#include "chrome/common/chrome_switches.h"
#include "content/common/notification_service.h"
#if defined(OS_WIN)
@@ -15,9 +17,6 @@
// static
const int ThreadWatcher::kPingCount = 6;
-// static
-const int ThreadWatcher::kUnresponsiveCount = 6;
-
// ThreadWatcher methods and members.
ThreadWatcher::ThreadWatcher(const BrowserThread::ID& thread_id,
const std::string& thread_name,
@@ -244,9 +243,10 @@
void ThreadWatcher::GotNoResponse() {
DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
- // Record how other threads are responding when we don't get a response for
- // ping message atleast kUnresponsiveCount times.
- if (++unresponsive_count_ < kUnresponsiveCount)
+ ++unresponsive_count_;
+
+ // Check if the watched thread's unresponsiveness has gone over the limit.
+ if (ThreadWatcherList::IsResponsive(this))
jar (doing other things) 2011/06/10 00:38:33 This interface surprises me. We are asking a ques
ramant (doing other things) 2011/06/13 03:26:02 Done.
return;
// Record total unresponsive_time since last pong message.
@@ -268,12 +268,12 @@
// Record how many watched threads are not responding.
unresponsive_count_histogram_->Add(no_of_unresponding_threads);
- // Crash the browser if IO thread hasn't responded atleast kUnresponsiveCount
- // times and if the number of other threads is equal to 1. We picked 1 to
- // reduce the number of crashes and to get some sample data.
- if (thread_id_ == BrowserThread::IO && no_of_responding_threads == 1) {
+ // Crash the browser if watched thread is in "--crash-on-hang-threads" command
jar (doing other things) 2011/06/10 00:38:33 This sentence seems to focus on the command line s
ramant (doing other things) 2011/06/13 03:26:02 Done.
+ // line switch. We crash if the number of threads responding is equal to 1. We
+ // picked 1 to reduce the number of crashes and to get some sample data.
+ if (no_of_responding_threads == 1 && ThreadWatcherList::CrashOnHang(this)) {
jar (doing other things) 2011/06/10 00:38:33 I'm not sure which is better.... but I suspect we
ramant (doing other things) 2011/06/13 03:26:02 Done.
int* crash = NULL;
- CHECK(crash++);
+ CHECK(crash+thread_id_);
}
hung_processing_complete_ = true;
@@ -287,8 +287,10 @@
const int ThreadWatcherList::kSleepSeconds = 1;
// static
const int ThreadWatcherList::kUnresponsiveSeconds = 2;
+// static
+const int ThreadWatcherList::kUnresponsiveCount = 6;
-ThreadWatcherList::ThreadWatcherList()
+ThreadWatcherList::ThreadWatcherList(const CommandLine& command_line)
: last_wakeup_time_(base::TimeTicks::Now()) {
// Assert we are not running on WATCHDOG thread. Would be ideal to assert we
// are on UI thread, but Unit tests are not running on UI thread.
@@ -297,11 +299,35 @@
global_ = this;
// Register Notifications observer.
MetricsService::SetUpNotifications(&registrar_, this);
+
+ crash_on_unresponsive_count_ = kUnresponsiveCount;
+ std::string crash_on_hang_seconds =
+ command_line.GetSwitchValueASCII(switches::kCrashOnHangSeconds);
+ if (!crash_on_hang_seconds.empty()) {
+ int crash_seconds = atoi(crash_on_hang_seconds.c_str());
+ if (crash_seconds > 0)
+ crash_on_unresponsive_count_ = crash_seconds / kUnresponsiveSeconds;
jar (doing other things) 2011/06/10 00:38:33 I don't think you meant to scale it down. If you
ramant (doing other things) 2011/06/13 03:26:02 Done.
+ }
+
+ std::string crash_on_hang_threads =
+ command_line.GetSwitchValueASCII(switches::kCrashOnHangThreads);
+ if (crash_on_hang_threads.empty()) {
+ // Crash the browser if UI or IO threads are not responsive.
+ crash_on_hang_threads = "UI,IO";
jar (doing other things) 2011/06/10 00:38:33 You probably could use an early return here. Perh
ramant (doing other things) 2011/06/13 03:26:02 Wanted to add UI and IO threads (which we wanted t
+ }
+ StringTokenizer t(crash_on_hang_threads, ",");
+ while (t.GetNext()) {
+ std::string thread_name = t.token();
+ // We will ignore empty and duplicate thread_names.
jar (doing other things) 2011/06/10 00:38:33 You probably don't need to worry about dups (which
ramant (doing other things) 2011/06/13 03:26:02 Done.
+ if (!thread_name.empty())
+ crash_on_hang_thread_names_.insert(thread_name);
+ }
}
ThreadWatcherList::~ThreadWatcherList() {
base::AutoLock auto_lock(lock_);
DCHECK(this == global_);
+ global_->crash_on_hang_thread_names_.clear();
jar (doing other things) 2011/06/10 00:38:33 I don't think you need to waste time clear()ing.
ramant (doing other things) 2011/06/13 03:26:02 Done.
global_ = NULL;
}
@@ -384,6 +410,30 @@
}
// static
+bool ThreadWatcherList::IsResponsive(ThreadWatcher* watcher) {
jar (doing other things) 2011/06/10 00:38:33 I'd rather see just the unresponsive_count() passe
ramant (doing other things) 2011/06/13 03:26:02 Moved this method into ThreadWatcher. Done.
+ DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
+ if (!global_)
+ return true;
+ if (watcher->unresponsive_count() < global_->crash_on_unresponsive_count_)
+ return true;
+ return false;
jar (doing other things) 2011/06/10 00:38:33 Except for unusual circumstances (where we plan to
ramant (doing other things) 2011/06/13 03:26:02 Moved this method into ThreadWatcher. Done.
+}
+
+// static
+bool ThreadWatcherList::CrashOnHang(ThreadWatcher* watcher) {
jar (doing other things) 2011/06/10 00:38:33 I'd rather see this take a std::string thread_name
ramant (doing other things) 2011/06/13 03:26:02 Done.
+ DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
+ if (!global_)
+ return false;
+ if (IsResponsive(watcher))
+ return false;
+ std::set<std::string>::iterator it =
+ global_->crash_on_hang_thread_names_.find(watcher->thread_name());
+ if (it != global_->crash_on_hang_thread_names_.end())
+ return true;
+ return false;
jar (doing other things) 2011/06/10 00:38:33 return it != global_->crash_on_hang_thread_names_.
ramant (doing other things) 2011/06/13 03:26:02 Done.
+}
+
+// static
void ThreadWatcherList::GetStatusOfThreads(int* no_of_responding_threads,
int* no_of_unresponding_threads) {
DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
@@ -396,7 +446,8 @@
for (RegistrationList::iterator it = global_->registered_.begin();
global_->registered_.end() != it;
++it) {
- if (it->second->unresponsive_count_ < ThreadWatcher::kUnresponsiveCount)
+ if (it->second->unresponsive_count() <
+ global_->crash_on_unresponsive_count_)
jar (doing other things) 2011/06/10 00:38:33 FWIW: IF you restructured this (pushing the thresh
ramant (doing other things) 2011/06/13 03:26:02 Done.
++(*no_of_responding_threads);
else
++(*no_of_unresponding_threads);

Powered by Google App Engine
This is Rietveld 408576698