Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(436)

Side by Side Diff: chrome/browser/metrics/thread_watcher.cc

Issue 7134007: Added command line switches "crash-on-hang-threads" and "crash-on-hang-seconds" (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: '' Created 9 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include <math.h> // ceil
6
7 #include "base/string_tokenizer.h"
5 #include "base/threading/thread_restrictions.h" 8 #include "base/threading/thread_restrictions.h"
6 #include "build/build_config.h" 9 #include "build/build_config.h"
7 #include "chrome/browser/metrics/metrics_service.h" 10 #include "chrome/browser/metrics/metrics_service.h"
8 #include "chrome/browser/metrics/thread_watcher.h" 11 #include "chrome/browser/metrics/thread_watcher.h"
12 #include "chrome/common/chrome_switches.h"
9 #include "content/common/notification_service.h" 13 #include "content/common/notification_service.h"
10 14
11 #if defined(OS_WIN) 15 #if defined(OS_WIN)
12 #include <Objbase.h> 16 #include <Objbase.h>
13 #endif 17 #endif
14 18
15 // static 19 // static
16 const int ThreadWatcher::kPingCount = 6; 20 const int ThreadWatcher::kPingCount = 6;
17 21
18 // static
19 const int ThreadWatcher::kUnresponsiveCount = 6;
20
21 // ThreadWatcher methods and members. 22 // ThreadWatcher methods and members.
22 ThreadWatcher::ThreadWatcher(const BrowserThread::ID& thread_id, 23 ThreadWatcher::ThreadWatcher(const BrowserThread::ID& thread_id,
23 const std::string& thread_name, 24 const std::string& thread_name,
24 const base::TimeDelta& sleep_time, 25 const base::TimeDelta& sleep_time,
25 const base::TimeDelta& unresponsive_time) 26 const base::TimeDelta& unresponsive_time,
27 uint32 crash_on_unresponsive_count,
28 bool crash_on_hang)
26 : thread_id_(thread_id), 29 : thread_id_(thread_id),
27 thread_name_(thread_name), 30 thread_name_(thread_name),
28 sleep_time_(sleep_time), 31 sleep_time_(sleep_time),
29 unresponsive_time_(unresponsive_time), 32 unresponsive_time_(unresponsive_time),
30 ping_time_(base::TimeTicks::Now()), 33 ping_time_(base::TimeTicks::Now()),
31 pong_time_(ping_time_), 34 pong_time_(ping_time_),
32 ping_sequence_number_(0), 35 ping_sequence_number_(0),
33 active_(false), 36 active_(false),
34 ping_count_(kPingCount), 37 ping_count_(kPingCount),
35 response_time_histogram_(NULL), 38 response_time_histogram_(NULL),
36 unresponsive_time_histogram_(NULL), 39 unresponsive_time_histogram_(NULL),
37 unresponsive_count_(0), 40 unresponsive_count_(0),
38 hung_processing_complete_(false), 41 hung_processing_complete_(false),
42 crash_on_unresponsive_count_(crash_on_unresponsive_count),
43 crash_on_hang_(crash_on_hang),
39 ALLOW_THIS_IN_INITIALIZER_LIST(method_factory_(this)) { 44 ALLOW_THIS_IN_INITIALIZER_LIST(method_factory_(this)) {
40 Initialize(); 45 Initialize();
41 } 46 }
42 47
43 ThreadWatcher::~ThreadWatcher() {} 48 ThreadWatcher::~ThreadWatcher() {}
44 49
45 // static 50 // static
46 void ThreadWatcher::StartWatching(const BrowserThread::ID& thread_id, 51 void ThreadWatcher::StartWatching(const BrowserThread::ID& thread_id,
47 const std::string& thread_name, 52 const std::string& thread_name,
48 const base::TimeDelta& sleep_time, 53 const base::TimeDelta& sleep_time,
49 const base::TimeDelta& unresponsive_time) { 54 const base::TimeDelta& unresponsive_time,
55 uint32 crash_on_unresponsive_count,
56 bool crash_on_hang) {
50 DCHECK_GE(sleep_time.InMilliseconds(), 0); 57 DCHECK_GE(sleep_time.InMilliseconds(), 0);
51 DCHECK_GE(unresponsive_time.InMilliseconds(), sleep_time.InMilliseconds()); 58 DCHECK_GE(unresponsive_time.InMilliseconds(), sleep_time.InMilliseconds());
52 59
53 // If we are not on WatchDogThread, then post a task to call StartWatching on 60 // If we are not on WatchDogThread, then post a task to call StartWatching on
54 // WatchDogThread. 61 // WatchDogThread.
55 if (!WatchDogThread::CurrentlyOnWatchDogThread()) { 62 if (!WatchDogThread::CurrentlyOnWatchDogThread()) {
56 WatchDogThread::PostTask( 63 WatchDogThread::PostTask(
57 FROM_HERE, 64 FROM_HERE,
58 NewRunnableFunction( 65 NewRunnableFunction(&ThreadWatcher::StartWatching,
59 &ThreadWatcher::StartWatching, 66 thread_id,
60 thread_id, thread_name, sleep_time, unresponsive_time)); 67 thread_name,
68 sleep_time,
69 unresponsive_time,
70 crash_on_unresponsive_count,
71 crash_on_hang));
61 return; 72 return;
62 } 73 }
63 74
64 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 75 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
65 76
66 // Create a new thread watcher object for the given thread and activate it. 77 // Create a new thread watcher object for the given thread and activate it.
67 ThreadWatcher* watcher = 78 ThreadWatcher* watcher = new ThreadWatcher(thread_id,
68 new ThreadWatcher(thread_id, thread_name, sleep_time, unresponsive_time); 79 thread_name,
80 sleep_time,
81 unresponsive_time,
82 crash_on_unresponsive_count,
83 crash_on_hang);
69 DCHECK(watcher); 84 DCHECK(watcher);
70 // If we couldn't register the thread watcher object, we are shutting down, 85 // If we couldn't register the thread watcher object, we are shutting down,
71 // then don't activate thread watching. 86 // then don't activate thread watching.
72 if (!ThreadWatcherList::IsRegistered(thread_id)) 87 if (!ThreadWatcherList::IsRegistered(thread_id))
73 return; 88 return;
74 watcher->ActivateThreadWatching(); 89 watcher->ActivateThreadWatching();
75 } 90 }
76 91
77 void ThreadWatcher::ActivateThreadWatching() { 92 void ThreadWatcher::ActivateThreadWatching() {
78 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 93 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
(...skipping 158 matching lines...) Expand 10 before | Expand all | Expand 10 after
237 252
238 void ThreadWatcher::ResetHangCounters() { 253 void ThreadWatcher::ResetHangCounters() {
239 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 254 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
240 unresponsive_count_ = 0; 255 unresponsive_count_ = 0;
241 hung_processing_complete_ = false; 256 hung_processing_complete_ = false;
242 } 257 }
243 258
244 void ThreadWatcher::GotNoResponse() { 259 void ThreadWatcher::GotNoResponse() {
245 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 260 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
246 261
247 // Record how other threads are responding when we don't get a response for 262 ++unresponsive_count_;
248 // ping message at least kUnresponsiveCount times. 263 if (!CrashOnUnresponsiveness())
249 if (++unresponsive_count_ < kUnresponsiveCount)
250 return; 264 return;
251 265
252 // Record total unresponsive_time since last pong message. 266 // Record total unresponsive_time since last pong message.
253 base::TimeDelta unresponse_time = base::TimeTicks::Now() - pong_time_; 267 base::TimeDelta unresponse_time = base::TimeTicks::Now() - pong_time_;
254 unresponsive_time_histogram_->AddTime(unresponse_time); 268 unresponsive_time_histogram_->AddTime(unresponse_time);
255 269
256 // We have already collected stats for the non-responding watched thread. 270 // We have already collected stats for the non-responding watched thread.
257 if (hung_processing_complete_) 271 if (hung_processing_complete_)
258 return; 272 return;
259 273
274 // Record how other threads are responding.
260 int no_of_responding_threads = 0; 275 int no_of_responding_threads = 0;
261 int no_of_unresponding_threads = 0; 276 int no_of_unresponding_threads = 0;
262 ThreadWatcherList::GetStatusOfThreads(&no_of_responding_threads, 277 ThreadWatcherList::GetStatusOfThreads(&no_of_responding_threads,
263 &no_of_unresponding_threads); 278 &no_of_unresponding_threads);
264 279
265 // Record how many watched threads are responding. 280 // Record how many watched threads are responding.
266 responsive_count_histogram_->Add(no_of_responding_threads); 281 responsive_count_histogram_->Add(no_of_responding_threads);
267 282
268 // Record how many watched threads are not responding. 283 // Record how many watched threads are not responding.
269 unresponsive_count_histogram_->Add(no_of_unresponding_threads); 284 unresponsive_count_histogram_->Add(no_of_unresponding_threads);
270 285
271 // Crash the browser if IO thread hasn't responded at least kUnresponsiveCount 286 // Crash the browser if the watched thread is to be crashed on hang and if the
272 // times and if the number of other threads is equal to 1. We picked 1 to 287 // number of other threads responding is equal to 1. We picked 1 to reduce the
jar (doing other things) 2011/06/14 00:56:27 We probably need to parameterize this value "1" as... [comment truncated]
ramant (doing other things) 2011/06/16 22:26:45 Done.
273 // reduce the number of crashes and to get some sample data. 288 // number of crashes and to get some sample data.
274 if (thread_id_ == BrowserThread::IO && no_of_responding_threads == 1) { 289 if (crash_on_hang_ && no_of_responding_threads == 1) {
275 int* crash = NULL; 290 int* crash = NULL;
276 CHECK(crash++); 291 CHECK(crash+thread_id_);
277 } 292 }
278 293
279 hung_processing_complete_ = true; 294 hung_processing_complete_ = true;
280 } 295 }
281 296
297 bool ThreadWatcher::CrashOnUnresponsiveness() {
298 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
299 return unresponsive_count_ >= crash_on_unresponsive_count_;
300 }
301
282 // ThreadWatcherList methods and members. 302 // ThreadWatcherList methods and members.
283 // 303 //
284 // static 304 // static
285 ThreadWatcherList* ThreadWatcherList::global_ = NULL; 305 ThreadWatcherList* ThreadWatcherList::global_ = NULL;
286 // static 306 // static
287 const int ThreadWatcherList::kSleepSeconds = 1; 307 const int ThreadWatcherList::kSleepSeconds = 1;
288 // static 308 // static
289 const int ThreadWatcherList::kUnresponsiveSeconds = 2; 309 const int ThreadWatcherList::kUnresponsiveSeconds = 2;
310 // static
311 const int ThreadWatcherList::kUnresponsiveCount = 6;
290 312
291 ThreadWatcherList::ThreadWatcherList() 313 ThreadWatcherList::ThreadWatcherList(const CommandLine& command_line)
292 : last_wakeup_time_(base::TimeTicks::Now()) { 314 : last_wakeup_time_(base::TimeTicks::Now()) {
293 // Assert we are not running on WATCHDOG thread. Would be ideal to assert we 315 // Assert we are not running on WATCHDOG thread. Would be ideal to assert we
294 // are on UI thread, but Unit tests are not running on UI thread. 316 // are on UI thread, but Unit tests are not running on UI thread.
295 DCHECK(!WatchDogThread::CurrentlyOnWatchDogThread()); 317 DCHECK(!WatchDogThread::CurrentlyOnWatchDogThread());
jar (doing other things) 2011/06/14 00:56:27 If this is the only reason we have a lock, perhaps... [comment truncated]
ramant (doing other things) 2011/06/16 22:26:45 Done.
296 CHECK(!global_); 318 CHECK(!global_);
297 global_ = this; 319 global_ = this;
320
298 // Register Notifications observer. 321 // Register Notifications observer.
299 MetricsService::SetUpNotifications(&registrar_, this); 322 MetricsService::SetUpNotifications(&registrar_, this);
323
324 // Determine the crash_on_unresponsive_count_ based on
325 // switches::kCrashOnHangSeconds.
326 crash_on_unresponsive_count_ = kUnresponsiveCount;
327
328 std::string crash_on_hang_seconds =
329 command_line.GetSwitchValueASCII(switches::kCrashOnHangSeconds);
330 if (!crash_on_hang_seconds.empty()) {
331 int crash_seconds = atoi(crash_on_hang_seconds.c_str());
332 if (crash_seconds > 0) {
333 crash_on_unresponsive_count_ = static_cast<int>(
334 ceil(static_cast<float>(crash_seconds) / kUnresponsiveSeconds));
335 }
336 }
337
338 // Default to crashing the browser if UI or IO threads are not responsive.
339 std::string crash_on_hang_threads = "UI,IO";
340
341 // Get the list of unresponsive threads to crash from
342 // switches::kCrashOnHangThreads.
343 if (command_line.HasSwitch(switches::kCrashOnHangThreads)) {
344 crash_on_hang_threads =
345 command_line.GetSwitchValueASCII(switches::kCrashOnHangThreads);
346 }
347
348 // Save the thread names in a set.
349 StringTokenizer t(crash_on_hang_threads, ",");
jar (doing other things) 2011/06/14 00:56:27 Unless this is really existing practice, it is bet... [comment truncated]
ramant (doing other things) 2011/06/16 22:26:45 Done.
350 while (t.GetNext())
351 crash_on_hang_thread_names_.insert(t.token());
300 } 352 }
301 353
302 ThreadWatcherList::~ThreadWatcherList() { 354 ThreadWatcherList::~ThreadWatcherList() {
303 base::AutoLock auto_lock(lock_); 355 base::AutoLock auto_lock(lock_);
304 DCHECK(this == global_); 356 DCHECK(this == global_);
305 global_ = NULL; 357 global_ = NULL;
306 } 358 }
307 359
308 // static 360 // static
309 void ThreadWatcherList::Register(ThreadWatcher* watcher) { 361 void ThreadWatcherList::Register(ThreadWatcher* watcher) {
(...skipping 16 matching lines...) Expand all
326 FROM_HERE, 378 FROM_HERE,
327 NewRunnableFunction(&ThreadWatcherList::StartWatchingAll), 379 NewRunnableFunction(&ThreadWatcherList::StartWatchingAll),
328 base::TimeDelta::FromSeconds(120).InMilliseconds()); 380 base::TimeDelta::FromSeconds(120).InMilliseconds());
329 return; 381 return;
330 } 382 }
331 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 383 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
332 const base::TimeDelta kSleepTime = 384 const base::TimeDelta kSleepTime =
333 base::TimeDelta::FromSeconds(kSleepSeconds); 385 base::TimeDelta::FromSeconds(kSleepSeconds);
334 const base::TimeDelta kUnresponsiveTime = 386 const base::TimeDelta kUnresponsiveTime =
335 base::TimeDelta::FromSeconds(kUnresponsiveSeconds); 387 base::TimeDelta::FromSeconds(kUnresponsiveSeconds);
336 if (BrowserThread::IsMessageLoopValid(BrowserThread::UI)) { 388
337 ThreadWatcher::StartWatching(BrowserThread::UI, "UI", kSleepTime, 389 StartWatching(BrowserThread::UI, "UI", kSleepTime, kUnresponsiveTime);
338 kUnresponsiveTime); 390 StartWatching(BrowserThread::IO, "IO", kSleepTime, kUnresponsiveTime);
339 } 391 StartWatching(BrowserThread::DB, "DB", kSleepTime, kUnresponsiveTime);
340 if (BrowserThread::IsMessageLoopValid(BrowserThread::IO)) { 392 StartWatching(BrowserThread::FILE, "FILE", kSleepTime, kUnresponsiveTime);
341 ThreadWatcher::StartWatching(BrowserThread::IO, "IO", kSleepTime, 393 StartWatching(BrowserThread::CACHE, "CACHE", kSleepTime, kUnresponsiveTime);
342 kUnresponsiveTime);
343 }
344 if (BrowserThread::IsMessageLoopValid(BrowserThread::DB)) {
345 ThreadWatcher::StartWatching(BrowserThread::DB, "DB", kSleepTime,
346 kUnresponsiveTime);
347 }
348 if (BrowserThread::IsMessageLoopValid(BrowserThread::FILE)) {
349 ThreadWatcher::StartWatching(BrowserThread::FILE, "FILE", kSleepTime,
350 kUnresponsiveTime);
351 }
352 if (BrowserThread::IsMessageLoopValid(BrowserThread::CACHE)) {
353 ThreadWatcher::StartWatching(BrowserThread::CACHE, "CACHE", kSleepTime,
354 kUnresponsiveTime);
355 }
356 } 394 }
357 395
358 // static 396 // static
359 void ThreadWatcherList::StopWatchingAll() { 397 void ThreadWatcherList::StopWatchingAll() {
360 // Assert we are not running on WATCHDOG thread. Would be ideal to assert we 398 // Assert we are not running on WATCHDOG thread. Would be ideal to assert we
361 // are on UI thread, but Unit tests are not running on UI thread. 399 // are on UI thread, but Unit tests are not running on UI thread.
362 DCHECK(!WatchDogThread::CurrentlyOnWatchDogThread()); 400 DCHECK(!WatchDogThread::CurrentlyOnWatchDogThread());
363 if (!global_) 401 if (!global_)
364 return; 402 return;
365 403
(...skipping 23 matching lines...) Expand all
389 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 427 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
390 *no_of_responding_threads = 0; 428 *no_of_responding_threads = 0;
391 *no_of_unresponding_threads = 0; 429 *no_of_unresponding_threads = 0;
392 if (!global_) 430 if (!global_)
393 return; 431 return;
394 432
395 base::AutoLock auto_lock(global_->lock_); 433 base::AutoLock auto_lock(global_->lock_);
396 for (RegistrationList::iterator it = global_->registered_.begin(); 434 for (RegistrationList::iterator it = global_->registered_.begin();
397 global_->registered_.end() != it; 435 global_->registered_.end() != it;
398 ++it) { 436 ++it) {
399 if (it->second->unresponsive_count_ < ThreadWatcher::kUnresponsiveCount) 437 if (it->second->CrashOnUnresponsiveness())
438 ++(*no_of_unresponding_threads);
439 else
400 ++(*no_of_responding_threads); 440 ++(*no_of_responding_threads);
401 else
402 ++(*no_of_unresponding_threads);
403 } 441 }
404 } 442 }
405 443
444 // static
445 void ThreadWatcherList::StartWatching(
446 const BrowserThread::ID& thread_id,
447 const std::string& thread_name,
448 const base::TimeDelta& sleep_time,
449 const base::TimeDelta& unresponsive_time) {
450 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
451
452 if (!BrowserThread::IsMessageLoopValid(thread_id))
453 return;
454
455 bool crash_on_hang;
456 uint32 crash_on_unresponsive_count;
457 {
458 if (!global_)
459 return;
460 base::AutoLock auto_lock(global_->lock_);
461
462 // Hold the lock on global_ so that it is not deleted.
463 crash_on_hang = global_->PreLockedCrashOnHang(thread_name);
464 crash_on_unresponsive_count = global_->PreLockedCrashOnUnresponsiveCount();
465 }
466
467 ThreadWatcher::StartWatching(thread_id,
468 thread_name,
469 sleep_time,
470 unresponsive_time,
471 crash_on_unresponsive_count,
472 crash_on_hang);
473 }
474
475 uint32 ThreadWatcherList::CrashOnUnresponsiveCount() {
476 base::AutoLock auto_lock(lock_);
477 return PreLockedCrashOnUnresponsiveCount();
478 }
479
480 bool ThreadWatcherList::CrashOnHang(const std::string& thread_name) {
481 base::AutoLock auto_lock(lock_);
482 return PreLockedCrashOnHang(thread_name);
483 }
484
485 bool ThreadWatcherList::PreLockedCrashOnHang(
486 const std::string& thread_name) const {
487 std::set<std::string>::const_iterator it =
488 crash_on_hang_thread_names_.find(thread_name);
489 return (it != crash_on_hang_thread_names_.end());
490 }
491
406 void ThreadWatcherList::DeleteAll() { 492 void ThreadWatcherList::DeleteAll() {
407 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); 493 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
408 base::AutoLock auto_lock(lock_); 494 base::AutoLock auto_lock(lock_);
409 while (!registered_.empty()) { 495 while (!registered_.empty()) {
410 RegistrationList::iterator it = registered_.begin(); 496 RegistrationList::iterator it = registered_.begin();
411 delete it->second; 497 delete it->second;
412 registered_.erase(it->first); 498 registered_.erase(it->first);
413 } 499 }
414 } 500 }
415 501
416 void ThreadWatcherList::Observe(NotificationType type, 502 void ThreadWatcherList::Observe(NotificationType type,
417 const NotificationSource& source, 503 const NotificationSource& source,
418 const NotificationDetails& details) { 504 const NotificationDetails& details) {
419 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 505 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
420 // There is some user activity, see if thread watchers are to be awakened. 506 // There is some user activity, see if thread watchers are to be awakened.
421 bool need_to_awaken = false; 507 bool need_to_awaken = false;
422 base::TimeTicks now = base::TimeTicks::Now(); 508 base::TimeTicks now = base::TimeTicks::Now();
423 { 509 {
424 base::AutoLock lock(lock_); 510 base::AutoLock auto_lock(lock_);
425 if (now - last_wakeup_time_ > base::TimeDelta::FromSeconds(kSleepSeconds)) { 511 if (now - last_wakeup_time_ > base::TimeDelta::FromSeconds(kSleepSeconds)) {
426 need_to_awaken = true; 512 need_to_awaken = true;
427 last_wakeup_time_ = now; 513 last_wakeup_time_ = now;
428 } 514 }
429 } 515 }
430 if (need_to_awaken) { 516 if (need_to_awaken) {
431 WatchDogThread::PostTask( 517 WatchDogThread::PostTask(
432 FROM_HERE, 518 FROM_HERE,
433 NewRunnableMethod(this, &ThreadWatcherList::WakeUpAll)); 519 NewRunnableMethod(this, &ThreadWatcherList::WakeUpAll));
434 } 520 }
(...skipping 106 matching lines...) Expand 10 before | Expand all | Expand 10 after
541 watchdog_thread_ = NULL; 627 watchdog_thread_ = NULL;
542 } 628 }
543 629
544 void WatchDogThread::CleanUpAfterMessageLoopDestruction() { 630 void WatchDogThread::CleanUpAfterMessageLoopDestruction() {
545 #if defined(OS_WIN) 631 #if defined(OS_WIN)
546 // Closes the COM library on the current thread. CoInitialize must 632 // Closes the COM library on the current thread. CoInitialize must
547 // be balanced by a corresponding call to CoUninitialize. 633 // be balanced by a corresponding call to CoUninitialize.
548 CoUninitialize(); 634 CoUninitialize();
549 #endif 635 #endif
550 } 636 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698