OLD | NEW |
---|---|
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "base/string_tokenizer.h" | |
5 #include "base/threading/thread_restrictions.h" | 6 #include "base/threading/thread_restrictions.h" |
6 #include "build/build_config.h" | 7 #include "build/build_config.h" |
7 #include "chrome/browser/metrics/metrics_service.h" | 8 #include "chrome/browser/metrics/metrics_service.h" |
8 #include "chrome/browser/metrics/thread_watcher.h" | 9 #include "chrome/browser/metrics/thread_watcher.h" |
10 #include "chrome/common/chrome_switches.h" | |
9 #include "content/common/notification_service.h" | 11 #include "content/common/notification_service.h" |
10 | 12 |
11 #if defined(OS_WIN) | 13 #if defined(OS_WIN) |
12 #include <Objbase.h> | 14 #include <Objbase.h> |
13 #endif | 15 #endif |
14 | 16 |
15 // static | 17 // static |
16 const int ThreadWatcher::kPingCount = 6; | 18 const int ThreadWatcher::kPingCount = 6; |
17 | 19 |
18 // static | |
19 const int ThreadWatcher::kUnresponsiveCount = 6; | |
20 | |
21 // ThreadWatcher methods and members. | 20 // ThreadWatcher methods and members. |
22 ThreadWatcher::ThreadWatcher(const BrowserThread::ID& thread_id, | 21 ThreadWatcher::ThreadWatcher(const BrowserThread::ID& thread_id, |
23 const std::string& thread_name, | 22 const std::string& thread_name, |
24 const base::TimeDelta& sleep_time, | 23 const base::TimeDelta& sleep_time, |
25 const base::TimeDelta& unresponsive_time) | 24 const base::TimeDelta& unresponsive_time) |
26 : thread_id_(thread_id), | 25 : thread_id_(thread_id), |
27 thread_name_(thread_name), | 26 thread_name_(thread_name), |
28 sleep_time_(sleep_time), | 27 sleep_time_(sleep_time), |
29 unresponsive_time_(unresponsive_time), | 28 unresponsive_time_(unresponsive_time), |
30 ping_time_(base::TimeTicks::Now()), | 29 ping_time_(base::TimeTicks::Now()), |
(...skipping 206 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
237 | 236 |
238 void ThreadWatcher::ResetHangCounters() { | 237 void ThreadWatcher::ResetHangCounters() { |
239 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); | 238 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); |
240 unresponsive_count_ = 0; | 239 unresponsive_count_ = 0; |
241 hung_processing_complete_ = false; | 240 hung_processing_complete_ = false; |
242 } | 241 } |
243 | 242 |
244 void ThreadWatcher::GotNoResponse() { | 243 void ThreadWatcher::GotNoResponse() { |
245 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); | 244 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); |
246 | 245 |
247 // Record how other threads are responding when we don't get a response for | 246 ++unresponsive_count_; |
248 // ping message atleast kUnresponsiveCount times. | 247 |
249 if (++unresponsive_count_ < kUnresponsiveCount) | 248 // Check if the watched thread's unresponsiveness has gone over the limit. |
249 if (ThreadWatcherList::IsResponsive(this)) | |
jar (doing other things)
2011/06/10 00:38:33
This interface surprises me. We are asking a ques
ramant (doing other things)
2011/06/13 03:26:02
Done.
| |
250 return; | 250 return; |
251 | 251 |
252 // Record total unresponsive_time since last pong message. | 252 // Record total unresponsive_time since last pong message. |
253 base::TimeDelta unresponse_time = base::TimeTicks::Now() - pong_time_; | 253 base::TimeDelta unresponse_time = base::TimeTicks::Now() - pong_time_; |
254 unresponsive_time_histogram_->AddTime(unresponse_time); | 254 unresponsive_time_histogram_->AddTime(unresponse_time); |
255 | 255 |
256 // We have already collected stats for the non-responding watched thread. | 256 // We have already collected stats for the non-responding watched thread. |
257 if (hung_processing_complete_) | 257 if (hung_processing_complete_) |
258 return; | 258 return; |
259 | 259 |
260 int no_of_responding_threads = 0; | 260 int no_of_responding_threads = 0; |
261 int no_of_unresponding_threads = 0; | 261 int no_of_unresponding_threads = 0; |
262 ThreadWatcherList::GetStatusOfThreads(&no_of_responding_threads, | 262 ThreadWatcherList::GetStatusOfThreads(&no_of_responding_threads, |
263 &no_of_unresponding_threads); | 263 &no_of_unresponding_threads); |
264 | 264 |
265 // Record how many watched threads are responding. | 265 // Record how many watched threads are responding. |
266 responsive_count_histogram_->Add(no_of_responding_threads); | 266 responsive_count_histogram_->Add(no_of_responding_threads); |
267 | 267 |
268 // Record how many watched threads are not responding. | 268 // Record how many watched threads are not responding. |
269 unresponsive_count_histogram_->Add(no_of_unresponding_threads); | 269 unresponsive_count_histogram_->Add(no_of_unresponding_threads); |
270 | 270 |
271 // Crash the browser if IO thread hasn't responded atleast kUnresponsiveCount | 271 // Crash the browser if watched thread is in "--crash-on-hang-threads" command |
jar (doing other things)
2011/06/10 00:38:33
This sentence seems to focus on the command line s
ramant (doing other things)
2011/06/13 03:26:02
Done.
| |
272 // times and if the number of other threads is equal to 1. We picked 1 to | 272 // line switch. We crash if the number of threads responding is equal to 1. We |
273 // reduce the number of crashes and to get some sample data. | 273 // picked 1 to reduce the number of crashes and to get some sample data. |
274 if (thread_id_ == BrowserThread::IO && no_of_responding_threads == 1) { | 274 if (no_of_responding_threads == 1 && ThreadWatcherList::CrashOnHang(this)) { |
jar (doing other things)
2011/06/10 00:38:33
I'm not sure which is better.... but I suspect we
ramant (doing other things)
2011/06/13 03:26:02
Done.
| |
275 int* crash = NULL; | 275 int* crash = NULL; |
276 CHECK(crash++); | 276 CHECK(crash+thread_id_); |
277 } | 277 } |
278 | 278 |
279 hung_processing_complete_ = true; | 279 hung_processing_complete_ = true; |
280 } | 280 } |
281 | 281 |
282 // ThreadWatcherList methods and members. | 282 // ThreadWatcherList methods and members. |
283 // | 283 // |
284 // static | 284 // static |
285 ThreadWatcherList* ThreadWatcherList::global_ = NULL; | 285 ThreadWatcherList* ThreadWatcherList::global_ = NULL; |
286 // static | 286 // static |
287 const int ThreadWatcherList::kSleepSeconds = 1; | 287 const int ThreadWatcherList::kSleepSeconds = 1; |
288 // static | 288 // static |
289 const int ThreadWatcherList::kUnresponsiveSeconds = 2; | 289 const int ThreadWatcherList::kUnresponsiveSeconds = 2; |
290 // static | |
291 const int ThreadWatcherList::kUnresponsiveCount = 6; | |
290 | 292 |
291 ThreadWatcherList::ThreadWatcherList() | 293 ThreadWatcherList::ThreadWatcherList(const CommandLine& command_line) |
292 : last_wakeup_time_(base::TimeTicks::Now()) { | 294 : last_wakeup_time_(base::TimeTicks::Now()) { |
293 // Assert we are not running on WATCHDOG thread. Would be ideal to assert we | 295 // Assert we are not running on WATCHDOG thread. Would be ideal to assert we |
294 // are on UI thread, but Unit tests are not running on UI thread. | 296 // are on UI thread, but Unit tests are not running on UI thread. |
295 DCHECK(!WatchDogThread::CurrentlyOnWatchDogThread()); | 297 DCHECK(!WatchDogThread::CurrentlyOnWatchDogThread()); |
296 CHECK(!global_); | 298 CHECK(!global_); |
297 global_ = this; | 299 global_ = this; |
298 // Register Notifications observer. | 300 // Register Notifications observer. |
299 MetricsService::SetUpNotifications(&registrar_, this); | 301 MetricsService::SetUpNotifications(&registrar_, this); |
302 | |
303 crash_on_unresponsive_count_ = kUnresponsiveCount; | |
304 std::string crash_on_hang_seconds = | |
305 command_line.GetSwitchValueASCII(switches::kCrashOnHangSeconds); | |
306 if (!crash_on_hang_seconds.empty()) { | |
307 int crash_seconds = atoi(crash_on_hang_seconds.c_str()); | |
308 if (crash_seconds > 0) | |
309 crash_on_unresponsive_count_ = crash_seconds / kUnresponsiveSeconds; | |
jar (doing other things)
2011/06/10 00:38:33
I don't think you meant to scale it down. If you
ramant (doing other things)
2011/06/13 03:26:02
Done.
| |
310 } | |
311 | |
312 std::string crash_on_hang_threads = | |
313 command_line.GetSwitchValueASCII(switches::kCrashOnHangThreads); | |
314 if (crash_on_hang_threads.empty()) { | |
315 // Crash the browser if UI or IO threads are not responsive. | |
316 crash_on_hang_threads = "UI,IO"; | |
jar (doing other things)
2011/06/10 00:38:33
You probably could use an early return here. Perh
ramant (doing other things)
2011/06/13 03:26:02
Wanted to add UI and IO threads (which we wanted t
| |
317 } | |
318 StringTokenizer t(crash_on_hang_threads, ","); | |
319 while (t.GetNext()) { | |
320 std::string thread_name = t.token(); | |
321 // We will ignore empty and duplicate thread_names. | |
jar (doing other things)
2011/06/10 00:38:33
You probably don't need to worry about dups (which
ramant (doing other things)
2011/06/13 03:26:02
Done.
| |
322 if (!thread_name.empty()) | |
323 crash_on_hang_thread_names_.insert(thread_name); | |
324 } | |
300 } | 325 } |
301 | 326 |
302 ThreadWatcherList::~ThreadWatcherList() { | 327 ThreadWatcherList::~ThreadWatcherList() { |
303 base::AutoLock auto_lock(lock_); | 328 base::AutoLock auto_lock(lock_); |
304 DCHECK(this == global_); | 329 DCHECK(this == global_); |
330 global_->crash_on_hang_thread_names_.clear(); | |
jar (doing other things)
2011/06/10 00:38:33
I don't think you need to waste time clear()ing.
ramant (doing other things)
2011/06/13 03:26:02
Done.
| |
305 global_ = NULL; | 331 global_ = NULL; |
306 } | 332 } |
307 | 333 |
308 // static | 334 // static |
309 void ThreadWatcherList::Register(ThreadWatcher* watcher) { | 335 void ThreadWatcherList::Register(ThreadWatcher* watcher) { |
310 if (!global_) | 336 if (!global_) |
311 return; | 337 return; |
312 base::AutoLock auto_lock(global_->lock_); | 338 base::AutoLock auto_lock(global_->lock_); |
313 DCHECK(!global_->PreLockedFind(watcher->thread_id())); | 339 DCHECK(!global_->PreLockedFind(watcher->thread_id())); |
314 global_->registered_[watcher->thread_id()] = watcher; | 340 global_->registered_[watcher->thread_id()] = watcher; |
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
377 // Assert we are not running on WATCHDOG thread. Would be ideal to assert we | 403 // Assert we are not running on WATCHDOG thread. Would be ideal to assert we |
378 // are on UI thread, but Unit tests are not running on UI thread. | 404 // are on UI thread, but Unit tests are not running on UI thread. |
379 DCHECK(!WatchDogThread::CurrentlyOnWatchDogThread()); | 405 DCHECK(!WatchDogThread::CurrentlyOnWatchDogThread()); |
380 if (!global_) | 406 if (!global_) |
381 return; | 407 return; |
382 base::AutoLock auto_lock(global_->lock_); | 408 base::AutoLock auto_lock(global_->lock_); |
383 global_->registrar_.RemoveAll(); | 409 global_->registrar_.RemoveAll(); |
384 } | 410 } |
385 | 411 |
386 // static | 412 // static |
413 bool ThreadWatcherList::IsResponsive(ThreadWatcher* watcher) { | |
jar (doing other things)
2011/06/10 00:38:33
I'd rather see just the unresponsive_count() passe
ramant (doing other things)
2011/06/13 03:26:02
Moved this method into ThreadWatcher.
Done.
| |
414 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); | |
415 if (!global_) | |
416 return true; | |
417 if (watcher->unresponsive_count() < global_->crash_on_unresponsive_count_) | |
418 return true; | |
419 return false; | |
jar (doing other things)
2011/06/10 00:38:33
Except for unusual circumstances (where we plan to
ramant (doing other things)
2011/06/13 03:26:02
Moved this method into ThreadWatcher.
Done.
| |
420 } | |
421 | |
422 // static | |
423 bool ThreadWatcherList::CrashOnHang(ThreadWatcher* watcher) { | |
jar (doing other things)
2011/06/10 00:38:33
I'd rather see this take a std::string thread_name
ramant (doing other things)
2011/06/13 03:26:02
Done.
| |
424 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); | |
425 if (!global_) | |
426 return false; | |
427 if (IsResponsive(watcher)) | |
428 return false; | |
429 std::set<std::string>::iterator it = | |
430 global_->crash_on_hang_thread_names_.find(watcher->thread_name()); | |
431 if (it != global_->crash_on_hang_thread_names_.end()) | |
432 return true; | |
433 return false; | |
jar (doing other things)
2011/06/10 00:38:33
return it != global_->crash_on_hang_thread_names_.
ramant (doing other things)
2011/06/13 03:26:02
Done.
| |
434 } | |
435 | |
436 // static | |
387 void ThreadWatcherList::GetStatusOfThreads(int* no_of_responding_threads, | 437 void ThreadWatcherList::GetStatusOfThreads(int* no_of_responding_threads, |
388 int* no_of_unresponding_threads) { | 438 int* no_of_unresponding_threads) { |
389 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); | 439 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); |
390 *no_of_responding_threads = 0; | 440 *no_of_responding_threads = 0; |
391 *no_of_unresponding_threads = 0; | 441 *no_of_unresponding_threads = 0; |
392 if (!global_) | 442 if (!global_) |
393 return; | 443 return; |
394 | 444 |
395 base::AutoLock auto_lock(global_->lock_); | 445 base::AutoLock auto_lock(global_->lock_); |
396 for (RegistrationList::iterator it = global_->registered_.begin(); | 446 for (RegistrationList::iterator it = global_->registered_.begin(); |
397 global_->registered_.end() != it; | 447 global_->registered_.end() != it; |
398 ++it) { | 448 ++it) { |
399 if (it->second->unresponsive_count_ < ThreadWatcher::kUnresponsiveCount) | 449 if (it->second->unresponsive_count() < |
450 global_->crash_on_unresponsive_count_) | |
jar (doing other things)
2011/06/10 00:38:33
FWIW: IF you restructured this (pushing the thresh
ramant (doing other things)
2011/06/13 03:26:02
Done.
| |
400 ++(*no_of_responding_threads); | 451 ++(*no_of_responding_threads); |
401 else | 452 else |
402 ++(*no_of_unresponding_threads); | 453 ++(*no_of_unresponding_threads); |
403 } | 454 } |
404 } | 455 } |
405 | 456 |
406 void ThreadWatcherList::DeleteAll() { | 457 void ThreadWatcherList::DeleteAll() { |
407 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); | 458 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread()); |
408 base::AutoLock auto_lock(lock_); | 459 base::AutoLock auto_lock(lock_); |
409 while (!registered_.empty()) { | 460 while (!registered_.empty()) { |
(...skipping 131 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
541 watchdog_thread_ = NULL; | 592 watchdog_thread_ = NULL; |
542 } | 593 } |
543 | 594 |
544 void WatchDogThread::CleanUpAfterMessageLoopDestruction() { | 595 void WatchDogThread::CleanUpAfterMessageLoopDestruction() { |
545 #if defined(OS_WIN) | 596 #if defined(OS_WIN) |
546 // Closes the COM library on the current thread. CoInitialize must | 597 // Closes the COM library on the current thread. CoInitialize must |
547 // be balanced by a corresponding call to CoUninitialize. | 598 // be balanced by a corresponding call to CoUninitialize. |
548 CoUninitialize(); | 599 CoUninitialize(); |
549 #endif | 600 #endif |
550 } | 601 } |
OLD | NEW |