Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // This file defines a WatchDog thread that monitors the responsiveness of other | 5 // This file defines a WatchDog thread that monitors the responsiveness of other |
| 6 // browser threads like UI, IO, DB, FILE and CACHED threads. It also defines | 6 // browser threads like UI, IO, DB, FILE and CACHED threads. It also defines |
| 7 // ThreadWatcher class which performs health check on threads that would like to | 7 // ThreadWatcher class which performs health check on threads that would like to |
| 8 // be watched. This file also defines ThreadWatcherList class that has list of | 8 // be watched. This file also defines ThreadWatcherList class that has list of |
| 9 // all active ThreadWatcher objects. | 9 // all active ThreadWatcher objects. |
| 10 // | 10 // |
| (...skipping 14 matching lines...) Expand all Loading... | |
| 25 // | 25 // |
| 26 // base::TimeDelta sleep_time = base::TimeDelta::FromSeconds(5); | 26 // base::TimeDelta sleep_time = base::TimeDelta::FromSeconds(5); |
| 27 // base::TimeDelta unresponsive_time = base::TimeDelta::FromSeconds(10); | 27 // base::TimeDelta unresponsive_time = base::TimeDelta::FromSeconds(10); |
| 28 // ThreadWatcher::StartWatching(BrowserThread::IO, "IO", sleep_time, | 28 // ThreadWatcher::StartWatching(BrowserThread::IO, "IO", sleep_time, |
| 29 // unresponsive_time); | 29 // unresponsive_time); |
| 30 | 30 |
| 31 #ifndef CHROME_BROWSER_METRICS_THREAD_WATCHER_H_ | 31 #ifndef CHROME_BROWSER_METRICS_THREAD_WATCHER_H_ |
| 32 #define CHROME_BROWSER_METRICS_THREAD_WATCHER_H_ | 32 #define CHROME_BROWSER_METRICS_THREAD_WATCHER_H_ |
| 33 | 33 |
| 34 #include <map> | 34 #include <map> |
| 35 #include <set> | |
| 35 #include <string> | 36 #include <string> |
| 36 #include <vector> | 37 #include <vector> |
| 37 | 38 |
| 38 #include "base/basictypes.h" | 39 #include "base/basictypes.h" |
| 40 #include "base/command_line.h" | |
| 39 #include "base/gtest_prod_util.h" | 41 #include "base/gtest_prod_util.h" |
| 40 #include "base/memory/ref_counted.h" | 42 #include "base/memory/ref_counted.h" |
| 41 #include "base/memory/scoped_ptr.h" | 43 #include "base/memory/scoped_ptr.h" |
| 42 #include "base/message_loop.h" | 44 #include "base/message_loop.h" |
| 43 #include "base/metrics/histogram.h" | 45 #include "base/metrics/histogram.h" |
| 44 #include "base/synchronization/lock.h" | 46 #include "base/synchronization/lock.h" |
| 45 #include "base/task.h" | 47 #include "base/task.h" |
| 46 #include "base/threading/thread.h" | 48 #include "base/threading/thread.h" |
| 47 #include "base/time.h" | 49 #include "base/time.h" |
| 48 #include "content/browser/browser_thread.h" | 50 #include "content/browser/browser_thread.h" |
| (...skipping 102 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 151 Task* callback_task); | 153 Task* callback_task); |
| 152 | 154 |
| 153 // This method resets unresponsive_count_ to zero because watched thread is | 155 // This method resets unresponsive_count_ to zero because watched thread is |
| 154 // responding to the ping message with a pong message. | 156 // responding to the ping message with a pong message. |
| 155 void ResetHangCounters(); | 157 void ResetHangCounters(); |
| 156 | 158 |
| 157 // This method records watched thread is not responding to the ping message. | 159 // This method records watched thread is not responding to the ping message. |
| 158 // It increments unresponsive_count_ by 1. | 160 // It increments unresponsive_count_ by 1. |
| 159 void GotNoResponse(); | 161 void GotNoResponse(); |
| 160 | 162 |
| 163 // Helper method to get |unresponsive_count_|. | |
| 164 uint32 unresponsive_count() const { return unresponsive_count_; } | |
| 165 | |
| 161 // This is the number of ping messages to be sent when the user is idle. | 166 // This is the number of ping messages to be sent when the user is idle. |
| 162 // ping_count_ will be initialized to kPingCount whenever user becomes active. | 167 // ping_count_ will be initialized to kPingCount whenever user becomes active. |
| 163 static const int kPingCount; | 168 static const int kPingCount; |
| 164 | 169 |
| 165 // This value is used to determine if the watched thread is responsive or not. | |
| 166 // If unresponsive_count_ is less than kUnresponsiveCount then watched thread | |
| 167 // is considered as responsive (in responsive_count_histogram_) otherwise it | |
| 168 // is considered as unresponsive (in unresponsive_count_histogram_). | |
| 169 static const int kUnresponsiveCount; | |
| 170 | |
| 171 // The thread_id of the thread being watched. Only one instance can exist for | 170 // The thread_id of the thread being watched. Only one instance can exist for |
| 172 // the given thread_id of the thread being watched. | 171 // the given thread_id of the thread being watched. |
| 173 const BrowserThread::ID thread_id_; | 172 const BrowserThread::ID thread_id_; |
| 174 | 173 |
| 175 // The name of the thread being watched. | 174 // The name of the thread being watched. |
| 176 const std::string thread_name_; | 175 const std::string thread_name_; |
| 177 | 176 |
| 178 // It is the sleep time between the receipt of a pong message back, | 177 // It is the sleep time between the receipt of a pong message back, |
| 179 // and the sending of another ping message. | 178 // and the sending of another ping message. |
| 180 const base::TimeDelta sleep_time_; | 179 const base::TimeDelta sleep_time_; |
| (...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 216 | 215 |
| 217 // Histogram that keeps track of how many threads are not responding when we | 216 // Histogram that keeps track of how many threads are not responding when we |
| 218 // got no response (GotNoResponse) from the watched thread. Count includes the | 217 // got no response (GotNoResponse) from the watched thread. Count includes the |
| 219 // thread that got no response. | 218 // thread that got no response. |
| 220 base::Histogram* unresponsive_count_histogram_; | 219 base::Histogram* unresponsive_count_histogram_; |
| 221 | 220 |
| 222 // This counter tracks the unresponsiveness of watched thread. If this value | 221 // This counter tracks the unresponsiveness of watched thread. If this value |
| 223 // is zero then watched thread has responded with a pong message. This is | 222 // is zero then watched thread has responded with a pong message. This is |
| 224 // incremented by 1 when we got no response (GotNoResponse) from the watched | 223 // incremented by 1 when we got no response (GotNoResponse) from the watched |
| 225 // thread. | 224 // thread. |
| 226 int unresponsive_count_; | 225 uint32 unresponsive_count_; |
| 227 | 226 |
| 228 // This is set to true when we would have crashed the browser because the | 227 // This is set to true when we would have crashed the browser because the |
| 229 // watched thread hasn't responded at least 6 times. It is reset to false when | 228 // watched thread hasn't responded at least 6 times. It is reset to false when |
| 230 // watched thread responds with a pong message. | 229 // watched thread responds with a pong message. |
| 231 bool hung_processing_complete_; | 230 bool hung_processing_complete_; |
| 232 | 231 |
| 233 // We use this factory to create callback tasks for ThreadWatcher object. We | 232 // We use this factory to create callback tasks for ThreadWatcher object. We |
| 234 // use this during ping-pong messaging between WatchDog thread and watched | 233 // use this during ping-pong messaging between WatchDog thread and watched |
| 235 // thread. | 234 // thread. |
| 236 ScopedRunnableMethodFactory<ThreadWatcher> method_factory_; | 235 ScopedRunnableMethodFactory<ThreadWatcher> method_factory_; |
| 237 | 236 |
| 238 DISALLOW_COPY_AND_ASSIGN(ThreadWatcher); | 237 DISALLOW_COPY_AND_ASSIGN(ThreadWatcher); |
| 239 }; | 238 }; |
| 240 | 239 |
| 241 // Class with a list of all active thread watchers. A thread watcher is active | 240 // Class with a list of all active thread watchers. A thread watcher is active |
| 242 // if it has been registered, which includes determining the histogram name. This | 241 // if it has been registered, which includes determining the histogram name. This |
| 243 // class provides utility functions to start and stop watching all browser | 242 // class provides utility functions to start and stop watching all browser |
| 244 // threads. Only one instance of this class exists. | 243 // threads. Only one instance of this class exists. |
| 245 class ThreadWatcherList : public NotificationObserver { | 244 class ThreadWatcherList : public NotificationObserver { |
| 246 public: | 245 public: |
| 247 // A map from BrowserThread to the actual instances. | 246 // A map from BrowserThread to the actual instances. |
| 248 typedef std::map<BrowserThread::ID, ThreadWatcher*> RegistrationList; | 247 typedef std::map<BrowserThread::ID, ThreadWatcher*> RegistrationList; |
| 249 | 248 |
| 250 // This singleton holds the global list of registered ThreadWatchers. | 249 // This singleton holds the global list of registered ThreadWatchers. |
| 251 ThreadWatcherList(); | 250 explicit ThreadWatcherList(const CommandLine& command_line); |
| 252 // Destructor deletes all registered ThreadWatcher instances. | 251 // Destructor deletes all registered ThreadWatcher instances. |
| 253 virtual ~ThreadWatcherList(); | 252 virtual ~ThreadWatcherList(); |
| 254 | 253 |
| 255 // Register() stores a pointer to the given ThreadWatcher in a global map. | 254 // Register() stores a pointer to the given ThreadWatcher in a global map. |
| 256 static void Register(ThreadWatcher* watcher); | 255 static void Register(ThreadWatcher* watcher); |
| 257 | 256 |
| 258 // This method returns true if the ThreadWatcher object is registered. | 257 // This method returns true if the ThreadWatcher object is registered. |
| 259 static bool IsRegistered(const BrowserThread::ID thread_id); | 258 static bool IsRegistered(const BrowserThread::ID thread_id); |
| 260 | 259 |
| 261 // This method posts a task on WatchDogThread to start watching all browser | 260 // This method posts a task on WatchDogThread to start watching all browser |
| 262 // threads. | 261 // threads. |
| 263 // This method is accessible on UI thread. | 262 // This method is accessible on UI thread. |
| 264 static void StartWatchingAll(); | 263 static void StartWatchingAll(); |
| 265 | 264 |
| 266 // This method posts a task on WatchDogThread to RevokeAll tasks and to | 265 // This method posts a task on WatchDogThread to RevokeAll tasks and to |
| 267 // deactivate thread watching of other threads and tell NotificationService to | 266 // deactivate thread watching of other threads and tell NotificationService to |
| 268 // stop calling Observe. | 267 // stop calling Observe. |
| 269 // This method is accessible on UI thread. | 268 // This method is accessible on UI thread. |
| 270 static void StopWatchingAll(); | 269 static void StopWatchingAll(); |
| 271 | 270 |
| 272 // RemoveAll NotificationTypes that are being observed. | 271 // RemoveAll NotificationTypes that are being observed. |
| 273 // This method is accessible on UI thread. | 272 // This method is accessible on UI thread. |
| 274 static void RemoveNotifications(); | 273 static void RemoveNotifications(); |
| 275 | 274 |
| 275 // Returns true if watched thread's |unresponsive_count_| is less than | |
| 276 // |crash_on_unresponsive_count_|. A watched thread is considered as | |
| 277 // unresponsive if it has not responded with a pong message for | |
| 278 // |crash_on_unresponsive_count_| number of ping messages. | |
| 279 static bool IsResponsive(ThreadWatcher* watcher); | |
| 280 | |
| 281 // Returns true if the watched thread is not responsive and is listed | |
| 282 // as one of the threads in "--crash-on-hang-threads" command line switch. | |
| 283 static bool CrashOnHang(ThreadWatcher* watcher); | |
| 284 | |
| 276 // This method returns number of watched threads that have responded and | 285 // This method returns number of watched threads that have responded and |
| 277 // threads that have not responded with a pong message. | 286 // threads that have not responded with a pong message for |
| 287 // |crash_on_unresponsive_count_| number of ping messages. | |
| 278 static void GetStatusOfThreads(int* no_of_responding_threads, | 288 static void GetStatusOfThreads(int* no_of_responding_threads, |
| 279 int* no_of_unresponding_threads); | 289 int* no_of_unresponding_threads); |
| 280 | 290 |
| 281 private: | 291 private: |
| 282 // Allow tests to access our innards for testing purposes. | 292 // Allow tests to access our innards for testing purposes. |
| 283 FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, Registration); | 293 FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, Registration); |
| 284 | 294 |
| 285 // Delete all thread watcher objects and remove them from global map. | 295 // Delete all thread watcher objects and remove them from global map. |
| 286 // This method is accessible on WatchDogThread. | 296 // This method is accessible on WatchDogThread. |
| 287 void DeleteAll(); | 297 void DeleteAll(); |
| (...skipping 21 matching lines...) Expand all Loading... | |
| 309 | 319 |
| 310 static ThreadWatcherList* global_; // The singleton of this class. | 320 static ThreadWatcherList* global_; // The singleton of this class. |
| 311 | 321 |
| 312 // This is the wait time between ping messages. | 322 // This is the wait time between ping messages. |
| 313 static const int kSleepSeconds; | 323 static const int kSleepSeconds; |
| 314 | 324 |
| 315 // This is the wait time after ping message is sent, to check if we have | 325 // This is the wait time after ping message is sent, to check if we have |
| 316 // received pong message or not. | 326 // received pong message or not. |
| 317 static const int kUnresponsiveSeconds; | 327 static const int kUnresponsiveSeconds; |
| 318 | 328 |
| 329 // This is used to initialize |crash_on_unresponsive_count_|. | |
| 330 // |crash_on_unresponsive_count_| is used to determine if the watched thread | |
| 331 // is responsive or not. | |
|
jar (doing other things)
2011/06/10 00:38:33
nit: Don't bother explaining the variable... since
ramant (doing other things)
2011/06/13 03:26:02
Done.
| |
| 332 static const int kUnresponsiveCount; | |
| 333 | |
| 319 // Lock for access to registered_. | 334 // Lock for access to registered_. |
| 320 base::Lock lock_; | 335 base::Lock lock_; |
| 321 | 336 |
| 322 // Map of all registered watched threads, from thread_id to ThreadWatcher. | 337 // Map of all registered watched threads, from thread_id to ThreadWatcher. |
| 323 RegistrationList registered_; | 338 RegistrationList registered_; |
| 324 | 339 |
| 325 // The registrar that holds NotificationTypes to be observed. | 340 // The registrar that holds NotificationTypes to be observed. |
| 326 NotificationRegistrar registrar_; | 341 NotificationRegistrar registrar_; |
| 327 | 342 |
| 328 // This is the last time when we woke all thread watchers up. | 343 // This is the last time when we woke all thread watchers up. |
| 329 base::TimeTicks last_wakeup_time_; | 344 base::TimeTicks last_wakeup_time_; |
| 330 | 345 |
| 346 // This is used to determine if the watched thread is responsive or not. If | |
| 347 // watched thread's |unresponsive_count_| is greater than or equal to | |
| 348 // |crash_on_unresponsive_count_| then we could crash the browser if the | |
| 349 // watched thread is listed in the "--crash-on-hang-threads" command line | |
| 350 // switch. It is initialized with |kUnresponsiveCount|, but can be overwritten | |
| 351 // by the command line switch "--crash-on-hang-seconds". | |
| 352 uint32 crash_on_unresponsive_count_; | |
| 353 | |
| 354 // This is the set of watched thread's names that are to be crashed if they | |
| 355 // have not responded with a pong message for |crash_on_unresponsive_count_| | |
| 356 // number of ping messages. | |
| 357 std::set<std::string> crash_on_hang_thread_names_; | |
| 358 | |
| 331 DISALLOW_COPY_AND_ASSIGN(ThreadWatcherList); | 359 DISALLOW_COPY_AND_ASSIGN(ThreadWatcherList); |
| 332 }; | 360 }; |
| 333 | 361 |
| 334 // Class for WatchDogThread and in its Init method, we start watching UI, IO, | 362 // Class for WatchDogThread and in its Init method, we start watching UI, IO, |
| 335 // DB, FILE, CACHED threads. | 363 // DB, FILE, CACHED threads. |
| 336 class WatchDogThread : public base::Thread { | 364 class WatchDogThread : public base::Thread { |
| 337 public: | 365 public: |
| 338 // Constructor. | 366 // Constructor. |
| 339 WatchDogThread(); | 367 WatchDogThread(); |
| 340 | 368 |
| (...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 373 | 401 |
| 374 DISALLOW_COPY_AND_ASSIGN(WatchDogThread); | 402 DISALLOW_COPY_AND_ASSIGN(WatchDogThread); |
| 375 }; | 403 }; |
| 376 | 404 |
| 377 // DISABLE_RUNNABLE_METHOD_REFCOUNT is a convenience macro for disabling | 405 // DISABLE_RUNNABLE_METHOD_REFCOUNT is a convenience macro for disabling |
| 378 // refcounting of ThreadWatcher and ThreadWatcherList classes. | 406 // refcounting of ThreadWatcher and ThreadWatcherList classes. |
| 379 DISABLE_RUNNABLE_METHOD_REFCOUNT(ThreadWatcher); | 407 DISABLE_RUNNABLE_METHOD_REFCOUNT(ThreadWatcher); |
| 380 DISABLE_RUNNABLE_METHOD_REFCOUNT(ThreadWatcherList); | 408 DISABLE_RUNNABLE_METHOD_REFCOUNT(ThreadWatcherList); |
| 381 | 409 |
| 382 #endif // CHROME_BROWSER_METRICS_THREAD_WATCHER_H_ | 410 #endif // CHROME_BROWSER_METRICS_THREAD_WATCHER_H_ |
| OLD | NEW |