Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // This file defines a WatchDog thread that monitors the responsiveness of other | 5 // This file defines a WatchDog thread that monitors the responsiveness of other |
| 6 // browser threads like UI, IO, DB, FILE and CACHED threads. It also defines | 6 // browser threads like UI, IO, DB, FILE and CACHED threads. It also defines |
| 7 // ThreadWatcher class which performs health check on threads that would like to | 7 // ThreadWatcher class which performs health check on threads that would like to |
| 8 // be watched. This file also defines ThreadWatcherList class that has list of | 8 // be watched. This file also defines ThreadWatcherList class that has list of |
| 9 // all active ThreadWatcher objects. | 9 // all active ThreadWatcher objects. |
| 10 // | 10 // |
| 11 // ThreadWatcher class sends ping message to the watched thread and the watched | 11 // ThreadWatcher class sends ping message to the watched thread and the watched |
| 12 // thread responds back with a pong message. It uploads response time | 12 // thread responds back with a pong message. It uploads response time |
| 13 // (difference between ping and pong times) as a histogram. | 13 // (difference between ping and pong times) as a histogram. |
| 14 // | 14 // |
| 15 // TODO(raman): ThreadWatcher can detect hung threads. If a hung thread is | 15 // TODO(raman): ThreadWatcher can detect hung threads. If a hung thread is |
| 16 // detected, we should probably just crash, and allow the crash system to gather | 16 // detected, we should probably just crash, and allow the crash system to gather |
| 17 // the stack trace. | 17 // the stack trace. |
| 18 // | 18 // |
| 19 // Example Usage: | 19 // Example Usage: |
| 20 // | 20 // |
| 21 // The following is an example for watching responsiveness of IO thread. | 21 // The following is an example for watching responsiveness of IO thread. |
| 22 // sleep_time specifies how often ping messages have to be sent to IO thread. | 22 // |sleep_time| specifies how often ping messages have to be sent to IO |
| 23 // unresponsive_time is the wait time after ping message is sent, to check if | 23 // thread. |unresponsive_time| is the wait time after ping message is sent, to |
| 24 // we have received pong message or not. | 24 // check if we have received pong message or not. |unresponsive_threshold| |
| 25 // specifies the number of unanswered ping messages after which IO thread is | |
| 26 // considered as not responsive. |crash_on_hang| specifies if we want to crash | |
| 27 // the browser when the watched thread has become sufficiently unresponsive, | |
| 28 // while other threads are sufficiently responsive. |live_threads_threshold| | |
| 29 // specifies the number of browser threads that are to be responsive when we | |
| 30 // want to crash the browser because of hung watched thread. | |
|
jar (doing other things)
2011/06/18 15:04:16
nit: You used "watched thread" on line 30 and 27,
ramant (doing other things)
2011/06/19 21:18:16
Done.
| |
| 25 // | 31 // |
| 26 // base::TimeDelta sleep_time = base::TimeDelta::FromSeconds(5); | 32 // base::TimeDelta sleep_time = base::TimeDelta::FromSeconds(5); |
| 27 // base::TimeDelta unresponsive_time = base::TimeDelta::FromSeconds(10); | 33 // base::TimeDelta unresponsive_time = base::TimeDelta::FromSeconds(10); |
| 28 // ThreadWatcher::StartWatching(BrowserThread::IO, "IO", sleep_time, | 34 // uint32 unresponsive_threshold = ThreadWatcherList::kUnresponsiveCount; |
| 29 // unresponsive_time); | 35 // bool crash_on_hang = false; |
| 36 // uint32 live_threads_threshold = ThreadWatcherList::kLiveThreadsThreshold; | |
| 37 // ThreadWatcher::StartWatching( | |
| 38 // BrowserThread::IO, "IO", sleep_time, unresponsive_time, | |
| 39 // unresponsive_threshold, crash_on_hang, live_threads_threshold); | |
| 30 | 40 |
| 31 #ifndef CHROME_BROWSER_METRICS_THREAD_WATCHER_H_ | 41 #ifndef CHROME_BROWSER_METRICS_THREAD_WATCHER_H_ |
| 32 #define CHROME_BROWSER_METRICS_THREAD_WATCHER_H_ | 42 #define CHROME_BROWSER_METRICS_THREAD_WATCHER_H_ |
| 33 | 43 |
| 34 #include <map> | 44 #include <map> |
| 45 #include <set> | |
| 35 #include <string> | 46 #include <string> |
| 36 #include <vector> | 47 #include <vector> |
| 37 | 48 |
| 38 #include "base/basictypes.h" | 49 #include "base/basictypes.h" |
| 50 #include "base/command_line.h" | |
| 39 #include "base/gtest_prod_util.h" | 51 #include "base/gtest_prod_util.h" |
| 40 #include "base/memory/ref_counted.h" | 52 #include "base/memory/ref_counted.h" |
| 41 #include "base/memory/scoped_ptr.h" | 53 #include "base/memory/scoped_ptr.h" |
| 42 #include "base/message_loop.h" | 54 #include "base/message_loop.h" |
| 43 #include "base/metrics/histogram.h" | 55 #include "base/metrics/histogram.h" |
| 44 #include "base/synchronization/lock.h" | 56 #include "base/synchronization/lock.h" |
| 45 #include "base/task.h" | 57 #include "base/task.h" |
| 46 #include "base/threading/thread.h" | 58 #include "base/threading/thread.h" |
| 47 #include "base/time.h" | 59 #include "base/time.h" |
| 48 #include "content/browser/browser_thread.h" | 60 #include "content/browser/browser_thread.h" |
| 49 #include "content/common/notification_observer.h" | 61 #include "content/common/notification_observer.h" |
| 50 #include "content/common/notification_registrar.h" | 62 #include "content/common/notification_registrar.h" |
| 51 | 63 |
| 52 class CustomThreadWatcher; | 64 class CustomThreadWatcher; |
| 53 class ThreadWatcherList; | 65 class ThreadWatcherList; |
| 66 class ThreadWatcherObserver; | |
| 54 | 67 |
| 55 // This class performs health check on threads that would like to be watched. | 68 // This class performs health check on threads that would like to be watched. |
| 56 class ThreadWatcher { | 69 class ThreadWatcher { |
| 57 public: | 70 public: |
| 58 // This method starts performing health check on the given thread_id. It will | 71 // This method starts performing health check on the given |thread_id|. It |
| 59 // create ThreadWatcher object for the given thread_id, thread_name, | 72 // will create ThreadWatcher object for the given |thread_id|, |thread_name|. |
| 60 // sleep_time and unresponsive_time. sleep_time_ is the wait time between ping | 73 // |sleep_time| is the wait time between ping messages. |unresponsive_time| is |
| 61 // messages. unresponsive_time_ is the wait time after ping message is sent, | 74 // the wait time after ping message is sent, to check if we have received pong |
| 62 // to check if we have received pong message or not. It will register that | 75 // message or not. |unresponsive_threshold| is used to determine if the thread |
| 63 // ThreadWatcher object and activate the thread watching of the given | 76 // is responsive or not. The watched thread is considered unresponsive if it |
| 64 // thread_id. | 77 // hasn't responded with a pong message for |unresponsive_threshold| number of |
| 78 // ping messages. |crash_on_hang| specifies if browser should be crashed when | |
| 79 // the watched thread is unresponsive. |live_threads_threshold| specifies the | |
| 80 // number of browser threads that are to be responsive when we want to crash | |
| 81 // the browser and watched thread has become sufficiently unresponsive. It | |
| 82 // will register that ThreadWatcher object and activate the thread watching of | |
| 83 // the given thread_id. | |
| 65 static void StartWatching(const BrowserThread::ID& thread_id, | 84 static void StartWatching(const BrowserThread::ID& thread_id, |
| 66 const std::string& thread_name, | 85 const std::string& thread_name, |
| 67 const base::TimeDelta& sleep_time, | 86 const base::TimeDelta& sleep_time, |
| 68 const base::TimeDelta& unresponsive_time); | 87 const base::TimeDelta& unresponsive_time, |
| 88 uint32 unresponsive_threshold, | |
| 89 bool crash_on_hang, | |
| 90 uint32 live_threads_threshold); | |
| 69 | 91 |
| 70 // Return the thread_id of the thread being watched. | 92 // Return the |thread_id_| of the thread being watched. |
| 71 BrowserThread::ID thread_id() const { return thread_id_; } | 93 BrowserThread::ID thread_id() const { return thread_id_; } |
| 72 | 94 |
| 73 // Return the name of the thread being watched. | 95 // Return the name of the thread being watched. |
| 74 std::string thread_name() const { return thread_name_; } | 96 std::string thread_name() const { return thread_name_; } |
| 75 | 97 |
| 76 // Return the sleep time between ping messages to be sent to the thread. | 98 // Return the sleep time between ping messages to be sent to the thread. |
| 77 base::TimeDelta sleep_time() const { return sleep_time_; } | 99 base::TimeDelta sleep_time() const { return sleep_time_; } |
| 78 | 100 |
| 79 // Return the wait time to check the responsiveness of the thread. | 101 // Return the wait time to check the responsiveness of the thread. |
| 80 base::TimeDelta unresponsive_time() const { return unresponsive_time_; } | 102 base::TimeDelta unresponsive_time() const { return unresponsive_time_; } |
| 81 | 103 |
| 82 // Returns true if we are monitoring the thread. | 104 // Returns true if we are monitoring the thread. |
| 83 bool active() const { return active_; } | 105 bool active() const { return active_; } |
| 84 | 106 |
| 85 // Returns ping_time_ (used by unit tests). | 107 // Returns |ping_time_| (used by unit tests). |
| 86 base::TimeTicks ping_time() const { return ping_time_; } | 108 base::TimeTicks ping_time() const { return ping_time_; } |
| 87 | 109 |
| 88 // Returns ping_sequence_number_ (used by unit tests). | 110 // Returns |ping_sequence_number_| (used by unit tests). |
| 89 uint64 ping_sequence_number() const { return ping_sequence_number_; } | 111 uint64 ping_sequence_number() const { return ping_sequence_number_; } |
| 90 | 112 |
| 91 protected: | 113 protected: |
| 92 // Construct a ThreadWatcher for the given thread_id. sleep_time_ is the | 114 // Construct a ThreadWatcher for the given |thread_id|. |sleep_time| is the |
| 93 // wait time between ping messages. unresponsive_time_ is the wait time after | 115 // wait time between ping messages. |unresponsive_time| is the wait time after |
| 94 // ping message is sent, to check if we have received pong message or not. | 116 // ping message is sent, to check if we have received pong message or not. |
| 95 ThreadWatcher(const BrowserThread::ID& thread_id, | 117 ThreadWatcher(const BrowserThread::ID& thread_id, |
| 96 const std::string& thread_name, | 118 const std::string& thread_name, |
| 97 const base::TimeDelta& sleep_time, | 119 const base::TimeDelta& sleep_time, |
| 98 const base::TimeDelta& unresponsive_time); | 120 const base::TimeDelta& unresponsive_time, |
| 121 uint32 unresponsive_threshold, | |
| 122 bool crash_on_hang, | |
| 123 uint32 live_threads_threshold); | |
| 99 virtual ~ThreadWatcher(); | 124 virtual ~ThreadWatcher(); |
| 100 | 125 |
| 101 // This method activates the thread watching which starts ping/pong messaging. | 126 // This method activates the thread watching which starts ping/pong messaging. |
| 102 virtual void ActivateThreadWatching(); | 127 virtual void ActivateThreadWatching(); |
| 103 | 128 |
| 104 // This method de-activates the thread watching and revokes all tasks. | 129 // This method de-activates the thread watching and revokes all tasks. |
| 105 virtual void DeActivateThreadWatching(); | 130 virtual void DeActivateThreadWatching(); |
| 106 | 131 |
| 107 // This will ensure that the watching is actively taking place, and awaken | 132 // This will ensure that the watching is actively taking place, and awaken |
| 108 // (i.e., post a PostPingMessage) if the watcher has stopped pinging due to | 133 // (i.e., post a PostPingMessage()) if the watcher has stopped pinging due to |
| 109 // lack of user activity. It will also reset ping_count_ to kPingCount. | 134 // lack of user activity. It will also reset |ping_count_| to |kPingCount|. |
| 110 virtual void WakeUp(); | 135 virtual void WakeUp(); |
| 111 | 136 |
| 112 // This method records when ping message was sent and it will Post a task | 137 // This method records when ping message was sent and it will Post a task |
| 113 // (OnPingMessage) to the watched thread that does nothing but respond with | 138 // (OnPingMessage()) to the watched thread that does nothing but respond with |
| 114 // OnPongMessage. It also posts a task (OnCheckResponsiveness) to check | 139 // OnPongMessage(). It also posts a task (OnCheckResponsiveness()) to check |
| 115 // responsiveness of monitored thread that would be called after waiting | 140 // responsiveness of monitored thread that would be called after waiting |
| 116 // unresponsive_time_. | 141 // |unresponsive_time_|. |
| 117 // This method is accessible on WatchDogThread. | 142 // This method is accessible on WatchDogThread. |
| 118 virtual void PostPingMessage(); | 143 virtual void PostPingMessage(); |
| 119 | 144 |
| 120 // This method handles a Pong Message from watched thread. It will track the | 145 // This method handles a Pong Message from watched thread. It will track the |
| 121 // response time (pong time minus ping time) via histograms. It posts a | 146 // response time (pong time minus ping time) via histograms. It posts a |
| 122 // PostPingMessage task that would be called after waiting sleep_time_. It | 147 // PostPingMessage() task that would be called after waiting |sleep_time_|. It |
| 123 // increments ping_sequence_number_ by 1. | 148 // increments |ping_sequence_number_| by 1. |
| 124 // This method is accessible on WatchDogThread. | 149 // This method is accessible on WatchDogThread. |
| 125 virtual void OnPongMessage(uint64 ping_sequence_number); | 150 virtual void OnPongMessage(uint64 ping_sequence_number); |
| 126 | 151 |
| 127 // This method will determine if the watched thread is responsive or not. If | 152 // This method will determine if the watched thread is responsive or not. If |
| 128 // the latest ping_sequence_number_ is not same as the ping_sequence_number | 153 // the latest |ping_sequence_number_| is not same as the |
| 129 // that is passed in, then we can assume that watched thread has responded | 154 // |ping_sequence_number| that is passed in, then we can assume that watched |
| 130 // with a pong message. | 155 // thread has responded with a pong message. |
| 131 // This method is accessible on WatchDogThread. | 156 // This method is accessible on WatchDogThread. |
| 132 virtual bool OnCheckResponsiveness(uint64 ping_sequence_number); | 157 virtual bool OnCheckResponsiveness(uint64 ping_sequence_number); |
| 133 | 158 |
| 134 private: | 159 private: |
| 135 friend class ThreadWatcherList; | 160 friend class ThreadWatcherList; |
| 136 friend class CustomThreadWatcher; | 161 friend class CustomThreadWatcher; |
| 137 | 162 |
| 138 // Allow tests to access our innards for testing purposes. | 163 // Allow tests to access our innards for testing purposes. |
| 139 FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, Registration); | 164 FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, Registration); |
| 140 FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, ThreadResponding); | 165 FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, ThreadResponding); |
| 141 FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, ThreadNotResponding); | 166 FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, ThreadNotResponding); |
| 142 FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, MultipleThreadsResponding); | 167 FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, MultipleThreadsResponding); |
| 143 FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, MultipleThreadsNotResponding); | 168 FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, MultipleThreadsNotResponding); |
| 144 | 169 |
| 145 // Post constructor initialization. | 170 // Post constructor initialization. |
| 146 void Initialize(); | 171 void Initialize(); |
| 147 | 172 |
| 148 // Watched thread does nothing except post callback_task to the WATCHDOG | 173 // Watched thread does nothing except post callback_task to the WATCHDOG |
| 149 // Thread. This method is called on watched thread. | 174 // Thread. This method is called on watched thread. |
| 150 static void OnPingMessage(const BrowserThread::ID& thread_id, | 175 static void OnPingMessage(const BrowserThread::ID& thread_id, |
| 151 Task* callback_task); | 176 Task* callback_task); |
| 152 | 177 |
| 153 // This method resets unresponsive_count_ to zero because watched thread is | 178 // This method resets |unresponsive_count_| to zero because watched thread is |
| 154 // responding to the ping message with a pong message. | 179 // responding to the ping message with a pong message. |
| 155 void ResetHangCounters(); | 180 void ResetHangCounters(); |
| 156 | 181 |
| 157 // This method records watched thread is not responding to the ping message. | 182 // This method records watched thread is not responding to the ping message. |
| 158 // It increments unresponsive_count_ by 1. | 183 // It increments |unresponsive_count_| by 1. |
| 159 void GotNoResponse(); | 184 void GotNoResponse(); |
| 160 | 185 |
| 186 // This method returns true if the watched thread has not responded with a | |
| 187 // pong message for |unresponsive_threshold_| number of ping messages. | |
| 188 bool CrashOnUnresponsiveness(); | |
| 189 | |
| 161 // This is the number of ping messages to be sent when the user is idle. | 190 // This is the number of ping messages to be sent when the user is idle. |
| 162 // ping_count_ will be initialized to kPingCount whenever user becomes active. | 191 // ping_count_ will be initialized to kPingCount whenever user becomes active. |
| 163 static const int kPingCount; | 192 static const int kPingCount; |
| 164 | 193 |
| 165 // This value is used to determine if the watched thread is responsive or not. | 194 // The |thread_id_| of the thread being watched. Only one instance can exist |
| 166 // If unresponsive_count_ is less than kUnresponsiveCount then watched thread | 195 // for the given |thread_id_| of the thread being watched. |
| 167 // is considered as responsive (in responsive_count_histogram_) otherwise it | |
| 168 // is considered as unresponsive (in unresponsive_count_histogram_). | |
| 169 static const int kUnresponsiveCount; | |
| 170 | |
| 171 // The thread_id of the thread being watched. Only one instance can exist for | |
| 172 // the given thread_id of the thread being watched. | |
| 173 const BrowserThread::ID thread_id_; | 196 const BrowserThread::ID thread_id_; |
| 174 | 197 |
| 175 // The name of the thread being watched. | 198 // The name of the thread being watched. |
| 176 const std::string thread_name_; | 199 const std::string thread_name_; |
| 177 | 200 |
| 178 // It is the sleep time between between the receipt of a pong message back, | 201 // It is the sleep time between the receipt of a pong message back, and the |
| 179 // and the sending of another ping message. | 202 // sending of another ping message. |
| 180 const base::TimeDelta sleep_time_; | 203 const base::TimeDelta sleep_time_; |
| 181 | 204 |
| 182 // It is the duration from sending a ping message, until we check status to be | 205 // It is the duration from sending a ping message, until we check status to be |
| 183 // sure a pong message has been returned. | 206 // sure a pong message has been returned. |
| 184 const base::TimeDelta unresponsive_time_; | 207 const base::TimeDelta unresponsive_time_; |
| 185 | 208 |
| 186 // This is the last time when ping message was sent. | 209 // This is the last time when ping message was sent. |
| 187 base::TimeTicks ping_time_; | 210 base::TimeTicks ping_time_; |
| 188 | 211 |
| 189 // This is the last time when we got pong message. | 212 // This is the last time when we got pong message. |
| (...skipping 10 matching lines...) Expand all Loading... | |
| 200 // The counter tracks least number of ping messages that will be sent to | 223 // The counter tracks least number of ping messages that will be sent to |
| 201 // watched thread before the ping-pong mechanism will go into an extended | 224 // watched thread before the ping-pong mechanism will go into an extended |
| 202 // sleep. If this value is zero, then the mechanism is in an extended sleep, | 225 // sleep. If this value is zero, then the mechanism is in an extended sleep, |
| 203 // and awaiting some observed user action before continuing. | 226 // and awaiting some observed user action before continuing. |
| 204 int ping_count_; | 227 int ping_count_; |
| 205 | 228 |
| 206 // Histogram that keeps track of response times for the watched thread. | 229 // Histogram that keeps track of response times for the watched thread. |
| 207 base::Histogram* response_time_histogram_; | 230 base::Histogram* response_time_histogram_; |
| 208 | 231 |
| 209 // Histogram that keeps track of unresponsive time since the last pong message | 232 // Histogram that keeps track of unresponsive time since the last pong message |
| 210 // when we got no response (GotNoResponse) from the watched thread. | 233 // when we got no response (GotNoResponse()) from the watched thread. |
| 211 base::Histogram* unresponsive_time_histogram_; | 234 base::Histogram* unresponsive_time_histogram_; |
| 212 | 235 |
| 213 // Histogram that keeps track of how many threads are responding when we got | 236 // Histogram that keeps track of how many threads are responding when we got |
| 214 // no response (GotNoResponse) from the watched thread. | 237 // no response (GotNoResponse()) from the watched thread. |
| 215 base::Histogram* responsive_count_histogram_; | 238 base::Histogram* responsive_count_histogram_; |
| 216 | 239 |
| 217 // Histogram that keeps track of how many threads are not responding when we | 240 // Histogram that keeps track of how many threads are not responding when we |
| 218 // got no response (GotNoResponse) from the watched thread. Count includes the | 241 // got no response (GotNoResponse()) from the watched thread. Count includes |
| 219 // thread that got no response. | 242 // the thread that got no response. |
| 220 base::Histogram* unresponsive_count_histogram_; | 243 base::Histogram* unresponsive_count_histogram_; |
| 221 | 244 |
| 222 // This counter tracks the unresponsiveness of watched thread. If this value | 245 // This counter tracks the unresponsiveness of watched thread. If this value |
| 223 // is zero then watched thread has responded with a pong message. This is | 246 // is zero then watched thread has responded with a pong message. This is |
| 224 // incremented by 1 when we got no response (GotNoResponse) from the watched | 247 // incremented by 1 when we got no response (GotNoResponse()) from the watched |
| 225 // thread. | 248 // thread. |
| 226 int unresponsive_count_; | 249 uint32 unresponsive_count_; |
| 227 | 250 |
| 228 // This is set to true when we would have crashed the browser because the | 251 // This is set to true when we would have crashed the browser because the |
| 229 // watched thread hasn't responded at least 6 times. It is reset to false when | 252 // watched thread hasn't responded at least 6 times. It is reset to false when |
| 230 // watched thread responds with a pong message. | 253 // watched thread responds with a pong message. |
| 231 bool hung_processing_complete_; | 254 bool hung_processing_complete_; |
| 232 | 255 |
| 256 // This is used to determine if the watched thread is responsive or not. If | |
| 257 // watched thread's |unresponsive_count_| is greater than or equal to | |
| 258 // |unresponsive_threshold_| then we would consider it as unresponsive. | |
| 259 uint32 unresponsive_threshold_; | |
| 260 | |
| 261 // This is set to true if we want to crash the browser when the watched thread | |
| 262 // has become sufficiently unresponsive, while other threads are sufficiently | |
| 263 // responsive. | |
| 264 bool crash_on_hang_; | |
| 265 | |
| 266 // This specifies the number of browser threads that are to be responsive when | |
| 267 // we want to crash the browser because watched thread has become sufficiently | |
| 268 // unresponsive. | |
| 269 uint32 live_threads_threshold_; | |
| 270 | |
| 233 // We use this factory to create callback tasks for ThreadWatcher object. We | 271 // We use this factory to create callback tasks for ThreadWatcher object. We |
| 234 // use this during ping-pong messaging between WatchDog thread and watched | 272 // use this during ping-pong messaging between WatchDog thread and watched |
| 235 // thread. | 273 // thread. |
| 236 ScopedRunnableMethodFactory<ThreadWatcher> method_factory_; | 274 ScopedRunnableMethodFactory<ThreadWatcher> method_factory_; |
| 237 | 275 |
| 238 DISALLOW_COPY_AND_ASSIGN(ThreadWatcher); | 276 DISALLOW_COPY_AND_ASSIGN(ThreadWatcher); |
| 239 }; | 277 }; |
| 240 | 278 |
| 241 // Class with a list of all active thread watchers. A thread watcher is active | 279 // Class with a list of all active thread watchers. A thread watcher is active |
| 242 // if it has been registered, which includes determining the histogram name. This | 280 // if it has been registered, which includes determining the histogram name. This |
| 243 // class provides utility functions to start and stop watching all browser | 281 // class provides utility functions to start and stop watching all browser |
| 244 // threads. Only one instance of this class exists. | 282 // threads. Only one instance of this class exists. |
| 245 class ThreadWatcherList : public NotificationObserver { | 283 class ThreadWatcherList { |
| 246 public: | 284 public: |
| 247 // A map from BrowserThread to the actual instances. | 285 // A map from BrowserThread to the actual instances. |
| 248 typedef std::map<BrowserThread::ID, ThreadWatcher*> RegistrationList; | 286 typedef std::map<BrowserThread::ID, ThreadWatcher*> RegistrationList; |
| 249 | 287 |
| 250 // This singleton holds the global list of registered ThreadWatchers. | |
| 251 ThreadWatcherList(); | |
| 252 // Destructor deletes all registered ThreadWatcher instances. | |
| 253 virtual ~ThreadWatcherList(); | |
| 254 | |
| 255 // Register() stores a pointer to the given ThreadWatcher in a global map. | |
| 256 static void Register(ThreadWatcher* watcher); | |
| 257 | |
| 258 // This method returns true if the ThreadWatcher object is registered. | |
| 259 static bool IsRegistered(const BrowserThread::ID thread_id); | |
| 260 | |
| 261 // This method posts a task on WatchDogThread to start watching all browser | 288 // This method posts a task on WatchDogThread to start watching all browser |
| 262 // threads. | 289 // threads. |
| 263 // This method is accessible on UI thread. | 290 // This method is accessible on UI thread. |
| 264 static void StartWatchingAll(); | 291 static void StartWatchingAll(const CommandLine& command_line); |
| 265 | 292 |
| 266 // This method posts a task on WatchDogThread to RevokeAll tasks and to | 293 // This method posts a task on WatchDogThread to RevokeAll tasks and to |
| 267 // deactivate thread watching of other threads and tell NotificationService to | 294 // deactivate thread watching of other threads and tell NotificationService to |
| 268 // stop calling Observe. | 295 // stop calling Observe. |
| 269 // This method is accessible on UI thread. | 296 // This method is accessible on UI thread. |
| 270 static void StopWatchingAll(); | 297 static void StopWatchingAll(); |
| 271 | 298 |
| 272 // RemoveAll NotificationTypes that are being observed. | 299 // Register() stores a pointer to the given ThreadWatcher in a global map. |
| 273 // This method is accessible on UI thread. | 300 static void Register(ThreadWatcher* watcher); |
| 274 static void RemoveNotifications(); | |
| 275 | 301 |
| 276 // This method returns number of watched threads that have responded and | 302 // This method returns true if the ThreadWatcher object is registered. |
| 277 // threads that have not responded with a pong message. | 303 static bool IsRegistered(const BrowserThread::ID thread_id); |
| 278 static void GetStatusOfThreads(int* no_of_responding_threads, | 304 |
| 279 int* no_of_unresponding_threads); | 305 // This method returns number of responsive and unresponsive watched threads. |
| 306 static void GetStatusOfThreads(uint32* responding_thread_count, | |
| 307 uint32* unresponding_thread_count); | |
| 308 | |
| 309 // This will ensure that the watching is actively taking place, and awaken | |
| 310 // all thread watchers that are registered. | |
| 311 static void WakeUpAll(); | |
| 280 | 312 |
| 281 private: | 313 private: |
| 282 // Allow tests to access our innards for testing purposes. | 314 // Allow tests to access our innards for testing purposes. |
| 283 FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, Registration); | 315 friend class CustomThreadWatcher; |
| 316 friend class ThreadWatcherTest; | |
| 317 FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, CommandLineArgs); | |
| 284 | 318 |
| 285 // Delete all thread watcher objects and remove them from global map. | 319 // This singleton holds the global list of registered ThreadWatchers. |
| 286 // This method is accessible on WatchDogThread. | 320 ThreadWatcherList(); |
| 287 void DeleteAll(); | |
| 288 | 321 |
| 289 // This will ensure that the watching is actively taking place. It will wakeup | 322 // Destructor deletes all registered ThreadWatcher instances. |
| 290 // all thread watchers every 2 seconds. This is the implementation of | 323 virtual ~ThreadWatcherList(); |
| 291 // NotificationObserver. When a matching notification is posted to the | |
| 292 // notification service, this method is called. | |
| 293 // This method is accessible on UI thread. | |
| 294 virtual void Observe(NotificationType type, | |
| 295 const NotificationSource& source, | |
| 296 const NotificationDetails& details); | |
| 297 | 324 |
| 298 // This will ensure that the watching is actively taking place, and awaken | 325 // Parses the command line to get |unresponsive_threshold| from |
| 299 // all thread watchers that are registered. | 326 // switches::kCrashOnHangSeconds, |crash_on_hang_thread_names| from |
| 300 // This method is accessible on WatchDogThread. | 327 // switches::kCrashOnHangThreads and |live_threads_threshold| from |
| 301 virtual void WakeUpAll(); | 328 // switches::kCrashOnLive. |crash_on_hang_thread_names| is the set of watched |
| 329 // thread's names that are to be crashed if they are not responding. | |
| 330 static void ParseCommandLine( | |
| 331 const CommandLine& command_line, | |
| 332 uint32* unresponsive_threshold, | |
| 333 std::set<std::string>* crash_on_hang_thread_names, | |
| 334 uint32* live_threads_threshold); | |
| 335 | |
| 336 // This constructs the |ThreadWatcherList| singleton and starts watching | |
| 337 // browser threads by calling StartWatching() on each browser thread that is | |
| 338 // watched. | |
| 339 static void InitializeAndStartWatching( | |
| 340 uint32 unresponsive_threshold, | |
| 341 const std::set<std::string>& crash_on_hang_thread_names, | |
| 342 uint32 live_threads_threshold); | |
| 343 | |
| 344 // This method calls ThreadWatcher::StartWatching() to perform health check on | |
| 345 // the given |thread_id|. | |
| 346 static void StartWatching( | |
| 347 const BrowserThread::ID& thread_id, | |
| 348 const std::string& thread_name, | |
| 349 const base::TimeDelta& sleep_time, | |
| 350 const base::TimeDelta& unresponsive_time, | |
| 351 uint32 unresponsive_threshold, | |
| 352 const std::set<std::string>& crash_on_hang_thread_names, | |
| 353 uint32 live_threads_threshold); | |
| 354 | |
| 355 // Delete all thread watcher objects and remove them from global map. It also | |
| 356 // deletes |g_thread_watcher_list_|. | |
| 357 static void DeleteAll(); | |
| 302 | 358 |
| 303 // The Find() method can be used to test to see if a given ThreadWatcher was | 359 // The Find() method can be used to test to see if a given ThreadWatcher was |
| 304 // already registered, or to retrieve a pointer to it from the global map. | 360 // already registered, or to retrieve a pointer to it from the global map. |
| 305 static ThreadWatcher* Find(const BrowserThread::ID& thread_id); | 361 static ThreadWatcher* Find(const BrowserThread::ID& thread_id); |
| 306 | 362 |
| 307 // Helper function should be called only while holding lock_. | 363 // The singleton of this class. It is used to keep track of information about |
| 308 ThreadWatcher* PreLockedFind(const BrowserThread::ID& thread_id); | 364 // threads that are being watched. |
| 309 | 365 static ThreadWatcherList* g_thread_watcher_list_; |
| 310 static ThreadWatcherList* global_; // The singleton of this class. | |
| 311 | 366 |
| 312 // This is the wait time between ping messages. | 367 // This is the wait time between ping messages. |
| 313 static const int kSleepSeconds; | 368 static const int kSleepSeconds; |
| 314 | 369 |
| 315 // This is the wait time after ping message is sent, to check if we have | 370 // This is the wait time after ping message is sent, to check if we have |
| 316 // received pong message or not. | 371 // received pong message or not. |
| 317 static const int kUnresponsiveSeconds; | 372 static const int kUnresponsiveSeconds; |
| 318 | 373 |
| 319 // Lock for access to registered_. | 374 // Default value for |unresponsive_threshold|. |
| 320 base::Lock lock_; | 375 static const int kUnresponsiveCount; |
| 376 | |
| 377 // Default value for |live_threads_threshold|. | |
| 378 static const int kLiveThreadsThreshold; | |
| 321 | 379 |
| 322 // Map of all registered watched threads, from thread_id to ThreadWatcher. | 380 // Map of all registered watched threads, from thread_id to ThreadWatcher. |
| 323 RegistrationList registered_; | 381 RegistrationList registered_; |
| 324 | 382 |
| 383 DISALLOW_COPY_AND_ASSIGN(ThreadWatcherList); | |
| 384 }; | |
| 385 | |
| 386 // This class ensures that the thread watching is actively taking place. Only | |
| 387 // one instance of this class exists. | |
| 388 class ThreadWatcherObserver : public NotificationObserver { | |
| 389 public: | |
| 390 // Registers |g_thread_watcher_observer_| as the Notifications observer. | |
| 391 // |wakeup_interval| specifies how often to wake up thread watchers. This | |
| 392 // method is accessible on UI thread. | |
| 393 static void SetupNotifications(const base::TimeDelta& wakeup_interval); | |
| 394 | |
| 395 // Removes all NotificationTypes from |registrar_| and deletes | |
| 396 // |g_thread_watcher_observer_|. This method is accessible on UI thread. | |
| 397 static void RemoveNotifications(); | |
| 398 | |
| 399 private: | |
| 400 // Constructor of |g_thread_watcher_observer_| singleton. | |
| 401 explicit ThreadWatcherObserver(const base::TimeDelta& wakeup_interval); | |
| 402 | |
| 403 // Destructor of |g_thread_watcher_observer_| singleton. | |
| 404 virtual ~ThreadWatcherObserver(); | |
| 405 | |
| 406 // This ensures all thread watchers are active because there is some user | |
| 407 // activity. It will wake up all thread watchers every |wakeup_interval_| | |
| 408 // seconds. This is the implementation of NotificationObserver. When a | |
| 409 // matching notification is posted to the notification service, this method is | |
| 410 // called. | |
| 411 virtual void Observe(NotificationType type, | |
| 412 const NotificationSource& source, | |
| 413 const NotificationDetails& details); | |
| 414 | |
| 415 // The singleton of this class. | |
| 416 static ThreadWatcherObserver* g_thread_watcher_observer_; | |
| 417 | |
| 325 // The registrar that holds NotificationTypes to be observed. | 418 // The registrar that holds NotificationTypes to be observed. |
| 326 NotificationRegistrar registrar_; | 419 NotificationRegistrar registrar_; |
| 327 | 420 |
| 328 // This is the last time when we woke all thread watchers up. | 421 // This is the last time when we woke all thread watchers up. |
| 329 base::TimeTicks last_wakeup_time_; | 422 base::TimeTicks last_wakeup_time_; |
| 330 | 423 |
| 331 DISALLOW_COPY_AND_ASSIGN(ThreadWatcherList); | 424 // It is the time interval between wake up calls to thread watchers. |
| 425 const base::TimeDelta wakeup_interval_; | |
| 426 | |
| 427 DISALLOW_COPY_AND_ASSIGN(ThreadWatcherObserver); | |
| 332 }; | 428 }; |
| 333 | 429 |
| 334 // Class for WatchDogThread. In its Init method, we start watching UI, IO, | 430 // Class for WatchDogThread. In its Init method, we start watching UI, IO, |
| 335 // DB, FILE, CACHED threads. | 431 // DB, FILE, CACHED threads. |
| 336 class WatchDogThread : public base::Thread { | 432 class WatchDogThread : public base::Thread { |
| 337 public: | 433 public: |
| 338 // Constructor. | 434 // Constructor. |
| 339 WatchDogThread(); | 435 WatchDogThread(); |
| 340 | 436 |
| 341 // Destroys the thread and stops the thread. | 437 // Destroys the thread and stops the thread. |
| (...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 373 | 469 |
| 374 DISALLOW_COPY_AND_ASSIGN(WatchDogThread); | 470 DISALLOW_COPY_AND_ASSIGN(WatchDogThread); |
| 375 }; | 471 }; |
| 376 | 472 |
| 377 // DISABLE_RUNNABLE_METHOD_REFCOUNT is a convenience macro for disabling | 473 // DISABLE_RUNNABLE_METHOD_REFCOUNT is a convenience macro for disabling |
| 378 // refcounting of ThreadWatcher and ThreadWatcherList classes. | 474 // refcounting of ThreadWatcher and ThreadWatcherList classes. |
| 379 DISABLE_RUNNABLE_METHOD_REFCOUNT(ThreadWatcher); | 475 DISABLE_RUNNABLE_METHOD_REFCOUNT(ThreadWatcher); |
| 380 DISABLE_RUNNABLE_METHOD_REFCOUNT(ThreadWatcherList); | 476 DISABLE_RUNNABLE_METHOD_REFCOUNT(ThreadWatcherList); |
| 381 | 477 |
| 382 #endif // CHROME_BROWSER_METRICS_THREAD_WATCHER_H_ | 478 #endif // CHROME_BROWSER_METRICS_THREAD_WATCHER_H_ |
| OLD | NEW |