Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(240)

Side by Side Diff: chrome/browser/metrics/thread_watcher.h

Issue 7134007: Added command line switches "crash-on-hang-threads" and "crash-on-hang-seconds" (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: '' Created 9 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // This file defines a WatchDog thread that monitors the responsiveness of other 5 // This file defines a WatchDog thread that monitors the responsiveness of other
6 // browser threads like UI, IO, DB, FILE and CACHED threads. It also defines 6 // browser threads like UI, IO, DB, FILE and CACHED threads. It also defines
7 // ThreadWatcher class which performs health check on threads that would like to 7 // ThreadWatcher class which performs health check on threads that would like to
8 // be watched. This file also defines ThreadWatcherList class that has list of 8 // be watched. This file also defines ThreadWatcherList class that has list of
9 // all active ThreadWatcher objects. 9 // all active ThreadWatcher objects.
10 // 10 //
(...skipping 14 matching lines...) Expand all
25 // 25 //
26 // base::TimeDelta sleep_time = base::TimeDelta::FromSeconds(5); 26 // base::TimeDelta sleep_time = base::TimeDelta::FromSeconds(5);
27 // base::TimeDelta unresponsive_time = base::TimeDelta::FromSeconds(10); 27 // base::TimeDelta unresponsive_time = base::TimeDelta::FromSeconds(10);
28 // ThreadWatcher::StartWatching(BrowserThread::IO, "IO", sleep_time, 28 // ThreadWatcher::StartWatching(BrowserThread::IO, "IO", sleep_time,
29 // unresponsive_time); 29 // unresponsive_time);
30 30
31 #ifndef CHROME_BROWSER_METRICS_THREAD_WATCHER_H_ 31 #ifndef CHROME_BROWSER_METRICS_THREAD_WATCHER_H_
32 #define CHROME_BROWSER_METRICS_THREAD_WATCHER_H_ 32 #define CHROME_BROWSER_METRICS_THREAD_WATCHER_H_
33 33
34 #include <map> 34 #include <map>
35 #include <set>
35 #include <string> 36 #include <string>
36 #include <vector> 37 #include <vector>
37 38
38 #include "base/basictypes.h" 39 #include "base/basictypes.h"
40 #include "base/command_line.h"
39 #include "base/gtest_prod_util.h" 41 #include "base/gtest_prod_util.h"
40 #include "base/memory/ref_counted.h" 42 #include "base/memory/ref_counted.h"
41 #include "base/memory/scoped_ptr.h" 43 #include "base/memory/scoped_ptr.h"
42 #include "base/message_loop.h" 44 #include "base/message_loop.h"
43 #include "base/metrics/histogram.h" 45 #include "base/metrics/histogram.h"
44 #include "base/synchronization/lock.h" 46 #include "base/synchronization/lock.h"
45 #include "base/task.h" 47 #include "base/task.h"
46 #include "base/threading/thread.h" 48 #include "base/threading/thread.h"
47 #include "base/time.h" 49 #include "base/time.h"
48 #include "content/browser/browser_thread.h" 50 #include "content/browser/browser_thread.h"
(...skipping 102 matching lines...) Expand 10 before | Expand all | Expand 10 after
151 Task* callback_task); 153 Task* callback_task);
152 154
153 // This method resets unresponsive_count_ to zero because watched thread is 155 // This method resets unresponsive_count_ to zero because watched thread is
154 // responding to the ping message with a pong message. 156 // responding to the ping message with a pong message.
155 void ResetHangCounters(); 157 void ResetHangCounters();
156 158
157 // This method records watched thread is not responding to the ping message. 159 // This method records watched thread is not responding to the ping message.
158 // It increments unresponsive_count_ by 1. 160 // It increments unresponsive_count_ by 1.
159 void GotNoResponse(); 161 void GotNoResponse();
160 162
163 // Helper method to get |unresponsive_count_|.
164 uint32 unresponsive_count() const { return unresponsive_count_; }
165
161 // This is the number of ping messages to be sent when the user is idle. 166 // This is the number of ping messages to be sent when the user is idle.
162 // ping_count_ will be initialized to kPingCount whenever user becomes active. 167 // ping_count_ will be initialized to kPingCount whenever user becomes active.
163 static const int kPingCount; 168 static const int kPingCount;
164 169
165 // This value is used to determine if the watched thread is responsive or not.
166 // If unresponsive_count_ is less than kUnresponsiveCount then watched thread
167 // is considered as responsive (in responsive_count_histogram_) otherwise it
168 // is considered as unresponsive (in unresponsive_count_histogram_).
169 static const int kUnresponsiveCount;
170
171 // The thread_id of the thread being watched. Only one instance can exist for 170 // The thread_id of the thread being watched. Only one instance can exist for
172 // the given thread_id of the thread being watched. 171 // the given thread_id of the thread being watched.
173 const BrowserThread::ID thread_id_; 172 const BrowserThread::ID thread_id_;
174 173
175 // The name of the thread being watched. 174 // The name of the thread being watched.
176 const std::string thread_name_; 175 const std::string thread_name_;
177 176
178 // It is the sleep time between the receipt of a pong message back, 177 // It is the sleep time between the receipt of a pong message back,
179 // and the sending of another ping message. 178 // and the sending of another ping message.
180 const base::TimeDelta sleep_time_; 179 const base::TimeDelta sleep_time_;
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after
216 215
217 // Histogram that keeps track of how many threads are not responding when we 216 // Histogram that keeps track of how many threads are not responding when we
218 // got no response (GotNoResponse) from the watched thread. Count includes the 217 // got no response (GotNoResponse) from the watched thread. Count includes the
219 // thread that got no response. 218 // thread that got no response.
220 base::Histogram* unresponsive_count_histogram_; 219 base::Histogram* unresponsive_count_histogram_;
221 220
222 // This counter tracks the unresponsiveness of watched thread. If this value 221 // This counter tracks the unresponsiveness of watched thread. If this value
223 // is zero then watched thread has responded with a pong message. This is 222 // is zero then watched thread has responded with a pong message. This is
224 // incremented by 1 when we got no response (GotNoResponse) from the watched 223 // incremented by 1 when we got no response (GotNoResponse) from the watched
225 // thread. 224 // thread.
226 int unresponsive_count_; 225 uint32 unresponsive_count_;
227 226
228 // This is set to true when we would have crashed the browser because the 227 // This is set to true when we would have crashed the browser because the
229 // watched thread hasn't responded at least 6 times. It is reset to false when 228 // watched thread hasn't responded at least 6 times. It is reset to false when
230 // watched thread responds with a pong message. 229 // watched thread responds with a pong message.
231 bool hung_processing_complete_; 230 bool hung_processing_complete_;
232 231
233 // We use this factory to create callback tasks for ThreadWatcher object. We 232 // We use this factory to create callback tasks for ThreadWatcher object. We
234 // use this during ping-pong messaging between WatchDog thread and watched 233 // use this during ping-pong messaging between WatchDog thread and watched
235 // thread. 234 // thread.
236 ScopedRunnableMethodFactory<ThreadWatcher> method_factory_; 235 ScopedRunnableMethodFactory<ThreadWatcher> method_factory_;
237 236
238 DISALLOW_COPY_AND_ASSIGN(ThreadWatcher); 237 DISALLOW_COPY_AND_ASSIGN(ThreadWatcher);
239 }; 238 };
240 239
241 // Class with a list of all active thread watchers. A thread watcher is active 240 // Class with a list of all active thread watchers. A thread watcher is active
242 // if it has been registered, which includes determining the histogram name. This 241 // if it has been registered, which includes determining the histogram name. This
243 // class provides utility functions to start and stop watching all browser 242 // class provides utility functions to start and stop watching all browser
244 // threads. Only one instance of this class exists. 243 // threads. Only one instance of this class exists.
245 class ThreadWatcherList : public NotificationObserver { 244 class ThreadWatcherList : public NotificationObserver {
246 public: 245 public:
247 // A map from BrowserThread to the actual instances. 246 // A map from BrowserThread to the actual instances.
248 typedef std::map<BrowserThread::ID, ThreadWatcher*> RegistrationList; 247 typedef std::map<BrowserThread::ID, ThreadWatcher*> RegistrationList;
249 248
250 // This singleton holds the global list of registered ThreadWatchers. 249 // This singleton holds the global list of registered ThreadWatchers.
251 ThreadWatcherList(); 250 explicit ThreadWatcherList(const CommandLine& command_line);
252 // Destructor deletes all registered ThreadWatcher instances. 251 // Destructor deletes all registered ThreadWatcher instances.
253 virtual ~ThreadWatcherList(); 252 virtual ~ThreadWatcherList();
254 253
255 // Register() stores a pointer to the given ThreadWatcher in a global map. 254 // Register() stores a pointer to the given ThreadWatcher in a global map.
256 static void Register(ThreadWatcher* watcher); 255 static void Register(ThreadWatcher* watcher);
257 256
258 // This method returns true if the ThreadWatcher object is registered. 257 // This method returns true if the ThreadWatcher object is registered.
259 static bool IsRegistered(const BrowserThread::ID thread_id); 258 static bool IsRegistered(const BrowserThread::ID thread_id);
260 259
261 // This method posts a task on WatchDogThread to start watching all browser 260 // This method posts a task on WatchDogThread to start watching all browser
262 // threads. 261 // threads.
263 // This method is accessible on UI thread. 262 // This method is accessible on UI thread.
264 static void StartWatchingAll(); 263 static void StartWatchingAll();
265 264
266 // This method posts a task on WatchDogThread to RevokeAll tasks and to 265 // This method posts a task on WatchDogThread to RevokeAll tasks and to
267 // deactivate thread watching of other threads and tell NotificationService to 266 // deactivate thread watching of other threads and tell NotificationService to
268 // stop calling Observe. 267 // stop calling Observe.
269 // This method is accessible on UI thread. 268 // This method is accessible on UI thread.
270 static void StopWatchingAll(); 269 static void StopWatchingAll();
271 270
272 // RemoveAll NotificationTypes that are being observed. 271 // RemoveAll NotificationTypes that are being observed.
273 // This method is accessible on UI thread. 272 // This method is accessible on UI thread.
274 static void RemoveNotifications(); 273 static void RemoveNotifications();
275 274
275 // Returns true if watched thread's |unresponsive_count_| is less than
276 // |crash_on_unresponsive_count_|. A watched thread is considered as
277 // unresponsive if it has not responded with a pong message for
278 // |crash_on_unresponsive_count_| number of ping messages.
279 static bool IsResponsive(ThreadWatcher* watcher);
280
281 // Returns true if the watched thread is not responsive and is listed
282 // as one of the threads in "--crash-on-hang-threads" command line switch.
283 static bool CrashOnHang(ThreadWatcher* watcher);
284
276 // This method returns number of watched threads that have responded and 285 // This method returns number of watched threads that have responded and
277 // threads that have not responded with a pong message. 286 // threads that have not responded with a pong message for
287 // |crash_on_unresponsive_count_| number of ping messages.
278 static void GetStatusOfThreads(int* no_of_responding_threads, 288 static void GetStatusOfThreads(int* no_of_responding_threads,
279 int* no_of_unresponding_threads); 289 int* no_of_unresponding_threads);
280 290
281 private: 291 private:
282 // Allow tests to access our innards for testing purposes. 292 // Allow tests to access our innards for testing purposes.
283 FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, Registration); 293 FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, Registration);
284 294
285 // Delete all thread watcher objects and remove them from global map. 295 // Delete all thread watcher objects and remove them from global map.
286 // This method is accessible on WatchDogThread. 296 // This method is accessible on WatchDogThread.
287 void DeleteAll(); 297 void DeleteAll();
(...skipping 21 matching lines...) Expand all
309 319
310 static ThreadWatcherList* global_; // The singleton of this class. 320 static ThreadWatcherList* global_; // The singleton of this class.
311 321
312 // This is the wait time between ping messages. 322 // This is the wait time between ping messages.
313 static const int kSleepSeconds; 323 static const int kSleepSeconds;
314 324
315 // This is the wait time after ping message is sent, to check if we have 325 // This is the wait time after ping message is sent, to check if we have
316 // received pong message or not. 326 // received pong message or not.
317 static const int kUnresponsiveSeconds; 327 static const int kUnresponsiveSeconds;
318 328
329 // This is used to initialize |crash_on_unresponsive_count_|.
330 // |crash_on_unresponsive_count_| is used to determine if the watched thread
331 // is responsive or not.
jar (doing other things) 2011/06/10 00:38:33 nit: Don't bother explaining the variable... since
ramant (doing other things) 2011/06/13 03:26:02 Done.
332 static const int kUnresponsiveCount;
333
319 // Lock for access to registered_. 334 // Lock for access to registered_.
320 base::Lock lock_; 335 base::Lock lock_;
321 336
322 // Map of all registered watched threads, from thread_id to ThreadWatcher. 337 // Map of all registered watched threads, from thread_id to ThreadWatcher.
323 RegistrationList registered_; 338 RegistrationList registered_;
324 339
325 // The registrar that holds NotificationTypes to be observed. 340 // The registrar that holds NotificationTypes to be observed.
326 NotificationRegistrar registrar_; 341 NotificationRegistrar registrar_;
327 342
328 // This is the last time when we woke all thread watchers up. 343 // This is the last time when we woke all thread watchers up.
329 base::TimeTicks last_wakeup_time_; 344 base::TimeTicks last_wakeup_time_;
330 345
346 // This is used to determine if the watched thread is responsive or not. If
347 // watched thread's |unresponsive_count_| is greater than or equal to
348 // |crash_on_unresponsive_count_| then we could crash the browser if the
349 // watched thread is listed in the "--crash-on-hang-threads" command line
350 // switch. It is initialized with |kUnresponsiveCount|, but can be overwritten
351 // by the command line switch "--crash-on-hang-seconds".
352 uint32 crash_on_unresponsive_count_;
353
354 // This is the set of watched thread's names that are to be crashed if they
355 // have not responded with a pong message for |crash_on_unresponsive_count_|
356 // number of ping messages.
357 std::set<std::string> crash_on_hang_thread_names_;
358
331 DISALLOW_COPY_AND_ASSIGN(ThreadWatcherList); 359 DISALLOW_COPY_AND_ASSIGN(ThreadWatcherList);
332 }; 360 };
333 361
334 // Class for WatchDogThread and in its Init method, we start watching UI, IO, 362 // Class for WatchDogThread and in its Init method, we start watching UI, IO,
335 // DB, FILE, CACHED threads. 363 // DB, FILE, CACHED threads.
336 class WatchDogThread : public base::Thread { 364 class WatchDogThread : public base::Thread {
337 public: 365 public:
338 // Constructor. 366 // Constructor.
339 WatchDogThread(); 367 WatchDogThread();
340 368
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
373 401
374 DISALLOW_COPY_AND_ASSIGN(WatchDogThread); 402 DISALLOW_COPY_AND_ASSIGN(WatchDogThread);
375 }; 403 };
376 404
377 // DISABLE_RUNNABLE_METHOD_REFCOUNT is a convenience macro for disabling 405 // DISABLE_RUNNABLE_METHOD_REFCOUNT is a convenience macro for disabling
378 // refcounting of ThreadWatcher and ThreadWatcherList classes. 406 // refcounting of ThreadWatcher and ThreadWatcherList classes.
379 DISABLE_RUNNABLE_METHOD_REFCOUNT(ThreadWatcher); 407 DISABLE_RUNNABLE_METHOD_REFCOUNT(ThreadWatcher);
380 DISABLE_RUNNABLE_METHOD_REFCOUNT(ThreadWatcherList); 408 DISABLE_RUNNABLE_METHOD_REFCOUNT(ThreadWatcherList);
381 409
382 #endif // CHROME_BROWSER_METRICS_THREAD_WATCHER_H_ 410 #endif // CHROME_BROWSER_METRICS_THREAD_WATCHER_H_
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698