Index: chrome/browser/metrics/thread_watcher.h |
=================================================================== |
--- chrome/browser/metrics/thread_watcher.h (revision 181723) |
+++ chrome/browser/metrics/thread_watcher.h (working copy) |
@@ -308,30 +308,72 @@ |
// A map from BrowserThread to the actual instances. |
typedef std::map<content::BrowserThread::ID, ThreadWatcher*> RegistrationList; |
- // A map from thread names (UI, IO, etc) to |live_threads_threshold|. |
+ // A map from thread names (UI, IO, etc) to |CrashDataThresholds|. |
// |live_threads_threshold| specifies the maximum number of browser threads |
// that have to be responsive when we want to crash the browser because of |
- // hung watched thread. |
+ // hung watched thread. This threshold allows us to either look for a system |
+ // deadlock, or look for a solo hung thread. A small live_threads_threshold |
+ // looks for a broad deadlock (few browser threads left running), and a large |
+ // threshold looks for a single hung thread (this in only appropriate for a |
+ // thread that *should* never have much jank, such as the IO). |
// |
+ // |unresponsive_threshold| specifies the number of unanswered ping messages |
+ // after which watched (UI, IO, etc) thread is considered as not responsive. |
+ // We translate "time" (given in seconds) into a number of pings. As a result, |
+ // we only declare a thread unresponsive when a lot of "time" has passed (many |
+ // pings), and yet our pinging thread has continued to process messages (so we |
+ // know the entire PC is not hung). Set this number higher to crash less |
+ // often, and lower to crash more often. |
+ // |
// The map lists all threads (by name) that can induce a crash by hanging. It |
// is populated from the command line, or given a default list. See |
// InitializeAndStartWatching() for the separate list of all threads that are |
// watched, as they provide the system context of how hung *other* threads |
// are. |
// |
- // Example 1: If the value for "IO" was 3, then we would crash if at least one |
- // thread is responding and total responding threads is less than or equal to |
- // 3 (this thread, plus at least one other thread is unresponsive). We would |
- // not crash if none of the threads are not responding, as we'd assume such |
- // large hang counts mean that the system is generally unresponsive. |
- // Example 2: If the value for "UI" was INT_MAX, then we would always crash if |
- // the UI thread was hung, no matter what the other threads are doing. |
- // Example 3: If the value of "FILE" was 5, then we would only crash if the |
- // FILE thread was the ONLY hung thread (because we watch 6 threads). IF there |
- // was another unresponsive thread, we would not consider this a problem worth |
- // crashing for. |
- typedef std::map<std::string, uint32> CrashOnHangThreadMap; |
+ // CrashOnHangThreadMap is populated by ParseCommandLineCrashOnHangThreads(). |
+ // It parses command line argument like "UI:3:18,IO:3:18,FILE:5:90". In this |
+ // string, the first parameter specifies the thread_id: UI, IO or FILE. The |
+ // second parameter specifies |live_threads_threshold|. For UI and IO threads, |
+ // we would crash if the number of threads responding is less than or equal to |
+ // 3. The third parameter specifies the unresponsive threshold seconds. This |
+ // number is used to calculate |unresponsive_threshold|. In this example for |
+ // UI and IO threads, we would crash if it doesn't respond for 18 seconds (or |
jar (doing other things)
2013/02/12 01:27:27
nit: it doesn't --> those threads don't
ramant (doing other things)
2013/02/12 01:54:30
Done.
|
+ // 9 unanswered ping messages) and for FILE thread, crash_seconds is set to 90 |
+ // seconds (or 45 unanswered ping messages). |
+ // |
+ // ThreadWatcher watches six (UI, IO, DB, FILE, FILE_USER_BLOCKING and CACHE) |
jar (doing other things)
2013/02/12 01:27:27
This paragraph should move up earlier, before you
ramant (doing other things)
2013/02/12 01:54:30
Done.
|
+ // browser threads. The following examples explain how the data in |
+ // |CrashDataThresholds| controls the crashes. |
+ // |
+ // Example 1: If the |live_threads_threshold| value for "IO" was 3 and |
+ // unresponsive threshold seconds is 18 (or |unresponsive_threshold| is 9), |
+ // then we would crash if the IO thread was hung (9 unanswered ping messages) |
+ // and if at least one thread is responding and total responding threads is |
+ // less than or equal to 3 (this thread, plus at least one other thread is |
+ // unresponsive). We would not crash if none of the threads are not |
+ // responding, as we'd assume such large hang counts mean that the system is |
jar (doing other things)
2013/02/12 01:27:27
nit: should have read:
We would not crash if none
ramant (doing other things)
2013/02/12 01:54:30
Done.
|
+ // generally unresponsive. |
+ // Example 2: If the |live_threads_threshold| value for "UI" was INT_MAX and |
jar (doing other things)
2013/02/12 01:27:27
Instead of INT_MAX, "any number higher than 6"
ramant (doing other things)
2013/02/12 01:54:30
Done.
|
+ // unresponsive threshold seconds is 18 (or |unresponsive_threshold| is 9), |
+ // then we would always crash if the UI thread was hung (9 unanswered ping |
+ // messages), no matter what the other threads are doing. |
+ // Example 3: If the |live_threads_threshold| value of "FILE" was 5 and |
+ // unresponsive threshold seconds is 90 (or |unresponsive_threshold| is 45), |
+ // then we would only crash if the FILE thread was the ONLY hung thread |
+ // (because we watch 6 threads). If there was another unresponsive thread, we |
+ // would not consider this a problem worth crashing for. FILE thread would be |
+ // considered as hung if it didn't respond for 45 ping messages. |
+ struct CrashDataThresholds { |
+ CrashDataThresholds(uint32 live_threads_threshold, |
+ uint32 unresponsive_threshold); |
+ CrashDataThresholds(); |
+ uint32 live_threads_threshold; |
+ uint32 unresponsive_threshold; |
+ }; |
+ typedef std::map<std::string, CrashDataThresholds> CrashOnHangThreadMap; |
+ |
// This method posts a task on WatchDogThread to start watching all browser |
// threads. |
// This method is accessible on UI thread. |
@@ -361,7 +403,9 @@ |
// Allow tests to access our innards for testing purposes. |
friend class CustomThreadWatcher; |
friend class ThreadWatcherTest; |
- FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, CommandLineArgs); |
+ FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, ThreadNamesOnlyArgs); |
+ FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, ThreadNamesAndLiveThresholdArgs); |
+ FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, CrashOnHangThreadsAllArgs); |
// This singleton holds the global list of registered ThreadWatchers. |
ThreadWatcherList(); |
@@ -369,16 +413,26 @@ |
// Destructor deletes all registered ThreadWatcher instances. |
virtual ~ThreadWatcherList(); |
- // Parses the command line to get |unresponsive_threshold| from |
- // switches::kCrashOnHangSeconds, |crash_on_hang| thread names from |
- // switches::kCrashOnHangThreads and |live_threads_threshold| from |
- // switches::kCrashOnLive. |crash_on_hang_threads| is a map of |
- // |crash_on_hang| thread's names to |live_threads_threshold|. |
+ // Parses the command line to get |crash_on_hang_threads| map from |
+ // switches::kCrashOnHangThreads. |crash_on_hang_threads| is a map of |
+ // |crash_on_hang| thread's names to |CrashDataThresholds|. |
static void ParseCommandLine( |
const CommandLine& command_line, |
uint32* unresponsive_threshold, |
CrashOnHangThreadMap* crash_on_hang_threads); |
+ // Parses the argument |crash_on_hang_thread_names| and creates |
+ // |crash_on_hang_threads| map of |crash_on_hang| thread's names to |
+ // |CrashDataThresholds|. If |crash_on_hang_thread_names| doesn't specify |
+ // |live_threads_threshold|, then it uses |default_live_threads_threshold| as |
+ // the value. If |crash_on_hang_thread_names| doesn't specify |crash_seconds|, |
+ // then it uses |default_crash_seconds| as the value. |
+ static void ParseCommandLineCrashOnHangThreads( |
+ const std::string& crash_on_hang_thread_names, |
+ uint32 default_live_threads_threshold, |
+ uint32 default_crash_seconds, |
+ CrashOnHangThreadMap* crash_on_hang_threads); |
+ |
// This constructs the |ThreadWatcherList| singleton and starts watching |
// browser threads by calling StartWatching() on each browser thread that is |
// watched. It disarms StartupTimeBomb. |