Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1104)

Side by Side Diff: chrome/chrome_watcher/kasko_util.cc

Issue 2086403002: Remove the Kasko based out of process browser hang instrumentation. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Merge Created 4 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « chrome/chrome_watcher/kasko_util.h ('k') | third_party/kasko/BUILD.gn » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/chrome_watcher/kasko_util.h"
6
7 #include <sddl.h>
8
9 #include <memory>
10 #include <set>
11 #include <string>
12 #include <utility>
13 #include <vector>
14
15 #include "base/base_paths.h"
16 #include "base/bind.h"
17 #include "base/callback_helpers.h"
18 #include "base/environment.h"
19 #include "base/files/file_path.h"
20 #include "base/format_macros.h"
21 #include "base/macros.h"
22 #include "base/path_service.h"
23 #include "base/strings/string_number_conversions.h"
24 #include "base/strings/string_util.h"
25 #include "base/strings/stringprintf.h"
26 #include "base/strings/utf_string_conversions.h"
27 #include "base/win/wait_chain.h"
28 #include "base/win/win_util.h"
29
30 #include "chrome/chrome_watcher/chrome_watcher_main_api.h"
31 #include "chrome/chrome_watcher/system_load_estimator.h"
32 #include "components/crash/content/app/crashpad.h"
33 #include "components/memory_pressure/direct_memory_pressure_calculator_win.h"
34 #include "components/memory_pressure/memory_pressure_calculator.h"
35 #include "syzygy/kasko/api/reporter.h"
36
37 namespace {
38
39 using MemoryPressureLevel =
40 memory_pressure::MemoryPressureCalculator::MemoryPressureLevel;
41
42 // Labels a crash report to the server as a hang report.
43 const wchar_t kHangReportCrashKey[] = L"hang-report";
44
45 // Helper function for determining the crash server to use. Defaults to the
46 // standard crash server, but can be overridden via an environment variable.
47 // Enables easy integration testing.
48 base::string16 GetKaskoCrashServerUrl() {
49 static const char kKaskoCrashServerUrl[] = "KASKO_CRASH_SERVER_URL";
50 static const wchar_t kDefaultKaskoCrashServerUrl[] =
51 L"https://clients2.google.com/cr/report";
52
53 std::unique_ptr<base::Environment> env(base::Environment::Create());
54 std::string env_var;
55 if (env->GetVar(kKaskoCrashServerUrl, &env_var)) {
56 return base::UTF8ToUTF16(env_var);
57 }
58 return kDefaultKaskoCrashServerUrl;
59 }
60
61 // Helper function for determining the crash reports directory to use. Defaults
62 // to the browser data directory, but can be overridden via an environment
63 // variable. Enables easy integration testing.
64 base::FilePath GetKaskoCrashReportsBaseDir(
65 const base::char16* browser_data_directory) {
66 static const char kKaskoCrashReportBaseDir[] = "KASKO_CRASH_REPORTS_BASE_DIR";
67 std::unique_ptr<base::Environment> env(base::Environment::Create());
68 std::string env_var;
69 if (env->GetVar(kKaskoCrashReportBaseDir, &env_var)) {
70 return base::FilePath(base::UTF8ToUTF16(env_var));
71 }
72 return base::FilePath(browser_data_directory);
73 }
74
75 struct EventSourceDeregisterer {
76 using pointer = HANDLE;
77 void operator()(HANDLE event_source_handle) const {
78 if (!::DeregisterEventSource(event_source_handle))
79 DPLOG(ERROR) << "DeregisterEventSource";
80 }
81 };
82 using ScopedEventSourceHandle =
83 std::unique_ptr<HANDLE, EventSourceDeregisterer>;
84
85 struct SidDeleter {
86 using pointer = PSID;
87 void operator()(PSID sid) const {
88 if (::LocalFree(sid) != nullptr)
89 DPLOG(ERROR) << "LocalFree";
90 }
91 };
92 using ScopedSid = std::unique_ptr<PSID, SidDeleter>;
93
94 void OnCrashReportUpload(void* context,
95 const base::char16* report_id,
96 const base::char16* minidump_path,
97 const base::char16* const* keys,
98 const base::char16* const* values) {
99 // Open the event source.
100 ScopedEventSourceHandle event_source_handle(
101 ::RegisterEventSource(nullptr, L"Chrome"));
102 if (!event_source_handle) {
103 PLOG(ERROR) << "RegisterEventSource";
104 return;
105 }
106
107 // Get the user's SID for the log record.
108 base::string16 sid_string;
109 PSID sid = nullptr;
110 if (base::win::GetUserSidString(&sid_string) && !sid_string.empty()) {
111 if (!::ConvertStringSidToSid(sid_string.c_str(), &sid))
112 DPLOG(ERROR) << "ConvertStringSidToSid";
113 DCHECK(sid);
114 }
115 // Ensure cleanup on scope exit.
116 ScopedSid scoped_sid;
117 if (sid)
118 scoped_sid.reset(sid);
119
120 // Generate the message.
121 // Note that the format of this message must match the consumer in
122 // chrome/browser/crash_upload_list_win.cc.
123 base::string16 message =
124 L"Crash uploaded. Id=" + base::string16(report_id) + L".";
125
126 // Matches Omaha.
127 const int kCrashUploadEventId = 2;
128
129 // Report the event.
130 const base::char16* strings[] = {message.c_str()};
131 if (!::ReportEvent(event_source_handle.get(), EVENTLOG_INFORMATION_TYPE,
132 0, // category
133 kCrashUploadEventId, sid,
134 1, // count
135 0, strings, nullptr)) {
136 DPLOG(ERROR);
137 }
138 }
139
140 void AddCrashKey(const wchar_t *key, const wchar_t *value,
141 std::vector<kasko::api::CrashKey> *crash_keys) {
142 DCHECK(key);
143 DCHECK(value);
144 DCHECK(crash_keys);
145
146 crash_keys->resize(crash_keys->size() + 1);
147 kasko::api::CrashKey& crash_key = crash_keys->back();
148 base::wcslcpy(crash_key.name, key, kasko::api::CrashKey::kNameMaxLength);
149 base::wcslcpy(crash_key.value, value, kasko::api::CrashKey::kValueMaxLength);
150 }
151
152 // Get the |process| and the |thread_id| of the node inside the |wait_chain|
153 // that is of type ThreadType and belongs to a process that is valid for the
154 // capture of a crash dump. Returns true if such a node was found.
155 bool GetLastValidNodeInfo(const base::win::WaitChainNodeVector& wait_chain,
156 base::Process* process,
157 DWORD* thread_id) {
158 // The last thread in the wait chain is nominated as the hung thread.
159 base::win::WaitChainNodeVector::const_reverse_iterator it;
160 for (it = wait_chain.rbegin(); it != wait_chain.rend(); ++it) {
161 if (it->ObjectType != WctThreadType)
162 continue;
163
164 auto current_process = base::Process::Open(it->ThreadObject.ProcessId);
165 if (EnsureTargetProcessValidForCapture(current_process)) {
166 *process = std::move(current_process);
167 *thread_id = it->ThreadObject.ThreadId;
168 return true;
169 }
170 }
171 return false;
172 }
173
174 // Adds the entire wait chain to |crash_keys|.
175 //
176 // As an example (key : value):
177 // hung-process-wait-chain-00 : Thread 10242 in process 4554 with status Blocked
178 // hung-process-wait-chain-01 : Lock of type ThreadWait with status Owned
179 // hung-process-wait-chain-02 : Thread 77221 in process 4554 with status Blocked
180 //
181 void AddWaitChainToCrashKeys(const base::win::WaitChainNodeVector& wait_chain,
182 std::vector<kasko::api::CrashKey>* crash_keys) {
183 for (size_t i = 0; i < wait_chain.size(); i++) {
184 AddCrashKey(
185 base::StringPrintf(L"hung-process-wait-chain-%02" PRIuS, i).c_str(),
186 base::win::WaitChainNodeToString(wait_chain[i]).c_str(), crash_keys);
187 }
188 }
189
190 base::FilePath GetExeFilePathForProcess(const base::Process& process) {
191 wchar_t exe_name[MAX_PATH];
192 DWORD exe_name_len = arraysize(exe_name);
193 // Note: requesting the Win32 path format.
194 if (::QueryFullProcessImageName(process.Handle(), 0, exe_name,
195 &exe_name_len) == 0) {
196 DPLOG(ERROR) << "Failed to get executable name for process";
197 return base::FilePath();
198 }
199
200 // QueryFullProcessImageName's documentation does not specify behavior when
201 // the buffer is too small, but we know that GetModuleFileNameEx succeeds and
202 // truncates the returned name in such a case. Given that paths of arbitrary
203 // length may exist, the conservative approach is to reject names when
204 // the returned length is that of the buffer.
205 if (exe_name_len > 0 && exe_name_len < arraysize(exe_name))
206 return base::FilePath(exe_name);
207
208 return base::FilePath();
209 }
210
211 // Adds the executable base name for each unique pid found in the |wait_chain|
212 // to the |crash_keys|.
213 void AddProcessExeNameToCrashKeys(
214 const base::win::WaitChainNodeVector& wait_chain,
215 std::vector<kasko::api::CrashKey>* crash_keys) {
216 std::set<DWORD> unique_pids;
217 for (size_t i = 0; i < wait_chain.size(); i += 2)
218 unique_pids.insert(wait_chain[i].ThreadObject.ProcessId);
219
220 for (DWORD pid : unique_pids) {
221 // This is racy on the pid but for the purposes of this function, some error
222 // threshold can be tolerated. Hopefully the race doesn't happen often.
223 base::Process process(
224 base::Process::OpenWithAccess(pid, PROCESS_QUERY_LIMITED_INFORMATION));
225
226 base::string16 exe_file_path = L"N/A";
227 if (process.IsValid())
228 exe_file_path = GetExeFilePathForProcess(process).BaseName().value();
229
230 AddCrashKey(
231 base::StringPrintf(L"hung-process-wait-chain-pid-%u", pid).c_str(),
232 exe_file_path.c_str(), crash_keys);
233 }
234 }
235
236 void AddSystemLoadInformation(std::vector<kasko::api::CrashKey>* crash_keys) {
237 DCHECK(crash_keys);
238
239 // Add memory pressure level.
240 memory_pressure::DirectMemoryPressureCalculator memory_calculator;
241 const wchar_t* memory_pressure_level = L"";
242 switch (memory_calculator.CalculateCurrentPressureLevel()) {
243 case MemoryPressureLevel::MEMORY_PRESSURE_LEVEL_NONE:
244 memory_pressure_level = L"none-or-unknown";
245 break;
246 case MemoryPressureLevel::MEMORY_PRESSURE_LEVEL_MODERATE:
247 memory_pressure_level = L"moderate";
248 break;
249 case MemoryPressureLevel::MEMORY_PRESSURE_LEVEL_CRITICAL:
250 memory_pressure_level = L"critical";
251 break;
252 }
253 AddCrashKey(L"memory-pressure", memory_pressure_level, crash_keys);
254
255 // Add measures of cpu and disk load.
256 chrome_watcher::SystemLoadEstimator::Estimate load_estimate = {};
257 if (!chrome_watcher::SystemLoadEstimator::Measure(&load_estimate))
258 return;
259
260 AddCrashKey(L"cpu-load-percent",
261 base::IntToString16(load_estimate.cpu_load_pct).c_str(),
262 crash_keys);
263 AddCrashKey(L"disk-idle-percent",
264 base::IntToString16(load_estimate.disk_idle_pct).c_str(),
265 crash_keys);
266 AddCrashKey(L"disk-avg-queue-len",
267 base::IntToString16(load_estimate.avg_disk_queue_len).c_str(),
268 crash_keys);
269 }
270
271 } // namespace
272
273 bool InitializeKaskoReporter(const base::string16& endpoint,
274 const base::char16* browser_data_directory) {
275 base::string16 crash_server = GetKaskoCrashServerUrl();
276 base::FilePath crash_reports_base_dir =
277 GetKaskoCrashReportsBaseDir(browser_data_directory);
278
279 return kasko::api::InitializeReporter(
280 endpoint.c_str(),
281 crash_server.c_str(),
282 crash_reports_base_dir.Append(L"Crash Reports").value().c_str(),
283 crash_reports_base_dir.Append(kPermanentlyFailedReportsSubdir)
284 .value()
285 .c_str(),
286 &OnCrashReportUpload,
287 nullptr);
288 }
289
290 void ShutdownKaskoReporter() {
291 kasko::api::ShutdownReporter();
292 }
293
294 bool EnsureTargetProcessValidForCapture(const base::Process& process) {
295 // Ensure the target process's executable is inside the current Chrome
296 // directory.
297 base::FilePath chrome_dir;
298 if (!PathService::Get(base::DIR_EXE, &chrome_dir))
299 return false;
300
301 return chrome_dir.IsParent(GetExeFilePathForProcess(process));
302 }
303
304 void DumpHungProcess(DWORD main_thread_id, const base::string16& channel,
305 const base::char16* hang_type,
306 const base::Process& process) {
307 // Read the Crashpad module annotations for the process.
308 std::vector<kasko::api::CrashKey> annotations;
309 crash_reporter::ReadMainModuleAnnotationsForKasko(process, &annotations);
310
311 // Label the report as a hang report.
312 AddCrashKey(kHangReportCrashKey, hang_type, &annotations);
313
314 // Note: system load is measured as early as possible, as it is potentially
315 // more volatile than wait chain information.
316 // TODO(manzagop): consider continuous load observation, instead of punctual
317 // observation, which may fail to observe load.
318 AddSystemLoadInformation(&annotations);
319
320 // Use the Wait Chain Traversal API to determine the hung thread. Defaults to
321 // UI thread on error. The wait chain may point to a different thread in a
322 // different process for the hung thread.
323 DWORD hung_thread_id = main_thread_id;
324 base::Process hung_process = process.Duplicate();
325
326 base::win::WaitChainNodeVector wait_chain;
327 bool is_deadlock = false;
328 base::string16 thread_chain_failure_reason;
329 DWORD thread_chain_last_error = ERROR_SUCCESS;
330 if (base::win::GetThreadWaitChain(main_thread_id, &wait_chain, &is_deadlock,
331 &thread_chain_failure_reason,
332 &thread_chain_last_error)) {
333 bool found_valid_node =
334 GetLastValidNodeInfo(wait_chain, &hung_process, &hung_thread_id);
335 DCHECK(found_valid_node);
336
337 // Add some interesting data about the wait chain to the crash keys.
338 AddCrashKey(L"hung-process-is-deadlock", is_deadlock ? L"true" : L"false",
339 &annotations);
340 AddWaitChainToCrashKeys(wait_chain, &annotations);
341 AddProcessExeNameToCrashKeys(wait_chain, &annotations);
342 } else {
343 // The call to GetThreadWaitChain() failed. Include the reason inside the
344 // report using crash keys.
345 // TODO(pmonette): Remove this when UMA is added to wait_chain.cc.
346 AddCrashKey(L"hung-process-wait-chain-failure-reason",
347 thread_chain_failure_reason.c_str(), &annotations);
348 AddCrashKey(L"hung-process-wait-chain-last-error",
349 base::UintToString16(thread_chain_last_error).c_str(),
350 &annotations);
351 }
352
353 std::vector<const base::char16*> key_buffers;
354 std::vector<const base::char16*> value_buffers;
355 for (const auto& crash_key : annotations) {
356 key_buffers.push_back(crash_key.name);
357 value_buffers.push_back(crash_key.value);
358 }
359 key_buffers.push_back(nullptr);
360 value_buffers.push_back(nullptr);
361
362 // Synthesize an exception for the hung thread. Populate the record with the
363 // current context of the thread to get the stack trace bucketed on the crash
364 // backend.
365 CONTEXT thread_context = {};
366 EXCEPTION_RECORD exception_record = {};
367 exception_record.ExceptionCode = EXCEPTION_ARRAY_BOUNDS_EXCEEDED;
368 EXCEPTION_POINTERS exception_pointers = {&exception_record, &thread_context};
369
370 base::win::ScopedHandle hung_thread(::OpenThread(
371 THREAD_SUSPEND_RESUME | THREAD_GET_CONTEXT | THREAD_QUERY_INFORMATION,
372 FALSE, hung_thread_id));
373
374 bool have_context = false;
375 if (hung_thread.IsValid()) {
376 DWORD suspend_count = ::SuspendThread(hung_thread.Get());
377 const DWORD kSuspendFailed = static_cast<DWORD>(-1);
378 if (suspend_count != kSuspendFailed) {
379 // Best effort capture of the context.
380 thread_context.ContextFlags = CONTEXT_FLOATING_POINT | CONTEXT_SEGMENTS |
381 CONTEXT_INTEGER | CONTEXT_CONTROL;
382 if (::GetThreadContext(hung_thread.Get(), &thread_context) == TRUE)
383 have_context = true;
384
385 ::ResumeThread(hung_thread.Get());
386 }
387 }
388
389 // TODO(manzagop): consider making the dump-type channel-dependent.
390 if (have_context) {
391 kasko::api::SendReportForProcess(
392 hung_process.Handle(), hung_thread_id, &exception_pointers,
393 kasko::api::LARGER_DUMP_TYPE, key_buffers.data(), value_buffers.data());
394 } else {
395 kasko::api::SendReportForProcess(hung_process.Handle(), 0, nullptr,
396 kasko::api::LARGER_DUMP_TYPE,
397 key_buffers.data(), value_buffers.data());
398 }
399 }
OLDNEW
« no previous file with comments | « chrome/chrome_watcher/kasko_util.h ('k') | third_party/kasko/BUILD.gn » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698