OLD | NEW |
| (Empty) |
1 // Copyright 2016 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "chrome/chrome_watcher/kasko_util.h" | |
6 | |
7 #include <sddl.h> | |
8 | |
9 #include <memory> | |
10 #include <set> | |
11 #include <string> | |
12 #include <utility> | |
13 #include <vector> | |
14 | |
15 #include "base/base_paths.h" | |
16 #include "base/bind.h" | |
17 #include "base/callback_helpers.h" | |
18 #include "base/environment.h" | |
19 #include "base/files/file_path.h" | |
20 #include "base/format_macros.h" | |
21 #include "base/macros.h" | |
22 #include "base/path_service.h" | |
23 #include "base/strings/string_number_conversions.h" | |
24 #include "base/strings/string_util.h" | |
25 #include "base/strings/stringprintf.h" | |
26 #include "base/strings/utf_string_conversions.h" | |
27 #include "base/win/wait_chain.h" | |
28 #include "base/win/win_util.h" | |
29 | |
30 #include "chrome/chrome_watcher/chrome_watcher_main_api.h" | |
31 #include "chrome/chrome_watcher/system_load_estimator.h" | |
32 #include "components/crash/content/app/crashpad.h" | |
33 #include "components/memory_pressure/direct_memory_pressure_calculator_win.h" | |
34 #include "components/memory_pressure/memory_pressure_calculator.h" | |
35 #include "syzygy/kasko/api/reporter.h" | |
36 | |
37 namespace { | |
38 | |
39 using MemoryPressureLevel = | |
40 memory_pressure::MemoryPressureCalculator::MemoryPressureLevel; | |
41 | |
42 // Labels a crash report to the server as a hang report. | |
43 const wchar_t kHangReportCrashKey[] = L"hang-report"; | |
44 | |
45 // Helper function for determining the crash server to use. Defaults to the | |
46 // standard crash server, but can be overridden via an environment variable. | |
47 // Enables easy integration testing. | |
48 base::string16 GetKaskoCrashServerUrl() { | |
49 static const char kKaskoCrashServerUrl[] = "KASKO_CRASH_SERVER_URL"; | |
50 static const wchar_t kDefaultKaskoCrashServerUrl[] = | |
51 L"https://clients2.google.com/cr/report"; | |
52 | |
53 std::unique_ptr<base::Environment> env(base::Environment::Create()); | |
54 std::string env_var; | |
55 if (env->GetVar(kKaskoCrashServerUrl, &env_var)) { | |
56 return base::UTF8ToUTF16(env_var); | |
57 } | |
58 return kDefaultKaskoCrashServerUrl; | |
59 } | |
60 | |
61 // Helper function for determining the crash reports directory to use. Defaults | |
62 // to the browser data directory, but can be overridden via an environment | |
63 // variable. Enables easy integration testing. | |
64 base::FilePath GetKaskoCrashReportsBaseDir( | |
65 const base::char16* browser_data_directory) { | |
66 static const char kKaskoCrashReportBaseDir[] = "KASKO_CRASH_REPORTS_BASE_DIR"; | |
67 std::unique_ptr<base::Environment> env(base::Environment::Create()); | |
68 std::string env_var; | |
69 if (env->GetVar(kKaskoCrashReportBaseDir, &env_var)) { | |
70 return base::FilePath(base::UTF8ToUTF16(env_var)); | |
71 } | |
72 return base::FilePath(browser_data_directory); | |
73 } | |
74 | |
75 struct EventSourceDeregisterer { | |
76 using pointer = HANDLE; | |
77 void operator()(HANDLE event_source_handle) const { | |
78 if (!::DeregisterEventSource(event_source_handle)) | |
79 DPLOG(ERROR) << "DeregisterEventSource"; | |
80 } | |
81 }; | |
82 using ScopedEventSourceHandle = | |
83 std::unique_ptr<HANDLE, EventSourceDeregisterer>; | |
84 | |
85 struct SidDeleter { | |
86 using pointer = PSID; | |
87 void operator()(PSID sid) const { | |
88 if (::LocalFree(sid) != nullptr) | |
89 DPLOG(ERROR) << "LocalFree"; | |
90 } | |
91 }; | |
92 using ScopedSid = std::unique_ptr<PSID, SidDeleter>; | |
93 | |
94 void OnCrashReportUpload(void* context, | |
95 const base::char16* report_id, | |
96 const base::char16* minidump_path, | |
97 const base::char16* const* keys, | |
98 const base::char16* const* values) { | |
99 // Open the event source. | |
100 ScopedEventSourceHandle event_source_handle( | |
101 ::RegisterEventSource(nullptr, L"Chrome")); | |
102 if (!event_source_handle) { | |
103 PLOG(ERROR) << "RegisterEventSource"; | |
104 return; | |
105 } | |
106 | |
107 // Get the user's SID for the log record. | |
108 base::string16 sid_string; | |
109 PSID sid = nullptr; | |
110 if (base::win::GetUserSidString(&sid_string) && !sid_string.empty()) { | |
111 if (!::ConvertStringSidToSid(sid_string.c_str(), &sid)) | |
112 DPLOG(ERROR) << "ConvertStringSidToSid"; | |
113 DCHECK(sid); | |
114 } | |
115 // Ensure cleanup on scope exit. | |
116 ScopedSid scoped_sid; | |
117 if (sid) | |
118 scoped_sid.reset(sid); | |
119 | |
120 // Generate the message. | |
121 // Note that the format of this message must match the consumer in | |
122 // chrome/browser/crash_upload_list_win.cc. | |
123 base::string16 message = | |
124 L"Crash uploaded. Id=" + base::string16(report_id) + L"."; | |
125 | |
126 // Matches Omaha. | |
127 const int kCrashUploadEventId = 2; | |
128 | |
129 // Report the event. | |
130 const base::char16* strings[] = {message.c_str()}; | |
131 if (!::ReportEvent(event_source_handle.get(), EVENTLOG_INFORMATION_TYPE, | |
132 0, // category | |
133 kCrashUploadEventId, sid, | |
134 1, // count | |
135 0, strings, nullptr)) { | |
136 DPLOG(ERROR); | |
137 } | |
138 } | |
139 | |
140 void AddCrashKey(const wchar_t *key, const wchar_t *value, | |
141 std::vector<kasko::api::CrashKey> *crash_keys) { | |
142 DCHECK(key); | |
143 DCHECK(value); | |
144 DCHECK(crash_keys); | |
145 | |
146 crash_keys->resize(crash_keys->size() + 1); | |
147 kasko::api::CrashKey& crash_key = crash_keys->back(); | |
148 base::wcslcpy(crash_key.name, key, kasko::api::CrashKey::kNameMaxLength); | |
149 base::wcslcpy(crash_key.value, value, kasko::api::CrashKey::kValueMaxLength); | |
150 } | |
151 | |
152 // Get the |process| and the |thread_id| of the node inside the |wait_chain| | |
153 // that is of type ThreadType and belongs to a process that is valid for the | |
154 // capture of a crash dump. Returns true if such a node was found. | |
155 bool GetLastValidNodeInfo(const base::win::WaitChainNodeVector& wait_chain, | |
156 base::Process* process, | |
157 DWORD* thread_id) { | |
158 // The last thread in the wait chain is nominated as the hung thread. | |
159 base::win::WaitChainNodeVector::const_reverse_iterator it; | |
160 for (it = wait_chain.rbegin(); it != wait_chain.rend(); ++it) { | |
161 if (it->ObjectType != WctThreadType) | |
162 continue; | |
163 | |
164 auto current_process = base::Process::Open(it->ThreadObject.ProcessId); | |
165 if (EnsureTargetProcessValidForCapture(current_process)) { | |
166 *process = std::move(current_process); | |
167 *thread_id = it->ThreadObject.ThreadId; | |
168 return true; | |
169 } | |
170 } | |
171 return false; | |
172 } | |
173 | |
174 // Adds the entire wait chain to |crash_keys|. | |
175 // | |
176 // As an example (key : value): | |
177 // hung-process-wait-chain-00 : Thread 10242 in process 4554 with status Blocked | |
178 // hung-process-wait-chain-01 : Lock of type ThreadWait with status Owned | |
179 // hung-process-wait-chain-02 : Thread 77221 in process 4554 with status Blocked | |
180 // | |
181 void AddWaitChainToCrashKeys(const base::win::WaitChainNodeVector& wait_chain, | |
182 std::vector<kasko::api::CrashKey>* crash_keys) { | |
183 for (size_t i = 0; i < wait_chain.size(); i++) { | |
184 AddCrashKey( | |
185 base::StringPrintf(L"hung-process-wait-chain-%02" PRIuS, i).c_str(), | |
186 base::win::WaitChainNodeToString(wait_chain[i]).c_str(), crash_keys); | |
187 } | |
188 } | |
189 | |
190 base::FilePath GetExeFilePathForProcess(const base::Process& process) { | |
191 wchar_t exe_name[MAX_PATH]; | |
192 DWORD exe_name_len = arraysize(exe_name); | |
193 // Note: requesting the Win32 path format. | |
194 if (::QueryFullProcessImageName(process.Handle(), 0, exe_name, | |
195 &exe_name_len) == 0) { | |
196 DPLOG(ERROR) << "Failed to get executable name for process"; | |
197 return base::FilePath(); | |
198 } | |
199 | |
200 // QueryFullProcessImageName's documentation does not specify behavior when | |
201 // the buffer is too small, but we know that GetModuleFileNameEx succeeds and | |
202 // truncates the returned name in such a case. Given that paths of arbitrary | |
203 // length may exist, the conservative approach is to reject names when | |
204 // the returned length is that of the buffer. | |
205 if (exe_name_len > 0 && exe_name_len < arraysize(exe_name)) | |
206 return base::FilePath(exe_name); | |
207 | |
208 return base::FilePath(); | |
209 } | |
210 | |
211 // Adds the executable base name for each unique pid found in the |wait_chain| | |
212 // to the |crash_keys|. | |
213 void AddProcessExeNameToCrashKeys( | |
214 const base::win::WaitChainNodeVector& wait_chain, | |
215 std::vector<kasko::api::CrashKey>* crash_keys) { | |
216 std::set<DWORD> unique_pids; | |
217 for (size_t i = 0; i < wait_chain.size(); i += 2) | |
218 unique_pids.insert(wait_chain[i].ThreadObject.ProcessId); | |
219 | |
220 for (DWORD pid : unique_pids) { | |
221 // This is racy on the pid but for the purposes of this function, some error | |
222 // threshold can be tolerated. Hopefully the race doesn't happen often. | |
223 base::Process process( | |
224 base::Process::OpenWithAccess(pid, PROCESS_QUERY_LIMITED_INFORMATION)); | |
225 | |
226 base::string16 exe_file_path = L"N/A"; | |
227 if (process.IsValid()) | |
228 exe_file_path = GetExeFilePathForProcess(process).BaseName().value(); | |
229 | |
230 AddCrashKey( | |
231 base::StringPrintf(L"hung-process-wait-chain-pid-%u", pid).c_str(), | |
232 exe_file_path.c_str(), crash_keys); | |
233 } | |
234 } | |
235 | |
236 void AddSystemLoadInformation(std::vector<kasko::api::CrashKey>* crash_keys) { | |
237 DCHECK(crash_keys); | |
238 | |
239 // Add memory pressure level. | |
240 memory_pressure::DirectMemoryPressureCalculator memory_calculator; | |
241 const wchar_t* memory_pressure_level = L""; | |
242 switch (memory_calculator.CalculateCurrentPressureLevel()) { | |
243 case MemoryPressureLevel::MEMORY_PRESSURE_LEVEL_NONE: | |
244 memory_pressure_level = L"none-or-unknown"; | |
245 break; | |
246 case MemoryPressureLevel::MEMORY_PRESSURE_LEVEL_MODERATE: | |
247 memory_pressure_level = L"moderate"; | |
248 break; | |
249 case MemoryPressureLevel::MEMORY_PRESSURE_LEVEL_CRITICAL: | |
250 memory_pressure_level = L"critical"; | |
251 break; | |
252 } | |
253 AddCrashKey(L"memory-pressure", memory_pressure_level, crash_keys); | |
254 | |
255 // Add measures of cpu and disk load. | |
256 chrome_watcher::SystemLoadEstimator::Estimate load_estimate = {}; | |
257 if (!chrome_watcher::SystemLoadEstimator::Measure(&load_estimate)) | |
258 return; | |
259 | |
260 AddCrashKey(L"cpu-load-percent", | |
261 base::IntToString16(load_estimate.cpu_load_pct).c_str(), | |
262 crash_keys); | |
263 AddCrashKey(L"disk-idle-percent", | |
264 base::IntToString16(load_estimate.disk_idle_pct).c_str(), | |
265 crash_keys); | |
266 AddCrashKey(L"disk-avg-queue-len", | |
267 base::IntToString16(load_estimate.avg_disk_queue_len).c_str(), | |
268 crash_keys); | |
269 } | |
270 | |
271 } // namespace | |
272 | |
273 bool InitializeKaskoReporter(const base::string16& endpoint, | |
274 const base::char16* browser_data_directory) { | |
275 base::string16 crash_server = GetKaskoCrashServerUrl(); | |
276 base::FilePath crash_reports_base_dir = | |
277 GetKaskoCrashReportsBaseDir(browser_data_directory); | |
278 | |
279 return kasko::api::InitializeReporter( | |
280 endpoint.c_str(), | |
281 crash_server.c_str(), | |
282 crash_reports_base_dir.Append(L"Crash Reports").value().c_str(), | |
283 crash_reports_base_dir.Append(kPermanentlyFailedReportsSubdir) | |
284 .value() | |
285 .c_str(), | |
286 &OnCrashReportUpload, | |
287 nullptr); | |
288 } | |
289 | |
290 void ShutdownKaskoReporter() { | |
291 kasko::api::ShutdownReporter(); | |
292 } | |
293 | |
294 bool EnsureTargetProcessValidForCapture(const base::Process& process) { | |
295 // Ensure the target process's executable is inside the current Chrome | |
296 // directory. | |
297 base::FilePath chrome_dir; | |
298 if (!PathService::Get(base::DIR_EXE, &chrome_dir)) | |
299 return false; | |
300 | |
301 return chrome_dir.IsParent(GetExeFilePathForProcess(process)); | |
302 } | |
303 | |
304 void DumpHungProcess(DWORD main_thread_id, const base::string16& channel, | |
305 const base::char16* hang_type, | |
306 const base::Process& process) { | |
307 // Read the Crashpad module annotations for the process. | |
308 std::vector<kasko::api::CrashKey> annotations; | |
309 crash_reporter::ReadMainModuleAnnotationsForKasko(process, &annotations); | |
310 | |
311 // Label the report as a hang report. | |
312 AddCrashKey(kHangReportCrashKey, hang_type, &annotations); | |
313 | |
314 // Note: system load is measured as early as possible, as it is potentially | |
315 // more volatile than wait chain information. | |
316 // TODO(manzagop): consider continuous load observation, instead of punctual | |
317 // observation, which may fail to observe load. | |
318 AddSystemLoadInformation(&annotations); | |
319 | |
320 // Use the Wait Chain Traversal API to determine the hung thread. Defaults to | |
321 // UI thread on error. The wait chain may point to a different thread in a | |
322 // different process for the hung thread. | |
323 DWORD hung_thread_id = main_thread_id; | |
324 base::Process hung_process = process.Duplicate(); | |
325 | |
326 base::win::WaitChainNodeVector wait_chain; | |
327 bool is_deadlock = false; | |
328 base::string16 thread_chain_failure_reason; | |
329 DWORD thread_chain_last_error = ERROR_SUCCESS; | |
330 if (base::win::GetThreadWaitChain(main_thread_id, &wait_chain, &is_deadlock, | |
331 &thread_chain_failure_reason, | |
332 &thread_chain_last_error)) { | |
333 bool found_valid_node = | |
334 GetLastValidNodeInfo(wait_chain, &hung_process, &hung_thread_id); | |
335 DCHECK(found_valid_node); | |
336 | |
337 // Add some interesting data about the wait chain to the crash keys. | |
338 AddCrashKey(L"hung-process-is-deadlock", is_deadlock ? L"true" : L"false", | |
339 &annotations); | |
340 AddWaitChainToCrashKeys(wait_chain, &annotations); | |
341 AddProcessExeNameToCrashKeys(wait_chain, &annotations); | |
342 } else { | |
343 // The call to GetThreadWaitChain() failed. Include the reason inside the | |
344 // report using crash keys. | |
345 // TODO(pmonette): Remove this when UMA is added to wait_chain.cc. | |
346 AddCrashKey(L"hung-process-wait-chain-failure-reason", | |
347 thread_chain_failure_reason.c_str(), &annotations); | |
348 AddCrashKey(L"hung-process-wait-chain-last-error", | |
349 base::UintToString16(thread_chain_last_error).c_str(), | |
350 &annotations); | |
351 } | |
352 | |
353 std::vector<const base::char16*> key_buffers; | |
354 std::vector<const base::char16*> value_buffers; | |
355 for (const auto& crash_key : annotations) { | |
356 key_buffers.push_back(crash_key.name); | |
357 value_buffers.push_back(crash_key.value); | |
358 } | |
359 key_buffers.push_back(nullptr); | |
360 value_buffers.push_back(nullptr); | |
361 | |
362 // Synthesize an exception for the hung thread. Populate the record with the | |
363 // current context of the thread to get the stack trace bucketed on the crash | |
364 // backend. | |
365 CONTEXT thread_context = {}; | |
366 EXCEPTION_RECORD exception_record = {}; | |
367 exception_record.ExceptionCode = EXCEPTION_ARRAY_BOUNDS_EXCEEDED; | |
368 EXCEPTION_POINTERS exception_pointers = {&exception_record, &thread_context}; | |
369 | |
370 base::win::ScopedHandle hung_thread(::OpenThread( | |
371 THREAD_SUSPEND_RESUME | THREAD_GET_CONTEXT | THREAD_QUERY_INFORMATION, | |
372 FALSE, hung_thread_id)); | |
373 | |
374 bool have_context = false; | |
375 if (hung_thread.IsValid()) { | |
376 DWORD suspend_count = ::SuspendThread(hung_thread.Get()); | |
377 const DWORD kSuspendFailed = static_cast<DWORD>(-1); | |
378 if (suspend_count != kSuspendFailed) { | |
379 // Best effort capture of the context. | |
380 thread_context.ContextFlags = CONTEXT_FLOATING_POINT | CONTEXT_SEGMENTS | | |
381 CONTEXT_INTEGER | CONTEXT_CONTROL; | |
382 if (::GetThreadContext(hung_thread.Get(), &thread_context) == TRUE) | |
383 have_context = true; | |
384 | |
385 ::ResumeThread(hung_thread.Get()); | |
386 } | |
387 } | |
388 | |
389 // TODO(manzagop): consider making the dump-type channel-dependent. | |
390 if (have_context) { | |
391 kasko::api::SendReportForProcess( | |
392 hung_process.Handle(), hung_thread_id, &exception_pointers, | |
393 kasko::api::LARGER_DUMP_TYPE, key_buffers.data(), value_buffers.data()); | |
394 } else { | |
395 kasko::api::SendReportForProcess(hung_process.Handle(), 0, nullptr, | |
396 kasko::api::LARGER_DUMP_TYPE, | |
397 key_buffers.data(), value_buffers.data()); | |
398 } | |
399 } | |
OLD | NEW |