| OLD | NEW |
| (Empty) |
| 1 // Copyright 2016 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "chrome/chrome_watcher/kasko_util.h" | |
| 6 | |
| 7 #include <sddl.h> | |
| 8 | |
| 9 #include <memory> | |
| 10 #include <set> | |
| 11 #include <string> | |
| 12 #include <utility> | |
| 13 #include <vector> | |
| 14 | |
| 15 #include "base/base_paths.h" | |
| 16 #include "base/bind.h" | |
| 17 #include "base/callback_helpers.h" | |
| 18 #include "base/environment.h" | |
| 19 #include "base/files/file_path.h" | |
| 20 #include "base/format_macros.h" | |
| 21 #include "base/macros.h" | |
| 22 #include "base/path_service.h" | |
| 23 #include "base/strings/string_number_conversions.h" | |
| 24 #include "base/strings/string_util.h" | |
| 25 #include "base/strings/stringprintf.h" | |
| 26 #include "base/strings/utf_string_conversions.h" | |
| 27 #include "base/win/wait_chain.h" | |
| 28 #include "base/win/win_util.h" | |
| 29 | |
| 30 #include "chrome/chrome_watcher/chrome_watcher_main_api.h" | |
| 31 #include "chrome/chrome_watcher/system_load_estimator.h" | |
| 32 #include "components/crash/content/app/crashpad.h" | |
| 33 #include "components/memory_pressure/direct_memory_pressure_calculator_win.h" | |
| 34 #include "components/memory_pressure/memory_pressure_calculator.h" | |
| 35 #include "syzygy/kasko/api/reporter.h" | |
| 36 | |
| 37 namespace { | |
| 38 | |
| 39 using MemoryPressureLevel = | |
| 40 memory_pressure::MemoryPressureCalculator::MemoryPressureLevel; | |
| 41 | |
| 42 // Labels a crash report to the server as a hang report. | |
| 43 const wchar_t kHangReportCrashKey[] = L"hang-report"; | |
| 44 | |
| 45 // Helper function for determining the crash server to use. Defaults to the | |
| 46 // standard crash server, but can be overridden via an environment variable. | |
| 47 // Enables easy integration testing. | |
| 48 base::string16 GetKaskoCrashServerUrl() { | |
| 49 static const char kKaskoCrashServerUrl[] = "KASKO_CRASH_SERVER_URL"; | |
| 50 static const wchar_t kDefaultKaskoCrashServerUrl[] = | |
| 51 L"https://clients2.google.com/cr/report"; | |
| 52 | |
| 53 std::unique_ptr<base::Environment> env(base::Environment::Create()); | |
| 54 std::string env_var; | |
| 55 if (env->GetVar(kKaskoCrashServerUrl, &env_var)) { | |
| 56 return base::UTF8ToUTF16(env_var); | |
| 57 } | |
| 58 return kDefaultKaskoCrashServerUrl; | |
| 59 } | |
| 60 | |
| 61 // Helper function for determining the crash reports directory to use. Defaults | |
| 62 // to the browser data directory, but can be overridden via an environment | |
| 63 // variable. Enables easy integration testing. | |
| 64 base::FilePath GetKaskoCrashReportsBaseDir( | |
| 65 const base::char16* browser_data_directory) { | |
| 66 static const char kKaskoCrashReportBaseDir[] = "KASKO_CRASH_REPORTS_BASE_DIR"; | |
| 67 std::unique_ptr<base::Environment> env(base::Environment::Create()); | |
| 68 std::string env_var; | |
| 69 if (env->GetVar(kKaskoCrashReportBaseDir, &env_var)) { | |
| 70 return base::FilePath(base::UTF8ToUTF16(env_var)); | |
| 71 } | |
| 72 return base::FilePath(browser_data_directory); | |
| 73 } | |
| 74 | |
| 75 struct EventSourceDeregisterer { | |
| 76 using pointer = HANDLE; | |
| 77 void operator()(HANDLE event_source_handle) const { | |
| 78 if (!::DeregisterEventSource(event_source_handle)) | |
| 79 DPLOG(ERROR) << "DeregisterEventSource"; | |
| 80 } | |
| 81 }; | |
| 82 using ScopedEventSourceHandle = | |
| 83 std::unique_ptr<HANDLE, EventSourceDeregisterer>; | |
| 84 | |
| 85 struct SidDeleter { | |
| 86 using pointer = PSID; | |
| 87 void operator()(PSID sid) const { | |
| 88 if (::LocalFree(sid) != nullptr) | |
| 89 DPLOG(ERROR) << "LocalFree"; | |
| 90 } | |
| 91 }; | |
| 92 using ScopedSid = std::unique_ptr<PSID, SidDeleter>; | |
| 93 | |
| 94 void OnCrashReportUpload(void* context, | |
| 95 const base::char16* report_id, | |
| 96 const base::char16* minidump_path, | |
| 97 const base::char16* const* keys, | |
| 98 const base::char16* const* values) { | |
| 99 // Open the event source. | |
| 100 ScopedEventSourceHandle event_source_handle( | |
| 101 ::RegisterEventSource(nullptr, L"Chrome")); | |
| 102 if (!event_source_handle) { | |
| 103 PLOG(ERROR) << "RegisterEventSource"; | |
| 104 return; | |
| 105 } | |
| 106 | |
| 107 // Get the user's SID for the log record. | |
| 108 base::string16 sid_string; | |
| 109 PSID sid = nullptr; | |
| 110 if (base::win::GetUserSidString(&sid_string) && !sid_string.empty()) { | |
| 111 if (!::ConvertStringSidToSid(sid_string.c_str(), &sid)) | |
| 112 DPLOG(ERROR) << "ConvertStringSidToSid"; | |
| 113 DCHECK(sid); | |
| 114 } | |
| 115 // Ensure cleanup on scope exit. | |
| 116 ScopedSid scoped_sid; | |
| 117 if (sid) | |
| 118 scoped_sid.reset(sid); | |
| 119 | |
| 120 // Generate the message. | |
| 121 // Note that the format of this message must match the consumer in | |
| 122 // chrome/browser/crash_upload_list_win.cc. | |
| 123 base::string16 message = | |
| 124 L"Crash uploaded. Id=" + base::string16(report_id) + L"."; | |
| 125 | |
| 126 // Matches Omaha. | |
| 127 const int kCrashUploadEventId = 2; | |
| 128 | |
| 129 // Report the event. | |
| 130 const base::char16* strings[] = {message.c_str()}; | |
| 131 if (!::ReportEvent(event_source_handle.get(), EVENTLOG_INFORMATION_TYPE, | |
| 132 0, // category | |
| 133 kCrashUploadEventId, sid, | |
| 134 1, // count | |
| 135 0, strings, nullptr)) { | |
| 136 DPLOG(ERROR); | |
| 137 } | |
| 138 } | |
| 139 | |
| 140 void AddCrashKey(const wchar_t *key, const wchar_t *value, | |
| 141 std::vector<kasko::api::CrashKey> *crash_keys) { | |
| 142 DCHECK(key); | |
| 143 DCHECK(value); | |
| 144 DCHECK(crash_keys); | |
| 145 | |
| 146 crash_keys->resize(crash_keys->size() + 1); | |
| 147 kasko::api::CrashKey& crash_key = crash_keys->back(); | |
| 148 base::wcslcpy(crash_key.name, key, kasko::api::CrashKey::kNameMaxLength); | |
| 149 base::wcslcpy(crash_key.value, value, kasko::api::CrashKey::kValueMaxLength); | |
| 150 } | |
| 151 | |
| 152 // Get the |process| and the |thread_id| of the node inside the |wait_chain| | |
| 153 // that is of type ThreadType and belongs to a process that is valid for the | |
| 154 // capture of a crash dump. Returns true if such a node was found. | |
| 155 bool GetLastValidNodeInfo(const base::win::WaitChainNodeVector& wait_chain, | |
| 156 base::Process* process, | |
| 157 DWORD* thread_id) { | |
| 158 // The last thread in the wait chain is nominated as the hung thread. | |
| 159 base::win::WaitChainNodeVector::const_reverse_iterator it; | |
| 160 for (it = wait_chain.rbegin(); it != wait_chain.rend(); ++it) { | |
| 161 if (it->ObjectType != WctThreadType) | |
| 162 continue; | |
| 163 | |
| 164 auto current_process = base::Process::Open(it->ThreadObject.ProcessId); | |
| 165 if (EnsureTargetProcessValidForCapture(current_process)) { | |
| 166 *process = std::move(current_process); | |
| 167 *thread_id = it->ThreadObject.ThreadId; | |
| 168 return true; | |
| 169 } | |
| 170 } | |
| 171 return false; | |
| 172 } | |
| 173 | |
| 174 // Adds the entire wait chain to |crash_keys|. | |
| 175 // | |
| 176 // As an example (key : value): | |
| 177 // hung-process-wait-chain-00 : Thread 10242 in process 4554 with status Blocked | |
| 178 // hung-process-wait-chain-01 : Lock of type ThreadWait with status Owned | |
| 179 // hung-process-wait-chain-02 : Thread 77221 in process 4554 with status Blocked | |
| 180 // | |
| 181 void AddWaitChainToCrashKeys(const base::win::WaitChainNodeVector& wait_chain, | |
| 182 std::vector<kasko::api::CrashKey>* crash_keys) { | |
| 183 for (size_t i = 0; i < wait_chain.size(); i++) { | |
| 184 AddCrashKey( | |
| 185 base::StringPrintf(L"hung-process-wait-chain-%02" PRIuS, i).c_str(), | |
| 186 base::win::WaitChainNodeToString(wait_chain[i]).c_str(), crash_keys); | |
| 187 } | |
| 188 } | |
| 189 | |
| 190 base::FilePath GetExeFilePathForProcess(const base::Process& process) { | |
| 191 wchar_t exe_name[MAX_PATH]; | |
| 192 DWORD exe_name_len = arraysize(exe_name); | |
| 193 // Note: requesting the Win32 path format. | |
| 194 if (::QueryFullProcessImageName(process.Handle(), 0, exe_name, | |
| 195 &exe_name_len) == 0) { | |
| 196 DPLOG(ERROR) << "Failed to get executable name for process"; | |
| 197 return base::FilePath(); | |
| 198 } | |
| 199 | |
| 200 // QueryFullProcessImageName's documentation does not specify behavior when | |
| 201 // the buffer is too small, but we know that GetModuleFileNameEx succeeds and | |
| 202 // truncates the returned name in such a case. Given that paths of arbitrary | |
| 203 // length may exist, the conservative approach is to reject names when | |
| 204 // the returned length is that of the buffer. | |
| 205 if (exe_name_len > 0 && exe_name_len < arraysize(exe_name)) | |
| 206 return base::FilePath(exe_name); | |
| 207 | |
| 208 return base::FilePath(); | |
| 209 } | |
| 210 | |
| 211 // Adds the executable base name for each unique pid found in the |wait_chain| | |
| 212 // to the |crash_keys|. | |
| 213 void AddProcessExeNameToCrashKeys( | |
| 214 const base::win::WaitChainNodeVector& wait_chain, | |
| 215 std::vector<kasko::api::CrashKey>* crash_keys) { | |
| 216 std::set<DWORD> unique_pids; | |
| 217 for (size_t i = 0; i < wait_chain.size(); i += 2) | |
| 218 unique_pids.insert(wait_chain[i].ThreadObject.ProcessId); | |
| 219 | |
| 220 for (DWORD pid : unique_pids) { | |
| 221 // This is racy on the pid but for the purposes of this function, some error | |
| 222 // threshold can be tolerated. Hopefully the race doesn't happen often. | |
| 223 base::Process process( | |
| 224 base::Process::OpenWithAccess(pid, PROCESS_QUERY_LIMITED_INFORMATION)); | |
| 225 | |
| 226 base::string16 exe_file_path = L"N/A"; | |
| 227 if (process.IsValid()) | |
| 228 exe_file_path = GetExeFilePathForProcess(process).BaseName().value(); | |
| 229 | |
| 230 AddCrashKey( | |
| 231 base::StringPrintf(L"hung-process-wait-chain-pid-%u", pid).c_str(), | |
| 232 exe_file_path.c_str(), crash_keys); | |
| 233 } | |
| 234 } | |
| 235 | |
| 236 void AddSystemLoadInformation(std::vector<kasko::api::CrashKey>* crash_keys) { | |
| 237 DCHECK(crash_keys); | |
| 238 | |
| 239 // Add memory pressure level. | |
| 240 memory_pressure::DirectMemoryPressureCalculator memory_calculator; | |
| 241 const wchar_t* memory_pressure_level = L""; | |
| 242 switch (memory_calculator.CalculateCurrentPressureLevel()) { | |
| 243 case MemoryPressureLevel::MEMORY_PRESSURE_LEVEL_NONE: | |
| 244 memory_pressure_level = L"none-or-unknown"; | |
| 245 break; | |
| 246 case MemoryPressureLevel::MEMORY_PRESSURE_LEVEL_MODERATE: | |
| 247 memory_pressure_level = L"moderate"; | |
| 248 break; | |
| 249 case MemoryPressureLevel::MEMORY_PRESSURE_LEVEL_CRITICAL: | |
| 250 memory_pressure_level = L"critical"; | |
| 251 break; | |
| 252 } | |
| 253 AddCrashKey(L"memory-pressure", memory_pressure_level, crash_keys); | |
| 254 | |
| 255 // Add measures of cpu and disk load. | |
| 256 chrome_watcher::SystemLoadEstimator::Estimate load_estimate = {}; | |
| 257 if (!chrome_watcher::SystemLoadEstimator::Measure(&load_estimate)) | |
| 258 return; | |
| 259 | |
| 260 AddCrashKey(L"cpu-load-percent", | |
| 261 base::IntToString16(load_estimate.cpu_load_pct).c_str(), | |
| 262 crash_keys); | |
| 263 AddCrashKey(L"disk-idle-percent", | |
| 264 base::IntToString16(load_estimate.disk_idle_pct).c_str(), | |
| 265 crash_keys); | |
| 266 AddCrashKey(L"disk-avg-queue-len", | |
| 267 base::IntToString16(load_estimate.avg_disk_queue_len).c_str(), | |
| 268 crash_keys); | |
| 269 } | |
| 270 | |
| 271 } // namespace | |
| 272 | |
| 273 bool InitializeKaskoReporter(const base::string16& endpoint, | |
| 274 const base::char16* browser_data_directory) { | |
| 275 base::string16 crash_server = GetKaskoCrashServerUrl(); | |
| 276 base::FilePath crash_reports_base_dir = | |
| 277 GetKaskoCrashReportsBaseDir(browser_data_directory); | |
| 278 | |
| 279 return kasko::api::InitializeReporter( | |
| 280 endpoint.c_str(), | |
| 281 crash_server.c_str(), | |
| 282 crash_reports_base_dir.Append(L"Crash Reports").value().c_str(), | |
| 283 crash_reports_base_dir.Append(kPermanentlyFailedReportsSubdir) | |
| 284 .value() | |
| 285 .c_str(), | |
| 286 &OnCrashReportUpload, | |
| 287 nullptr); | |
| 288 } | |
| 289 | |
| 290 void ShutdownKaskoReporter() { | |
| 291 kasko::api::ShutdownReporter(); | |
| 292 } | |
| 293 | |
| 294 bool EnsureTargetProcessValidForCapture(const base::Process& process) { | |
| 295 // Ensure the target process's executable is inside the current Chrome | |
| 296 // directory. | |
| 297 base::FilePath chrome_dir; | |
| 298 if (!PathService::Get(base::DIR_EXE, &chrome_dir)) | |
| 299 return false; | |
| 300 | |
| 301 return chrome_dir.IsParent(GetExeFilePathForProcess(process)); | |
| 302 } | |
| 303 | |
| 304 void DumpHungProcess(DWORD main_thread_id, const base::string16& channel, | |
| 305 const base::char16* hang_type, | |
| 306 const base::Process& process) { | |
| 307 // Read the Crashpad module annotations for the process. | |
| 308 std::vector<kasko::api::CrashKey> annotations; | |
| 309 crash_reporter::ReadMainModuleAnnotationsForKasko(process, &annotations); | |
| 310 | |
| 311 // Label the report as a hang report. | |
| 312 AddCrashKey(kHangReportCrashKey, hang_type, &annotations); | |
| 313 | |
| 314 // Note: system load is measured as early as possible, as it is potentially | |
| 315 // more volatile than wait chain information. | |
| 316 // TODO(manzagop): consider continuous load observation, instead of punctual | |
| 317 // observation, which may fail to observe load. | |
| 318 AddSystemLoadInformation(&annotations); | |
| 319 | |
| 320 // Use the Wait Chain Traversal API to determine the hung thread. Defaults to | |
| 321 // UI thread on error. The wait chain may point to a different thread in a | |
| 322 // different process for the hung thread. | |
| 323 DWORD hung_thread_id = main_thread_id; | |
| 324 base::Process hung_process = process.Duplicate(); | |
| 325 | |
| 326 base::win::WaitChainNodeVector wait_chain; | |
| 327 bool is_deadlock = false; | |
| 328 base::string16 thread_chain_failure_reason; | |
| 329 DWORD thread_chain_last_error = ERROR_SUCCESS; | |
| 330 if (base::win::GetThreadWaitChain(main_thread_id, &wait_chain, &is_deadlock, | |
| 331 &thread_chain_failure_reason, | |
| 332 &thread_chain_last_error)) { | |
| 333 bool found_valid_node = | |
| 334 GetLastValidNodeInfo(wait_chain, &hung_process, &hung_thread_id); | |
| 335 DCHECK(found_valid_node); | |
| 336 | |
| 337 // Add some interesting data about the wait chain to the crash keys. | |
| 338 AddCrashKey(L"hung-process-is-deadlock", is_deadlock ? L"true" : L"false", | |
| 339 &annotations); | |
| 340 AddWaitChainToCrashKeys(wait_chain, &annotations); | |
| 341 AddProcessExeNameToCrashKeys(wait_chain, &annotations); | |
| 342 } else { | |
| 343 // The call to GetThreadWaitChain() failed. Include the reason inside the | |
| 344 // report using crash keys. | |
| 345 // TODO(pmonette): Remove this when UMA is added to wait_chain.cc. | |
| 346 AddCrashKey(L"hung-process-wait-chain-failure-reason", | |
| 347 thread_chain_failure_reason.c_str(), &annotations); | |
| 348 AddCrashKey(L"hung-process-wait-chain-last-error", | |
| 349 base::UintToString16(thread_chain_last_error).c_str(), | |
| 350 &annotations); | |
| 351 } | |
| 352 | |
| 353 std::vector<const base::char16*> key_buffers; | |
| 354 std::vector<const base::char16*> value_buffers; | |
| 355 for (const auto& crash_key : annotations) { | |
| 356 key_buffers.push_back(crash_key.name); | |
| 357 value_buffers.push_back(crash_key.value); | |
| 358 } | |
| 359 key_buffers.push_back(nullptr); | |
| 360 value_buffers.push_back(nullptr); | |
| 361 | |
| 362 // Synthesize an exception for the hung thread. Populate the record with the | |
| 363 // current context of the thread to get the stack trace bucketed on the crash | |
| 364 // backend. | |
| 365 CONTEXT thread_context = {}; | |
| 366 EXCEPTION_RECORD exception_record = {}; | |
| 367 exception_record.ExceptionCode = EXCEPTION_ARRAY_BOUNDS_EXCEEDED; | |
| 368 EXCEPTION_POINTERS exception_pointers = {&exception_record, &thread_context}; | |
| 369 | |
| 370 base::win::ScopedHandle hung_thread(::OpenThread( | |
| 371 THREAD_SUSPEND_RESUME | THREAD_GET_CONTEXT | THREAD_QUERY_INFORMATION, | |
| 372 FALSE, hung_thread_id)); | |
| 373 | |
| 374 bool have_context = false; | |
| 375 if (hung_thread.IsValid()) { | |
| 376 DWORD suspend_count = ::SuspendThread(hung_thread.Get()); | |
| 377 const DWORD kSuspendFailed = static_cast<DWORD>(-1); | |
| 378 if (suspend_count != kSuspendFailed) { | |
| 379 // Best effort capture of the context. | |
| 380 thread_context.ContextFlags = CONTEXT_FLOATING_POINT | CONTEXT_SEGMENTS | | |
| 381 CONTEXT_INTEGER | CONTEXT_CONTROL; | |
| 382 if (::GetThreadContext(hung_thread.Get(), &thread_context) == TRUE) | |
| 383 have_context = true; | |
| 384 | |
| 385 ::ResumeThread(hung_thread.Get()); | |
| 386 } | |
| 387 } | |
| 388 | |
| 389 // TODO(manzagop): consider making the dump-type channel-dependent. | |
| 390 if (have_context) { | |
| 391 kasko::api::SendReportForProcess( | |
| 392 hung_process.Handle(), hung_thread_id, &exception_pointers, | |
| 393 kasko::api::LARGER_DUMP_TYPE, key_buffers.data(), value_buffers.data()); | |
| 394 } else { | |
| 395 kasko::api::SendReportForProcess(hung_process.Handle(), 0, nullptr, | |
| 396 kasko::api::LARGER_DUMP_TYPE, | |
| 397 key_buffers.data(), value_buffers.data()); | |
| 398 } | |
| 399 } | |
| OLD | NEW |