Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(99)

Unified Diff: base/profiler/native_stack_sampler_mac.cc

Issue 2702463003: NativeStackSampler implementation for Mac. (Closed)
Patch Set: fix Created 3 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: base/profiler/native_stack_sampler_mac.cc
diff --git a/base/profiler/native_stack_sampler_mac.cc b/base/profiler/native_stack_sampler_mac.cc
new file mode 100644
index 0000000000000000000000000000000000000000..7a7bd0cd10f6949bb99da4549c767c91d8fcdab8
--- /dev/null
+++ b/base/profiler/native_stack_sampler_mac.cc
@@ -0,0 +1,527 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/profiler/native_stack_sampler.h"
+
+#include <dlfcn.h>
+#include <libkern/OSByteOrder.h>
+#include <libunwind.h>
+#include <mach-o/swap.h>
+#include <mach/kern_return.h>
+#include <mach/mach.h>
+#include <mach/thread_act.h>
+#include <pthread.h>
+#include <sys/syslimits.h>
+
+#include <map>
+#include <memory>
+
+#include "base/logging.h"
+#include "base/macros.h"
+#include "base/memory/ptr_util.h"
+#include "base/strings/string_number_conversions.h"
+
+namespace base {
+
+namespace {
+
+// Stack walking --------------------------------------------------------------
+
+// Copy of x86_64 thread context structure from x86_thread_state64_t type.
+// Copied struct since fields can have different names on different versions of
+// Darwin.
 Mark Mentovai 2017/02/17 05:21:05 ? It’s never going to not be the double-underscor
 Avi (use Gerrit) 2017/02/17 17:18:12 Done.
+struct ThreadContext {
+ // General-purpose and control registers, in the same order as
+ // x86_thread_state64_t. The first 17 fields (rax through rip) are copied
+ // verbatim into a unw_context_t by WalkStack() below, so the order and
+ // packing of these fields must not change.
+ uint64_t rax;
+ uint64_t rbx;
+ uint64_t rcx;
+ uint64_t rdx;
+ uint64_t rdi;
+ uint64_t rsi;
+ uint64_t rbp;
+ uint64_t rsp;
+ uint64_t r8;
+ uint64_t r9;
+ uint64_t r10;
+ uint64_t r11;
+ uint64_t r12;
+ uint64_t r13;
+ uint64_t r14;
+ uint64_t r15;
+ uint64_t rip;
+ uint64_t rflags;
+ uint64_t cs;
+ uint64_t fs;
+ uint64_t gs;
+};
+
// Fills |state| with |target_thread|'s register context.
//
// Note that this is called while a thread is suspended. Make very very sure
// that no shared resources (e.g. memory allocators) are used for the duration
// of this function.
bool GetThreadContext(thread_act_t target_thread, ThreadContext* state) {
  mach_msg_type_number_t state_count =
      static_cast<mach_msg_type_number_t>(x86_THREAD_STATE64_COUNT);
  kern_return_t kr =
      thread_get_state(target_thread, x86_THREAD_STATE64,
                       reinterpret_cast<thread_state_t>(state), &state_count);
  return kr == KERN_SUCCESS;
}
+
// If the value at |pointer| points to the original stack, rewrite it to point
// to the corresponding location in the copied stack.
//
// Note that this is called while a thread is suspended. Make very very sure
// that no shared resources (e.g. memory allocators) are used for the duration
// of this function.
uint64_t RewritePointerIfInOriginalStack(uint64_t* original_stack_bottom,
                                         uint64_t* original_stack_top,
                                         uint64_t* stack_copy_bottom,
                                         uint64_t pointer) {
  const uint64_t bottom = reinterpret_cast<uint64_t>(original_stack_bottom);
  const uint64_t top = reinterpret_cast<uint64_t>(original_stack_top);

  // Values outside [bottom, top) are not pointers into the original stack;
  // pass them through untouched.
  if (pointer < bottom || pointer >= top)
    return pointer;

  // Translate the pointer by the displacement between the copy and the
  // original.
  return reinterpret_cast<uint64_t>(stack_copy_bottom) + (pointer - bottom);
}
+
+// Copy the stack to a buffer while rewriting possible pointers to locations
+// within the stack to point to the corresponding locations in the copy. This is
+// necessary to handle stack frames with dynamic stack allocation, where a
+// pointer to the beginning of the dynamic allocation area is stored on the
+// stack and/or in a non-volatile register.
+//
+// Eager rewriting of anything that looks like a pointer to the stack, as done
+// in this function, does not adversely affect the stack unwinding. The only
+// other values on the stack the unwinding depends on are return addresses,
+// which should not point within the stack memory. The rewriting is guaranteed
+// to catch all pointers because the stacks are guaranteed by the ABI to be
+// sizeof(void*) aligned.
+//
+// Note that this is called while a thread is suspended. Make very very sure
+// that no shared resources (e.g. memory allocators) are used for the duration
+// of this function.
+void CopyStackAndRewritePointers(void* dest,
+ void* from,
+ void* to,
+ ThreadContext* thread_context)
+ NO_SANITIZE("address") {
+ uint64_t* original_stack_bottom = static_cast<uint64_t*>(from);
+ uint64_t* original_stack_top = static_cast<uint64_t*>(to);
+ uint64_t* stack_copy_bottom = static_cast<uint64_t*>(dest);
+
+ size_t count = original_stack_top - original_stack_bottom;
+ for (size_t pos = 0; pos < count; ++pos) {
+ stack_copy_bottom[pos] = RewritePointerIfInOriginalStack(
+ original_stack_bottom, original_stack_top, stack_copy_bottom,
+ original_stack_bottom[pos]);
+ }
+
+ thread_context->rbp =
+ RewritePointerIfInOriginalStack(original_stack_bottom, original_stack_top,
+ stack_copy_bottom, thread_context->rbp);
+ thread_context->rsp =
+ RewritePointerIfInOriginalStack(original_stack_bottom, original_stack_top,
+ stack_copy_bottom, thread_context->rsp);
+}
+
+const char* LibSystemKernelName() {
+ static char path[PATH_MAX];
+ static char* name = nullptr;
+ if (name)
+ return name;
+
+ Dl_info info;
+ dladdr(reinterpret_cast<void*>(_exit), &info);
+ strncpy(path, info.dli_fname, PATH_MAX);
+ name = path;
+ DCHECK_EQ(std::string(name),
+ std::string("/usr/lib/system/libsystem_kernel.dylib"));
+ return name;
+}
+
+// Outcome of a single pass of WalkStackFromContext().
+enum StackWalkResult : int {
+ // libunwind reported an error while stepping through the frames.
+ ERROR = -1,
+ // The walk completed normally.
+ SUCCESS,
+ // The walk ended after a single frame whose instruction pointer lies in
+ // libsystem_kernel.dylib; see WalkStack() for how this case is retried.
+ SYSCALL,
+};
+
+// Walks the stack represented by |unwind_context|, calling back to the provided
+// lambda for each frame.
+template <typename StackFrameCallback>
+StackWalkResult WalkStackFromContext(unw_context_t* unwind_context,
+ const StackFrameCallback& callback) {
+ unw_cursor_t unwind_cursor;
+ unw_init_local(&unwind_cursor, unwind_context);
+
+ int step_result;
+ unw_word_t ip;
+ size_t frames = 0;
+ // A do-while is used so that the frame for the context's current instruction
+ // pointer is always reported, even if unwinding cannot proceed past it.
+ do {
+ ++frames;
+ unw_get_reg(&unwind_cursor, UNW_REG_IP, &ip);
+
+ callback(static_cast<uintptr_t>(ip));
+
+ step_result = unw_step(&unwind_cursor);
+ } while (step_result > 0);
+
+ // unw_step() returns 0 when the last frame is reached and a negative value
+ // on error.
+ if (step_result != 0)
+ return StackWalkResult::ERROR;
+
+ // If the walk produced exactly one frame and that frame's instruction
+ // pointer (still held in |ip|) resolves to libsystem_kernel.dylib, the
+ // thread appears to be stopped in a kernel entry stub that carries no
+ // unwind info; report SYSCALL so the caller can retry after manually
+ // popping the frame.
+ Dl_info info;
+ if (frames == 1 && dladdr(reinterpret_cast<void*>(ip), &info) != 0 &&
+ strcmp(info.dli_fname, LibSystemKernelName()) == 0) {
+ return StackWalkResult::SYSCALL;
+ }
+
+ return StackWalkResult::SUCCESS;
+}
+
+// Walks the stack represented by |thread_context|, calling back to the provided
+// lambda for each frame.
+template <typename StackFrameCallback>
+void WalkStack(const ThreadContext& thread_context,
+ const StackFrameCallback& callback) {
+ // This uses libunwind to walk the stack. libunwind is designed to be used for
+ // a thread to walk its own stack. This creates two problems.
+
+ // Problem 1: There is no official way to create a unw_context other than to
+ // create it from the current state of the current thread's stack. To get
+ // around this, forge a context. A unw_context is just a copy of the register
+ // file followed by the instruction pointer. Coincidentally, the first 17
 Mark Mentovai 2017/02/17 05:21:05 “register file plus instruction pointer” is weird
 Avi (use Gerrit) 2017/02/17 17:18:12 Done.
+ // items of the ThreadContext type are exactly that!
+ unw_context_t unwind_context;
+ memcpy(&unwind_context, &thread_context, sizeof(uint64_t) * 17);
+ StackWalkResult result = WalkStackFromContext(&unwind_context, callback);
+
+ if (result == StackWalkResult::SYSCALL) {
+ // Problem 2: Because libunwind is designed to be triggered by user code on
+ // their own thread, if it hits a library that has no unwind info for the
+ // function that is being executed, it just stops. This isn't a problem in
+ // the normal case, but in this case, it's quite possible that the stack
+ // being walked is stopped in a function that bridges to the kernel and thus
+ // is missing the unwind info.
+ //
+ // If so, cheat by manually unwinding one stack frame and trying again.
+ // This simulates a "ret" instruction: pop the return address at the top of
+ // the stack into rip and advance rsp past it. In the unw_context_t register
+ // array, slot 7 is rsp and slot 16 is rip, mirroring the field order of
+ // ThreadContext above.
+ unwind_context.data[7] = thread_context.rsp + 8; // rsp++
+ unwind_context.data[16] =
+ *reinterpret_cast<uint64_t*>(thread_context.rsp); // rip = *rsp
+ WalkStackFromContext(&unwind_context, callback);
+ }
+}
+
+// Module identifiers ---------------------------------------------------------
+
+// Helper that swaps byte order in |x| if |swap| flag is set.
+// FAT Mach-O header fields are stored big-endian on disk; callers set |swap|
+// when the value being read needs conversion to host (little-endian x86_64)
+// byte order.
+uint32_t SwapIfBig32(uint32_t x, bool swap) {
 Mark Mentovai 2017/02/17 05:21:05 You don’t need any of this swapping stuff.
 Avi (use Gerrit) 2017/02/17 17:18:12 Acknowledged.
+ if (swap)
+ return OSSwapBigToHostInt32(x);
+ return x;
+}
+
+// Returns the offset in bytes where the x86_64 header is located in a binary
+// loaded at |module_addr|. Returns 0 if |module_addr| is not a valid FAT
+// Mach-O binary or has not been built for x86_64.
+off_t GetMach64HeaderOffset(const void* module_addr) {
 Mark Mentovai 2017/02/17 05:21:05 You don’t need any of this fat stuff either.
 Avi (use Gerrit) 2017/02/17 17:18:13 Acknowledged.
+ const fat_header* header = reinterpret_cast<const fat_header*>(module_addr);
+ if (header->magic != FAT_MAGIC && header->magic != FAT_CIGAM)
+ return 0;
+
+ // Search all FAT architectures for x86_64.
+ // NOTE(review): sizeof(header) is the size of the *pointer*, not of
+ // fat_header. Both happen to be 8 bytes on LP64, so the arch table is found
+ // correctly, but sizeof(fat_header) would express the intent — confirm and
+ // fix.
+ const fat_arch* fat_arches = reinterpret_cast<const fat_arch*>(
+ reinterpret_cast<const uint8_t*>(module_addr) + sizeof(header));
+ // All fat_header/fat_arch fields are stored big-endian regardless of host.
+ uint32_t n_arches = OSSwapBigToHostInt32(header->nfat_arch);
+ for (uint32_t i = 0; i < n_arches; ++i) {
+ const fat_arch& arch = fat_arches[i];
+ if (OSSwapBigToHostInt32(arch.cputype) == CPU_TYPE_X86_64)
+ return OSSwapBigToHostInt32(arch.offset);
+ }
+ return 0;
+}
+
// Returns true if the Mach-O binary at |module_addr| was built specifically for
// the x86_64 CPU architecture.
bool IsX64Header(const void* module_addr) {
  const mach_header_64* header =
      reinterpret_cast<const mach_header_64*>(module_addr);
  const bool is_64bit_magic =
      header->magic == MH_MAGIC_64 || header->magic == MH_CIGAM_64;
  if (!is_64bit_magic)
    return false;
  // MH_CIGAM_64 means the header fields are byte-swapped relative to host.
  const bool swapped = header->magic == MH_CIGAM_64;
  return SwapIfBig32(header->cputype, swapped) == CPU_TYPE_X86_64;
}
+
+// Fills |id| with the UUID of the x86_64 Mach-O binary loaded at |module_addr|.
+// |offset| is the offset in bytes into |module_addr| where the x86_64 header is
+// located. |offset| is only relevant if the binary is FAT and contains multiple
+// architecture headers. Returns false if the header is malformed or the header
+// does not specify the UUID load command.
+bool GetX64UUIDAt(const void* module_addr, unsigned char* id, off_t offset) {
+ const mach_header_64* header = reinterpret_cast<const mach_header_64*>(
+ reinterpret_cast<const uint8_t*>(module_addr) + offset);
+ if (header->magic != MH_MAGIC_64 && header->magic != MH_CIGAM_64)
+ return false;
+
+ bool swap = header->magic == MH_CIGAM_64;
+ // Search all load commands for UUID command.
+ // NOTE(review): this walk trusts ncmds and each cmdsize; it does not bound
+ // |offset| by header->sizeofcmds, so a malformed image could be read out of
+ // bounds — confirm callers only pass mapped, trusted images.
+ offset += sizeof(mach_header_64);
+ for (uint32_t i = 0; i < SwapIfBig32(header->ncmds, swap); ++i) {
+ const load_command* current_cmd = reinterpret_cast<const load_command*>(
 Mark Mentovai 2017/02/17 05:21:05 This loop needs to be cognizant of not exceeding h
 Avi (use Gerrit) 2017/02/17 17:18:12 Acknowledged.
+ reinterpret_cast<const uint8_t*>(module_addr) + offset);
+
+ if (SwapIfBig32(current_cmd->cmd, swap) == LC_UUID) {
+ const uuid_command* uuid_cmd =
 Mark Mentovai 2017/02/17 05:21:05 Also need to check that current_cmd->cmdsize is at
 Avi (use Gerrit) 2017/02/17 17:18:12 Done.
+ reinterpret_cast<const uuid_command*>(current_cmd);
+ static_assert(sizeof(uuid_cmd->uuid) == sizeof(uuid_t),
+ "UUID field of UUID command should be 16 bytes.");
+ memcpy(id, &uuid_cmd->uuid, sizeof(uuid_t));
 Mark Mentovai 2017/02/17 05:21:05 (if you were swapping, and you’re not, this would
 Avi (use Gerrit) 2017/02/17 17:18:12 Acknowledged.
+ return true;
+ }
+ // Advance to the next load command.
+ offset += SwapIfBig32(current_cmd->cmdsize, swap);
+ }
+ return false;
+}
+
+// Fills |id| with the Mach-O UUID retrieved from Mach-O binary loaded at
+// |module_addr|. This function returns false if the binary was not built for
+// X86_64 or if the UUID cannot be found.
+bool GetUUID(const void* module_addr, unsigned char* id) {
 Mark Mentovai 2017/02/17 05:21:05 (this is the only caller of the swappy and fatty f
 Avi (use Gerrit) 2017/02/17 17:18:13 Acknowledged.
+ off_t offset = 0;
+ // If the module is not x86_64 exclusive, it could be a module that supports
+ // multiple architectures. In that case, the appropriate header will be at
+ // some non-zero offset.
+ // Note the assignment inside the condition: |offset| receives the location
+ // of the x86_64 slice when the image is FAT; a result of 0 means no x86_64
+ // slice exists.
+ if (!IsX64Header(module_addr) &&
+ !(offset = GetMach64HeaderOffset(module_addr))) {
+ return false;
+ }
+ return GetX64UUIDAt(module_addr, id, offset);
+}
+
+// Returns the hex encoding of a 16-byte ID for the binary loaded at
+// |module_addr|. Returns an empty string if the UUID cannot be found at
+// |module_addr|.
+std::string GetUniqueId(const void* module_addr) {
 Mark Mentovai 2017/02/17 05:21:05 (this is the only caller of the caller of the swap
 Avi (use Gerrit) 2017/02/17 17:18:12 Acknowledged.
+ unsigned char id[sizeof(uuid_t)];
+ if (!GetUUID(module_addr, id))
+ return "";
+ // 16 bytes hex-encoded yields a 32-character identifier string.
+ return HexEncode(id, sizeof(uuid_t));
+}
+
+// Gets the index for the Module containing |instruction_pointer| in
+// |modules|, adding it if it's not already present. Returns
+// StackSamplingProfiler::Frame::kUnknownModuleIndex if no Module can be
+// determined for |module|.
+size_t GetModuleIndex(const uintptr_t instruction_pointer,
+ std::vector<StackSamplingProfiler::Module>* modules,
+ std::map<const void*, size_t>* profile_module_index) {
+ Dl_info inf;
+ // dladdr() fails when the address does not fall inside any loaded image.
+ if (!dladdr(reinterpret_cast<const void*>(instruction_pointer), &inf))
+ return StackSamplingProfiler::Frame::kUnknownModuleIndex;
+
+ // Modules are keyed by their load (base) address, so the UUID lookup in
+ // GetUniqueId() is performed only once per module per profile.
+ auto module_index = profile_module_index->find(inf.dli_fbase);
+ if (module_index == profile_module_index->end()) {
+ StackSamplingProfiler::Module module(
+ reinterpret_cast<uintptr_t>(inf.dli_fbase), GetUniqueId(inf.dli_fbase),
 Mark Mentovai 2017/02/17 05:21:05 (this is the only caller of the caller of the call
 Avi (use Gerrit) 2017/02/17 17:18:12 Done.
+ base::FilePath(inf.dli_fname));
+ modules->push_back(module);
+ module_index =
+ profile_module_index
+ ->insert(std::make_pair(inf.dli_fbase, modules->size() - 1))
+ .first;
+ }
+ return module_index->second;
+}
+
+// ScopedSuspendThread --------------------------------------------------------
+
+// Suspends a thread for the lifetime of the object.
+class ScopedSuspendThread {
+ public:
+ // Attempts to suspend |thread_port|. Check was_successful() before relying
+ // on the thread actually being suspended.
+ explicit ScopedSuspendThread(mach_port_t thread_port);
+ // Resumes the thread, but only if the suspension succeeded.
+ ~ScopedSuspendThread();
+
+ // True if thread_suspend() succeeded in the constructor.
+ bool was_successful() const { return was_successful_; }
+
+ private:
+ mach_port_t thread_port_;
+ bool was_successful_;
+
+ DISALLOW_COPY_AND_ASSIGN(ScopedSuspendThread);
+};
+
ScopedSuspendThread::ScopedSuspendThread(mach_port_t thread_port)
    : thread_port_(thread_port),
      was_successful_(thread_suspend(thread_port) == KERN_SUCCESS) {}

ScopedSuspendThread::~ScopedSuspendThread() {
  // Only resume a thread that was actually suspended; resuming one that was
  // not would incorrectly decrement its suspend count.
  if (was_successful_) {
    kern_return_t resume_result = thread_resume(thread_port_);
    CHECK_EQ(KERN_SUCCESS, resume_result) << "thread_resume failed";
  }
}
+
+// NativeStackSamplerMac ------------------------------------------------------
+
+// NativeStackSampler implementation for macOS x86_64. A sample is taken by
+// suspending the target thread, copying its stack into a reusable buffer,
+// resuming the thread, and then walking the copied stack with libunwind.
+class NativeStackSamplerMac : public NativeStackSampler {
+ public:
+ NativeStackSamplerMac(mach_port_t thread_port,
+ AnnotateCallback annotator,
+ NativeStackSamplerTestDelegate* test_delegate);
+ ~NativeStackSamplerMac() override;
+
+ // StackSamplingProfiler::NativeStackSampler:
+ void ProfileRecordingStarting(
+ std::vector<StackSamplingProfiler::Module>* modules) override;
+ void RecordStackSample(StackSamplingProfiler::Sample* sample) override;
+ void ProfileRecordingStopped() override;
+
+ private:
+ enum {
+ // Intended to hold the largest stack used by Chrome. The default macOS main
+ // thread stack size is 8 MB, and this allows for expansion if it occurs.
+ kStackCopyBufferSize = 12 * 1024 * 1024
+ };
+
+ // Suspends the thread with |thread_port_|, copies its stack and resumes the
+ // thread, then records the stack frames and associated modules into |sample|.
+ void SuspendThreadAndRecordStack(StackSamplingProfiler::Sample* sample);
+
+ // Weak reference: Mach port for thread being profiled.
+ mach_port_t thread_port_;
+
+ // Called on |sample| while the target thread is suspended; see
+ // SuspendThreadAndRecordStack().
+ const AnnotateCallback annotator_;
+
+ // May be null. Notified just before each stack walk, for tests.
+ NativeStackSamplerTestDelegate* const test_delegate_;
+
+ // The stack base address corresponding to |thread_handle_|.
+ const void* const thread_stack_base_address_;
+
+ // Buffer to use for copies of the stack. We use the same buffer for all the
+ // samples to avoid the overhead of multiple allocations and frees.
+ const std::unique_ptr<unsigned char[]> stack_copy_buffer_;
+
+ // Weak. Points to the modules associated with the profile being recorded
+ // between ProfileRecordingStarting() and ProfileRecordingStopped().
+ std::vector<StackSamplingProfiler::Module>* current_modules_ = nullptr;
+
+ // Maps a module's base address to the corresponding Module's index within
+ // current_modules_.
+ std::map<const void*, size_t> profile_module_index_;
+
+ DISALLOW_COPY_AND_ASSIGN(NativeStackSamplerMac);
+};
+
+NativeStackSamplerMac::NativeStackSamplerMac(
+ mach_port_t thread_port,
+ AnnotateCallback annotator,
+ NativeStackSamplerTestDelegate* test_delegate)
+ : thread_port_(thread_port),
+ annotator_(annotator),
+ test_delegate_(test_delegate),
+ // pthread_get_stackaddr_np() returns the target thread's stack base,
+ // used as the upper bound when copying the (downward-growing) stack.
+ thread_stack_base_address_(
+ pthread_get_stackaddr_np(pthread_from_mach_thread_np(thread_port))),
+ stack_copy_buffer_(new unsigned char[kStackCopyBufferSize]) {
+ DCHECK(annotator_);
+
+ // This class suspends threads, and those threads might be suspended in dyld.
+ // Therefore, for all the system functions that might be linked in dynamically
+ // that are used while threads are suspended, make calls to them to make sure
+ // that they are linked up.
+ ThreadContext thread_context;
+ GetThreadContext(thread_port_, &thread_context);
+}
+
+NativeStackSamplerMac::~NativeStackSamplerMac() {}
+
+void NativeStackSamplerMac::ProfileRecordingStarting(
+ std::vector<StackSamplingProfiler::Module>* modules) {
+ current_modules_ = modules;
+ profile_module_index_.clear();
+}
+
+void NativeStackSamplerMac::RecordStackSample(
+ StackSamplingProfiler::Sample* sample) {
+ DCHECK(current_modules_);
+
+ if (!stack_copy_buffer_)
+ return;
+
+ SuspendThreadAndRecordStack(sample);
+}
+
+void NativeStackSamplerMac::ProfileRecordingStopped() {
+ // Clear the weak pointer to the module list; it is only valid between
+ // ProfileRecordingStarting() and ProfileRecordingStopped().
+ current_modules_ = nullptr;
+}
+
+void NativeStackSamplerMac::SuspendThreadAndRecordStack(
+ StackSamplingProfiler::Sample* sample) {
+ ThreadContext thread_context;
+
+ // Copy the stack.
+
+ {
+ // IMPORTANT NOTE: Do not do ANYTHING in this in this scope that might
+ // allocate memory, including indirectly via use of DCHECK/CHECK or other
+ // logging statements. Otherwise this code can deadlock on heap locks in the
+ // default heap acquired by the target thread before it was suspended.
+ ScopedSuspendThread suspend_thread(thread_port_);
+ if (!suspend_thread.was_successful())
+ return;
+
+ if (!GetThreadContext(thread_port_, &thread_context))
+ return;
+ // The stack grows downward: the saved rsp is the lowest live address and
+ // the pthread stack base address is the highest.
+ uint64_t stack_top = reinterpret_cast<uint64_t>(thread_stack_base_address_);
+ uint64_t stack_bottom = thread_context.rsp;
+
+ // Bail out rather than copy a stack larger than the reusable buffer.
+ if ((stack_top - stack_bottom) > kStackCopyBufferSize)
+ return;
+
+ // The annotator runs while the target thread is suspended, so it is
+ // subject to the same no-allocation constraint noted above.
+ (*annotator_)(sample);
+
+ // Copies the live stack into |stack_copy_buffer_| and redirects rsp/rbp in
+ // |thread_context| into the copy, so the walk below can proceed after the
+ // thread resumes.
+ CopyStackAndRewritePointers(
+ stack_copy_buffer_.get(), reinterpret_cast<void*>(stack_bottom),
+ reinterpret_cast<void*>(stack_top), &thread_context);
+ } // ScopedSuspendThread
+
+ if (test_delegate_)
+ test_delegate_->OnPreStackWalk();
+
+ // Walk the stack and record it.
+
+ // Reserve enough memory for most stacks, to avoid repeated allocations.
+ // Approximately 99.9% of recorded stacks are 128 frames or fewer.
+ sample->frames.reserve(128);
+
+ // Capture raw pointers (not |this|) so the lambda stays a plain value
+ // capture usable by the templated WalkStack().
+ auto current_modules = current_modules_;
+ auto profile_module_index = &profile_module_index_;
+ WalkStack(thread_context, [sample, current_modules,
+ profile_module_index](uintptr_t frame_ip) {
+ sample->frames.push_back(StackSamplingProfiler::Frame(
+ frame_ip,
+ GetModuleIndex(frame_ip, current_modules, profile_module_index)));
+ });
+}
+
+} // namespace
+
+std::unique_ptr<NativeStackSampler> NativeStackSampler::Create(
+ PlatformThreadId thread_id,
+ AnnotateCallback annotator,
+ NativeStackSamplerTestDelegate* test_delegate) {
+#if defined(__i386__)
 Mark Mentovai 2017/02/17 05:21:05 You can just make this a compile-time #error. And
 Avi (use Gerrit) 2017/02/17 17:18:12 Re compile error: the _posix version of this file
+ // The sampler above is x86_64-only; 32-bit builds get no sampler.
+ return nullptr;
+#endif
+ // NOTE(review): |thread_id| is passed where a mach_port_t is expected; this
+ // relies on PlatformThreadId being the Mach thread port on this platform —
+ // confirm.
+ return base::MakeUnique<NativeStackSamplerMac>(thread_id, annotator,
+ test_delegate);
+}
+
+} // namespace base

Powered by Google App Engine
This is Rietveld 408576698