Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1422)

Side by Side Diff: base/profiler/native_stack_sampler_mac.cc

Issue 2702463003: NativeStackSampler implementation for Mac. (Closed)
Patch Set: fix Created 3 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2017 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/profiler/native_stack_sampler.h"
6
7 #include <dlfcn.h>
8 #include <libkern/OSByteOrder.h>
9 #include <libunwind.h>
10 #include <mach-o/swap.h>
11 #include <mach/kern_return.h>
12 #include <mach/mach.h>
13 #include <mach/thread_act.h>
14 #include <pthread.h>
15 #include <sys/syslimits.h>
16
17 #include <map>
18 #include <memory>
19
20 #include "base/logging.h"
21 #include "base/macros.h"
22 #include "base/memory/ptr_util.h"
23 #include "base/strings/string_number_conversions.h"
24
25 namespace base {
26
27 namespace {
28
29 // Stack walking --------------------------------------------------------------
30
31 // Copy of x86_64 thread context structure from x86_thread_state64_t type.
32 // Copied struct since fields can have different names on different versions of
33 // Darwin.
Mark Mentovai 2017/02/17 05:21:05 ? It’s never going to not be the double-underscor
Avi (use Gerrit) 2017/02/17 17:18:12 Done.
struct ThreadContext {
  // General-purpose registers, in the same order as x86_thread_state64_t.
  uint64_t rax;
  uint64_t rbx;
  uint64_t rcx;
  uint64_t rdx;
  uint64_t rdi;
  uint64_t rsi;
  uint64_t rbp;  // Frame pointer; rewritten when the stack is copied.
  uint64_t rsp;  // Stack pointer; rewritten when the stack is copied.
  uint64_t r8;
  uint64_t r9;
  uint64_t r10;
  uint64_t r11;
  uint64_t r12;
  uint64_t r13;
  uint64_t r14;
  uint64_t r15;
  uint64_t rip;  // Instruction pointer. The 17 fields up to and including this
                 // one are copied verbatim into a unw_context_t by WalkStack().
  uint64_t rflags;
  uint64_t cs;
  uint64_t fs;
  uint64_t gs;
};
57
58 // Fills |state| with |target_thread|'s context.
59 //
60 // Note that this is called while a thread is suspended. Make very very sure
61 // that no shared resources (e.g. memory allocators) are used for the duration
62 // of this function.
63 bool GetThreadContext(thread_act_t target_thread, ThreadContext* state) {
64 mach_msg_type_number_t count =
65 static_cast<mach_msg_type_number_t>(x86_THREAD_STATE64_COUNT);
66 return thread_get_state(target_thread, x86_THREAD_STATE64,
67 reinterpret_cast<thread_state_t>(state),
68 &count) == KERN_SUCCESS;
69 }
70
// If the value at |pointer| points to the original stack, rewrite it to point
// to the corresponding location in the copied stack.
//
// Note that this is called while a thread is suspended. Make very very sure
// that no shared resources (e.g. memory allocators) are used for the duration
// of this function.
uint64_t RewritePointerIfInOriginalStack(uint64_t* original_stack_bottom,
                                         uint64_t* original_stack_top,
                                         uint64_t* stack_copy_bottom,
                                         uint64_t pointer) {
  const uint64_t bottom = reinterpret_cast<uint64_t>(original_stack_bottom);
  const uint64_t top = reinterpret_cast<uint64_t>(original_stack_top);

  // Values outside [bottom, top) are not stack pointers; leave them untouched.
  if (pointer < bottom || pointer >= top)
    return pointer;

  // Translate by the offset between the two stacks.
  const uint64_t copy_bottom = reinterpret_cast<uint64_t>(stack_copy_bottom);
  return copy_bottom + (pointer - bottom);
}
95
96 // Copy the stack to a buffer while rewriting possible pointers to locations
97 // within the stack to point to the corresponding locations in the copy. This is
98 // necessary to handle stack frames with dynamic stack allocation, where a
99 // pointer to the beginning of the dynamic allocation area is stored on the
100 // stack and/or in a non-volatile register.
101 //
102 // Eager rewriting of anything that looks like a pointer to the stack, as done
103 // in this function, does not adversely affect the stack unwinding. The only
104 // other values on the stack the unwinding depends on are return addresses,
105 // which should not point within the stack memory. The rewriting is guaranteed
106 // to catch all pointers because the stacks are guaranteed by the ABI to be
107 // sizeof(void*) aligned.
108 //
109 // Note that this is called while a thread is suspended. Make very very sure
110 // that no shared resources (e.g. memory allocators) are used for the duration
111 // of this function.
112 void CopyStackAndRewritePointers(void* dest,
113 void* from,
114 void* to,
115 ThreadContext* thread_context)
116 NO_SANITIZE("address") {
117 uint64_t* original_stack_bottom = static_cast<uint64_t*>(from);
118 uint64_t* original_stack_top = static_cast<uint64_t*>(to);
119 uint64_t* stack_copy_bottom = static_cast<uint64_t*>(dest);
120
121 size_t count = original_stack_top - original_stack_bottom;
122 for (size_t pos = 0; pos < count; ++pos) {
123 stack_copy_bottom[pos] = RewritePointerIfInOriginalStack(
124 original_stack_bottom, original_stack_top, stack_copy_bottom,
125 original_stack_bottom[pos]);
126 }
127
128 thread_context->rbp =
129 RewritePointerIfInOriginalStack(original_stack_bottom, original_stack_top,
130 stack_copy_bottom, thread_context->rbp);
131 thread_context->rsp =
132 RewritePointerIfInOriginalStack(original_stack_bottom, original_stack_top,
133 stack_copy_bottom, thread_context->rsp);
134 }
135
136 const char* LibSystemKernelName() {
137 static char path[PATH_MAX];
138 static char* name = nullptr;
139 if (name)
140 return name;
141
142 Dl_info info;
143 dladdr(reinterpret_cast<void*>(_exit), &info);
144 strncpy(path, info.dli_fname, PATH_MAX);
145 name = path;
146 DCHECK_EQ(std::string(name),
147 std::string("/usr/lib/system/libsystem_kernel.dylib"));
148 return name;
149 }
150
// Outcome of a stack-walk attempt.
enum StackWalkResult : int {
  // unw_step() reported an error before the walk completed.
  ERROR = -1,
  // The stack was walked to completion.
  SUCCESS,
  // The walk produced exactly one frame, located in libsystem_kernel.dylib —
  // i.e. the thread appears to be stopped in a system call.
  SYSCALL,
};
156
157 // Walks the stack represented by |unwind_context|, calling back to the provided
158 // lambda for each frame.
159 template <typename StackFrameCallback>
160 StackWalkResult WalkStackFromContext(unw_context_t* unwind_context,
161 const StackFrameCallback& callback) {
162 unw_cursor_t unwind_cursor;
163 unw_init_local(&unwind_cursor, unwind_context);
164
165 int step_result;
166 unw_word_t ip;
167 size_t frames = 0;
168 do {
169 ++frames;
170 unw_get_reg(&unwind_cursor, UNW_REG_IP, &ip);
171
172 callback(static_cast<uintptr_t>(ip));
173
174 step_result = unw_step(&unwind_cursor);
175 } while (step_result > 0);
176
177 if (step_result != 0)
178 return StackWalkResult::ERROR;
179
180 Dl_info info;
181 if (frames == 1 && dladdr(reinterpret_cast<void*>(ip), &info) != 0 &&
182 strcmp(info.dli_fname, LibSystemKernelName()) == 0) {
183 return StackWalkResult::SYSCALL;
184 }
185
186 return StackWalkResult::SUCCESS;
187 }
188
// Walks the stack represented by |thread_context|, calling back to the provided
// lambda for each frame.
template <typename StackFrameCallback>
void WalkStack(const ThreadContext& thread_context,
               const StackFrameCallback& callback) {
  // This uses libunwind to walk the stack. libunwind is designed to be used for
  // a thread to walk its own stack. This creates two problems.

  // Problem 1: There is no official way to create a unw_context other than to
  // create it from the current state of the current thread's stack. To get
  // around this, forge a context. A unw_context is just a copy of the register
  // file followed by the instruction pointer. Coincidentally, the first 17
  // items of the ThreadContext type are exactly that!
  unw_context_t unwind_context;
  memcpy(&unwind_context, &thread_context, sizeof(uint64_t) * 17);
  StackWalkResult result = WalkStackFromContext(&unwind_context, callback);

  if (result == StackWalkResult::SYSCALL) {
    // Problem 2: Because libunwind is designed to be triggered by user code on
    // their own thread, if it hits a library that has no unwind info for the
    // function that is being executed, it just stops. This isn't a problem in
    // the normal case, but in this case, it's quite possible that the stack
    // being walked is stopped in a function that bridges to the kernel and thus
    // is missing the unwind info.
    //
    // If so, cheat by manually unwinding one stack frame and trying again.
    // data[7] and data[16] are the rsp and rip slots of the forged context,
    // mirroring the ThreadContext field layout memcpy'd above.
    unwind_context.data[7] = thread_context.rsp + 8;  // rsp++
    unwind_context.data[16] =
        *reinterpret_cast<uint64_t*>(thread_context.rsp);  // rip = *rsp
    WalkStackFromContext(&unwind_context, callback);
  }
}
221
222 // Module identifiers ---------------------------------------------------------
223
224 // Helper that swaps byte order in |x| if |swap| flag is set.
225 uint32_t SwapIfBig32(uint32_t x, bool swap) {
Mark Mentovai 2017/02/17 05:21:05 You don’t need any of this swapping stuff.
Avi (use Gerrit) 2017/02/17 17:18:12 Acknowledged.
226 if (swap)
227 return OSSwapBigToHostInt32(x);
228 return x;
229 }
230
231 // Returns the offset in bytes where the x86_64 header is located in a binary
232 // loaded at |module_addr|. Returns 0 if |module_addr| is not a valid FAT
233 // Mach-O binary or has not been built for x86_64.
234 off_t GetMach64HeaderOffset(const void* module_addr) {
Mark Mentovai 2017/02/17 05:21:05 You don’t need any of this fat stuff either.
Avi (use Gerrit) 2017/02/17 17:18:13 Acknowledged.
235 const fat_header* header = reinterpret_cast<const fat_header*>(module_addr);
236 if (header->magic != FAT_MAGIC && header->magic != FAT_CIGAM)
237 return 0;
238
239 // Search all FAT architectures for x86_64.
240 const fat_arch* fat_arches = reinterpret_cast<const fat_arch*>(
241 reinterpret_cast<const uint8_t*>(module_addr) + sizeof(header));
242 uint32_t n_arches = OSSwapBigToHostInt32(header->nfat_arch);
243 for (uint32_t i = 0; i < n_arches; ++i) {
244 const fat_arch& arch = fat_arches[i];
245 if (OSSwapBigToHostInt32(arch.cputype) == CPU_TYPE_X86_64)
246 return OSSwapBigToHostInt32(arch.offset);
247 }
248 return 0;
249 }
250
251 // Returns true if the Mach-O binary at |module_addr| was built specifically for
252 // the x86_64 CPU architecture.
253 bool IsX64Header(const void* module_addr) {
254 const mach_header_64* header =
255 reinterpret_cast<const mach_header_64*>(module_addr);
256 if (header->magic != MH_MAGIC_64 && header->magic != MH_CIGAM_64)
257 return false;
258 bool swap = header->magic == MH_CIGAM_64;
259 return SwapIfBig32(header->cputype, swap) == CPU_TYPE_X86_64;
260 }
261
262 // Fills |id| with the UUID of the x86_64 Mach-O binary loaded at |module_addr|.
263 // |offset| is the offset in bytes into |module_addr| where the x86_64 header is
264 // located. |offset| is only relevant if the binary is FAT and contains multiple
265 // architecture headers. Returns false if the header is malformed or the header
266 // does not specify the UUID load command.
267 bool GetX64UUIDAt(const void* module_addr, unsigned char* id, off_t offset) {
268 const mach_header_64* header = reinterpret_cast<const mach_header_64*>(
269 reinterpret_cast<const uint8_t*>(module_addr) + offset);
270 if (header->magic != MH_MAGIC_64 && header->magic != MH_CIGAM_64)
271 return false;
272
273 bool swap = header->magic == MH_CIGAM_64;
274 // Search all load commands for UUID command.
275 offset += sizeof(mach_header_64);
276 for (uint32_t i = 0; i < SwapIfBig32(header->ncmds, swap); ++i) {
277 const load_command* current_cmd = reinterpret_cast<const load_command*>(
Mark Mentovai 2017/02/17 05:21:05 This loop needs to be cognizant of not exceeding h
Avi (use Gerrit) 2017/02/17 17:18:12 Acknowledged.
278 reinterpret_cast<const uint8_t*>(module_addr) + offset);
279
280 if (SwapIfBig32(current_cmd->cmd, swap) == LC_UUID) {
281 const uuid_command* uuid_cmd =
Mark Mentovai 2017/02/17 05:21:05 Also need to check that current_cmd->cmdsize is at
Avi (use Gerrit) 2017/02/17 17:18:12 Done.
282 reinterpret_cast<const uuid_command*>(current_cmd);
283 static_assert(sizeof(uuid_cmd->uuid) == sizeof(uuid_t),
284 "UUID field of UUID command should be 16 bytes.");
285 memcpy(id, &uuid_cmd->uuid, sizeof(uuid_t));
Mark Mentovai 2017/02/17 05:21:05 (if you were swapping, and you’re not, this would
Avi (use Gerrit) 2017/02/17 17:18:12 Acknowledged.
286 return true;
287 }
288 offset += SwapIfBig32(current_cmd->cmdsize, swap);
289 }
290 return false;
291 }
292
293 // Fills |id| with the Mach-O UUID retrieved from Mach-O binary loaded at
294 // |module_addr|. This function returns false if the binary was not built for
295 // X86_64 or if the UUID cannot be found.
296 bool GetUUID(const void* module_addr, unsigned char* id) {
Mark Mentovai 2017/02/17 05:21:05 (this is the only caller of the swappy and fatty f
Avi (use Gerrit) 2017/02/17 17:18:13 Acknowledged.
297 off_t offset = 0;
298 // If the module is not x86_64 exclusive, it could be a module that supports
299 // multiple architectures. In that case, the appropriate header will be at
300 // some non-zero offset.
301 if (!IsX64Header(module_addr) &&
302 !(offset = GetMach64HeaderOffset(module_addr))) {
303 return false;
304 }
305 return GetX64UUIDAt(module_addr, id, offset);
306 }
307
308 // Returns the hex encoding of a 16-byte ID for the binary loaded at
309 // |module_addr|. Returns an empty string if the UUID cannot be found at
310 // |module_addr|.
311 std::string GetUniqueId(const void* module_addr) {
Mark Mentovai 2017/02/17 05:21:05 (this is the only caller of the caller of the swap
Avi (use Gerrit) 2017/02/17 17:18:12 Acknowledged.
312 unsigned char id[sizeof(uuid_t)];
313 if (!GetUUID(module_addr, id))
314 return "";
315 return HexEncode(id, sizeof(uuid_t));
316 }
317
318 // Gets the index for the Module containing |instruction_pointer| in
319 // |modules|, adding it if it's not already present. Returns
320 // StackSamplingProfiler::Frame::kUnknownModuleIndex if no Module can be
321 // determined for |module|.
322 size_t GetModuleIndex(const uintptr_t instruction_pointer,
323 std::vector<StackSamplingProfiler::Module>* modules,
324 std::map<const void*, size_t>* profile_module_index) {
325 Dl_info inf;
326 if (!dladdr(reinterpret_cast<const void*>(instruction_pointer), &inf))
327 return StackSamplingProfiler::Frame::kUnknownModuleIndex;
328
329 auto module_index = profile_module_index->find(inf.dli_fbase);
330 if (module_index == profile_module_index->end()) {
331 StackSamplingProfiler::Module module(
332 reinterpret_cast<uintptr_t>(inf.dli_fbase), GetUniqueId(inf.dli_fbase),
Mark Mentovai 2017/02/17 05:21:05 (this is the only caller of the caller of the call
Avi (use Gerrit) 2017/02/17 17:18:12 Done.
333 base::FilePath(inf.dli_fname));
334 modules->push_back(module);
335 module_index =
336 profile_module_index
337 ->insert(std::make_pair(inf.dli_fbase, modules->size() - 1))
338 .first;
339 }
340 return module_index->second;
341 }
342
343 // ScopedSuspendThread --------------------------------------------------------
344
345 // Suspends a thread for the lifetime of the object.
346 class ScopedSuspendThread {
347 public:
348 explicit ScopedSuspendThread(mach_port_t thread_port);
349 ~ScopedSuspendThread();
350
351 bool was_successful() const { return was_successful_; }
352
353 private:
354 mach_port_t thread_port_;
355 bool was_successful_;
356
357 DISALLOW_COPY_AND_ASSIGN(ScopedSuspendThread);
358 };
359
360 ScopedSuspendThread::ScopedSuspendThread(mach_port_t thread_port)
361 : thread_port_(thread_port),
362 was_successful_(thread_suspend(thread_port) == KERN_SUCCESS) {}
363
364 ScopedSuspendThread::~ScopedSuspendThread() {
365 if (!was_successful_)
366 return;
367
368 kern_return_t resume_result = thread_resume(thread_port_);
369 CHECK_EQ(KERN_SUCCESS, resume_result) << "thread_resume failed";
370 }
371
372 // NativeStackSamplerMac ------------------------------------------------------
373
// Stack sampler for a single thread on macOS. For each sample the target
// thread is briefly suspended while its stack is copied into a reusable
// buffer; the stack walk itself runs on the copy after the thread resumes.
class NativeStackSamplerMac : public NativeStackSampler {
 public:
  NativeStackSamplerMac(mach_port_t thread_port,
                        AnnotateCallback annotator,
                        NativeStackSamplerTestDelegate* test_delegate);
  ~NativeStackSamplerMac() override;

  // StackSamplingProfiler::NativeStackSampler:
  void ProfileRecordingStarting(
      std::vector<StackSamplingProfiler::Module>* modules) override;
  void RecordStackSample(StackSamplingProfiler::Sample* sample) override;
  void ProfileRecordingStopped() override;

 private:
  enum {
    // Intended to hold the largest stack used by Chrome. The default macOS main
    // thread stack size is 8 MB, and this allows for expansion if it occurs.
    kStackCopyBufferSize = 12 * 1024 * 1024
  };

  // Suspends the thread with |thread_port_|, copies its stack and resumes the
  // thread, then records the stack frames and associated modules into |sample|.
  void SuspendThreadAndRecordStack(StackSamplingProfiler::Sample* sample);

  // Weak reference: Mach port for thread being profiled.
  mach_port_t thread_port_;

  const AnnotateCallback annotator_;

  NativeStackSamplerTestDelegate* const test_delegate_;

  // The stack base address corresponding to |thread_handle_|.
  const void* const thread_stack_base_address_;

  // Buffer to use for copies of the stack. We use the same buffer for all the
  // samples to avoid the overhead of multiple allocations and frees.
  const std::unique_ptr<unsigned char[]> stack_copy_buffer_;

  // Weak. Points to the modules associated with the profile being recorded
  // between ProfileRecordingStarting() and ProfileRecordingStopped().
  std::vector<StackSamplingProfiler::Module>* current_modules_ = nullptr;

  // Maps a module's base address to the corresponding Module's index within
  // current_modules_.
  std::map<const void*, size_t> profile_module_index_;

  DISALLOW_COPY_AND_ASSIGN(NativeStackSamplerMac);
};
422
NativeStackSamplerMac::NativeStackSamplerMac(
    mach_port_t thread_port,
    AnnotateCallback annotator,
    NativeStackSamplerTestDelegate* test_delegate)
    : thread_port_(thread_port),
      annotator_(annotator),
      test_delegate_(test_delegate),
      // Computed once here and used as the top of the stack for every sample.
      thread_stack_base_address_(
          pthread_get_stackaddr_np(pthread_from_mach_thread_np(thread_port))),
      // One buffer reused across all samples; see kStackCopyBufferSize.
      stack_copy_buffer_(new unsigned char[kStackCopyBufferSize]) {
  DCHECK(annotator_);

  // This class suspends threads, and those threads might be suspended in dyld.
  // Therefore, for all the system functions that might be linked in dynamically
  // that are used while threads are suspended, make calls to them to make sure
  // that they are linked up.
  ThreadContext thread_context;
  GetThreadContext(thread_port_, &thread_context);
}
442
// No manual cleanup needed: all members release their resources themselves.
NativeStackSamplerMac::~NativeStackSamplerMac() {}
444
445 void NativeStackSamplerMac::ProfileRecordingStarting(
446 std::vector<StackSamplingProfiler::Module>* modules) {
447 current_modules_ = modules;
448 profile_module_index_.clear();
449 }
450
451 void NativeStackSamplerMac::RecordStackSample(
452 StackSamplingProfiler::Sample* sample) {
453 DCHECK(current_modules_);
454
455 if (!stack_copy_buffer_)
456 return;
457
458 SuspendThreadAndRecordStack(sample);
459 }
460
void NativeStackSamplerMac::ProfileRecordingStopped() {
  // Drop the weak modules pointer; it is only valid between
  // ProfileRecordingStarting() and ProfileRecordingStopped().
  current_modules_ = nullptr;
}
464
void NativeStackSamplerMac::SuspendThreadAndRecordStack(
    StackSamplingProfiler::Sample* sample) {
  ThreadContext thread_context;

  // Copy the stack.

  {
    // IMPORTANT NOTE: Do not do ANYTHING in this scope that might allocate
    // memory, including indirectly via use of DCHECK/CHECK or other logging
    // statements. Otherwise this code can deadlock on heap locks in the
    // default heap acquired by the target thread before it was suspended.
    ScopedSuspendThread suspend_thread(thread_port_);
    if (!suspend_thread.was_successful())
      return;

    if (!GetThreadContext(thread_port_, &thread_context))
      return;
    uint64_t stack_top = reinterpret_cast<uint64_t>(thread_stack_base_address_);
    uint64_t stack_bottom = thread_context.rsp;

    // Skip the sample entirely rather than copying a truncated stack.
    if ((stack_top - stack_bottom) > kStackCopyBufferSize)
      return;

    (*annotator_)(sample);

    CopyStackAndRewritePointers(
        stack_copy_buffer_.get(), reinterpret_cast<void*>(stack_bottom),
        reinterpret_cast<void*>(stack_top), &thread_context);
  }  // ScopedSuspendThread

  if (test_delegate_)
    test_delegate_->OnPreStackWalk();

  // Walk the stack and record it.

  // Reserve enough memory for most stacks, to avoid repeated allocations.
  // Approximately 99.9% of recorded stacks are 128 frames or fewer.
  sample->frames.reserve(128);

  // The walk operates on the copied stack, so the target thread is already
  // running again at this point.
  auto current_modules = current_modules_;
  auto profile_module_index = &profile_module_index_;
  WalkStack(thread_context, [sample, current_modules,
                             profile_module_index](uintptr_t frame_ip) {
    sample->frames.push_back(StackSamplingProfiler::Frame(
        frame_ip,
        GetModuleIndex(frame_ip, current_modules, profile_module_index)));
  });
}
513
514 } // namespace
515
516 std::unique_ptr<NativeStackSampler> NativeStackSampler::Create(
517 PlatformThreadId thread_id,
518 AnnotateCallback annotator,
519 NativeStackSamplerTestDelegate* test_delegate) {
520 #if defined(__i386__)
Mark Mentovai 2017/02/17 05:21:05 You can just make this a compile-time #error. And
Avi (use Gerrit) 2017/02/17 17:18:12 Re compile error: the _posix version of this file
521 return nullptr;
522 #endif
523 return base::MakeUnique<NativeStackSamplerMac>(thread_id, annotator,
524 test_delegate);
525 }
526
527 } // namespace base
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698