Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(82)

Side by Side Diff: base/profiler/native_stack_sampler_mac.cc

Issue 2702463003: NativeStackSampler implementation for Mac. (Closed)
Patch Set: 0u Created 3 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2017 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/profiler/native_stack_sampler.h"
6
7 #include <dlfcn.h>
8 #include <libkern/OSByteOrder.h>
9 #include <libunwind.h>
10 #include <mach-o/swap.h>
11 #include <mach/kern_return.h>
12 #include <mach/mach.h>
13 #include <mach/thread_act.h>
14 #include <pthread.h>
15 #include <sys/syslimits.h>
16
17 #include <map>
18 #include <memory>
19
20 #include "base/logging.h"
21 #include "base/macros.h"
22 #include "base/memory/ptr_util.h"
23 #include "base/strings/string_number_conversions.h"
24
25 namespace base {
26
27 namespace {
28
29 // Stack walking --------------------------------------------------------------
30
// Copy of x86_64 thread context structure from x86_thread_state64_t type.
// Copied struct since fields can have different names on different versions of
// Darwin.
//
// The field order is significant and must not be changed:
//  - GetThreadContext() hands this struct to thread_get_state() as the output
//    buffer for the x86_THREAD_STATE64 flavor.
//  - WalkStack() copies the first 17 fields (rax through rip) verbatim into a
//    unw_context_t and later addresses rsp and rip by index.
struct ThreadContext {
  uint64_t rax;
  uint64_t rbx;
  uint64_t rcx;
  uint64_t rdx;
  uint64_t rdi;
  uint64_t rsi;
  uint64_t rbp;
  uint64_t rsp;  // Word index 7; WalkStack() patches this slot as data[7].
  uint64_t r8;
  uint64_t r9;
  uint64_t r10;
  uint64_t r11;
  uint64_t r12;
  uint64_t r13;
  uint64_t r14;
  uint64_t r15;
  uint64_t rip;  // Word index 16; WalkStack() patches this slot as data[16].
  // The remaining fields are filled in by thread_get_state() but are not
  // copied into the unwind context by WalkStack().
  uint64_t rflags;
  uint64_t cs;
  uint64_t fs;
  uint64_t gs;
};
57
58 // Fills |state| with |target_thread|'s context.
Mike Wittman 2017/02/16 21:51:34 Should we have deadlock warnings analogous to the
Avi (use Gerrit) 2017/02/17 03:41:09 Done.
59 bool GetThreadContext(thread_act_t target_thread, ThreadContext* state) {
60 mach_msg_type_number_t count =
61 static_cast<mach_msg_type_number_t>(MACHINE_THREAD_STATE_COUNT);
62 return thread_get_state(target_thread, x86_THREAD_STATE64,
63 reinterpret_cast<thread_state_t>(state),
64 &count) == KERN_SUCCESS;
65 }
66
// Translates |pointer| from the original stack into the copied stack.
//
// If |pointer| lies in [original_stack_bottom, original_stack_top) the result
// is the address at the same byte offset from |stack_copy_bottom|. Any other
// value is returned unchanged. None of the pointer arguments is dereferenced.
uint64_t RewritePointerIfInOriginalStack(uint64_t* original_stack_bottom,
                                         uint64_t* original_stack_top,
                                         uint64_t* stack_copy_bottom,
                                         uint64_t pointer) {
  const uint64_t range_begin =
      reinterpret_cast<uint64_t>(original_stack_bottom);
  const uint64_t range_end = reinterpret_cast<uint64_t>(original_stack_top);

  const bool in_original_stack =
      (pointer >= range_begin) && (pointer < range_end);
  if (!in_original_stack)
    return pointer;

  const uint64_t offset_in_stack = pointer - range_begin;
  return reinterpret_cast<uint64_t>(stack_copy_bottom) + offset_in_stack;
}
87
88 void CopyStackAndRewritePointers(void* dest,
89 void* from,
90 void* to,
91 ThreadContext* thread_context)
92 NO_SANITIZE("address") {
93 uint64_t* original_stack_bottom = static_cast<uint64_t*>(from);
94 uint64_t* original_stack_top = static_cast<uint64_t*>(to);
95 uint64_t* stack_copy_bottom = static_cast<uint64_t*>(dest);
96 DCHECK_EQ(
97 0u, reinterpret_cast<uint64_t>(original_stack_bottom) % sizeof(uint64_t));
98 DCHECK_EQ(0u,
99 reinterpret_cast<uint64_t>(original_stack_top) % sizeof(uint64_t));
100 DCHECK_EQ(0u,
101 reinterpret_cast<uint64_t>(stack_copy_bottom) % sizeof(uint64_t));
102
103 size_t count = original_stack_top - original_stack_bottom;
104 for (size_t pos = 0; pos < count; ++pos) {
105 stack_copy_bottom[pos] = RewritePointerIfInOriginalStack(
106 original_stack_bottom, original_stack_top, stack_copy_bottom,
107 original_stack_bottom[pos]);
108 }
109
110 thread_context->rbp =
111 RewritePointerIfInOriginalStack(original_stack_bottom, original_stack_top,
112 stack_copy_bottom, thread_context->rbp);
113 thread_context->rsp =
114 RewritePointerIfInOriginalStack(original_stack_bottom, original_stack_top,
115 stack_copy_bottom, thread_context->rsp);
116 }
117
118 const char* LibSystemKernelName() {
119 static char path[PATH_MAX];
120 static char* name = nullptr;
121 if (name)
122 return name;
123
124 Dl_info info;
125 dladdr(reinterpret_cast<void*>(_exit), &info);
126 strncpy(path, info.dli_fname, PATH_MAX);
127 name = path;
128 DCHECK_EQ(std::string(name),
129 std::string("/usr/lib/system/libsystem_kernel.dylib"));
130 return name;
131 }
132
// Outcome of a single WalkStackFromContext() pass.
enum StackWalkResult : int {
  // unw_step() reported a negative (failure) status.
  ERROR = -1,
  // The walk ran to completion.
  SUCCESS = 0,
  // The walk stopped after one frame and that frame is inside the image named
  // by LibSystemKernelName().
  SYSCALL = 1,
};
138
139 // Walks the stack represented by |unwind_context|, calling back to the provided
140 // lambda for each frame.
141 template <typename StackFrameCallback>
142 StackWalkResult WalkStackFromContext(unw_context_t* unwind_context,
143 const StackFrameCallback& callback) {
144 unw_cursor_t unwind_cursor;
145 unw_init_local(&unwind_cursor, unwind_context);
146
147 int step_result;
148 unw_word_t ip;
149 size_t frames = 0;
150 do {
151 ++frames;
152 unw_get_reg(&unwind_cursor, UNW_REG_IP, &ip);
153
154 callback(static_cast<uintptr_t>(ip));
155
156 step_result = unw_step(&unwind_cursor);
Mike Wittman 2017/02/16 21:51:34 General questions: How is unwinding of leaf functi
Avi (use Gerrit) 2017/02/17 03:41:09 I believe the compiler puts in full debug info eve
Mark Mentovai 2017/02/17 05:21:05 Avi wrote:
Mike Wittman 2017/02/17 17:09:15 Can we verify the behavior by running the profiler
Avi (use Gerrit) 2017/02/17 17:18:12 a) Didn't catch that the stack_sampling_configurat
Mike Wittman 2017/02/17 17:38:51 That's the only other thing necessary to enable fo
157 } while (step_result > 0);
158
159 if (step_result != 0)
160 return StackWalkResult::ERROR;
161
162 Dl_info info;
163 if (frames == 1 && dladdr(reinterpret_cast<void*>(ip), &info) != 0 &&
164 strcmp(info.dli_fname, LibSystemKernelName()) == 0) {
165 return StackWalkResult::SYSCALL;
166 }
167
168 return StackWalkResult::SUCCESS;
169 }
170
171 // Walks the stack represented by |thread_context|, calling back to the provided
172 // lambda for each frame.
173 template <typename StackFrameCallback>
174 void WalkStack(const ThreadContext& thread_context,
175 const StackFrameCallback& callback) {
176 // This uses libunwind to walk the stack. libunwind is designed to be used for
177 // a thread to walk its own stack. This creates two problems.
178
179 // Problem 1: There is no official way to create a unw_context other than to
180 // create it from the current state of the current thread's stack. To get
181 // around this, forge a context. A unw_context is just a copy of the register
182 // file followed by the instruction pointer. Coincidentally, the first 17
183 // items of the ThreadContext type are exactly that!
184 unw_context_t unwind_context;
185 memcpy(&unwind_context, &thread_context, sizeof(uint64_t) * 17);
186 StackWalkResult result = WalkStackFromContext(&unwind_context, callback);
187
188 if (result == StackWalkResult::SYSCALL) {
189 // Problem 2: Because libunwind is designed to be triggered by user code on
190 // their own thread, if it hits a library that has no unwind info for the
191 // function that is being executed, it just stops. This isn't a problem in
192 // the normal case, but in this case, it's quite possible that the stack
193 // being walked is stopped in a function that bridges to the kernel and thus
194 // is missing the unwind info.
195 //
196 // If so, cheat by manually unwinding one stack frame and trying again.
197 unwind_context.data[7] = thread_context.rsp + 8; // rsp++
198 unwind_context.data[16] =
199 *reinterpret_cast<uint64_t*>(thread_context.rsp); // rip = *rsp
200 WalkStackFromContext(&unwind_context, callback);
201 }
202 }
203
204 // Module identifiers ---------------------------------------------------------
205
206 // Helper that swaps byte order in |x| if |swap| flag is set.
207 uint32_t SwapIfBig32(uint32_t x, bool swap) {
208 if (swap)
209 return OSSwapBigToHostInt32(x);
210 return x;
211 }
212
213 // Returns the offset in bytes where the x86_64 header is located in a binary
214 // loaded at |module_addr|. Returns 0 if |module_addr| is not a valid FAT
215 // Mach-O binary or has not been built for x86_64.
216 off_t GetMach64HeaderOffset(const void* module_addr) {
217 const fat_header* header = reinterpret_cast<const fat_header*>(module_addr);
218 if (header->magic != FAT_MAGIC && header->magic != FAT_CIGAM)
219 return 0;
220
221 // Search all FAT architectures for x86_64.
222 const fat_arch* fat_arches = reinterpret_cast<const fat_arch*>(
223 reinterpret_cast<const uint8_t*>(module_addr) + sizeof(header));
224 uint32_t n_arches = OSSwapBigToHostInt32(header->nfat_arch);
225 for (uint32_t i = 0; i < n_arches; ++i) {
226 const fat_arch& arch = fat_arches[i];
227 if (OSSwapBigToHostInt32(arch.cputype) == CPU_TYPE_X86_64)
228 return OSSwapBigToHostInt32(arch.offset);
229 }
230 return 0;
231 }
232
233 // Returns true if the Mach-O binary at |module_addr| was built specifically for
234 // the x86_64 CPU architecture.
235 bool IsX64Header(const void* module_addr) {
236 const mach_header_64* header =
237 reinterpret_cast<const mach_header_64*>(module_addr);
238 if (header->magic != MH_MAGIC_64 && header->magic != MH_CIGAM_64)
239 return false;
240 bool swap = header->magic == MH_CIGAM_64;
241 return SwapIfBig32(header->cputype, swap) == CPU_TYPE_X86_64;
242 }
243
244 // Fills |id| with the UUID of the x86_64 Mach-O binary loaded at |module_addr|.
245 // |offset| is the offset in bytes into |module_addr| where the x86_64 header is
246 // located. |offset| is only relevant if the binary is FAT and contains multiple
247 // architecture headers. Returns false if the header is malformed or the header
248 // does not specify the UUID load command.
249 bool GetX64UUIDAt(const void* module_addr, unsigned char* id, off_t offset) {
250 const mach_header_64* header = reinterpret_cast<const mach_header_64*>(
251 reinterpret_cast<const uint8_t*>(module_addr) + offset);
252 if (header->magic != MH_MAGIC_64 && header->magic != MH_CIGAM_64)
253 return false;
254
255 bool swap = header->magic == MH_CIGAM_64;
256 // Search all load commands for UUID command.
257 offset += sizeof(mach_header_64);
258 for (uint32_t i = 0; i < SwapIfBig32(header->ncmds, swap); ++i) {
259 const load_command* current_cmd = reinterpret_cast<const load_command*>(
260 reinterpret_cast<const uint8_t*>(module_addr) + offset);
261
262 if (SwapIfBig32(current_cmd->cmd, swap) == LC_UUID) {
263 const uuid_command* uuid_cmd =
264 reinterpret_cast<const uuid_command*>(current_cmd);
265 static_assert(sizeof(uuid_cmd->uuid) == sizeof(uuid_t),
266 "UUID field of UUID command should be 16 bytes.");
267 memcpy(id, &uuid_cmd->uuid, sizeof(uuid_t));
268 return true;
269 }
270 offset += SwapIfBig32(current_cmd->cmdsize, swap);
271 }
272 return false;
273 }
274
275 // Fills |id| with the Mach-O UUID retrieved from Mach-O binary loaded at
276 // |module_addr|. This function returns false if the binary was not built for
277 // X86_64 or if the UUID cannot be found.
278 bool GetUUID(const void* module_addr, unsigned char* id) {
279 off_t offset = 0;
280 // If the module is not x86_64 exclusive, it could be a module that supports
281 // multiple architectures. In that case, the appropriate header will be at
282 // some non-zero offset.
283 if (!IsX64Header(module_addr) &&
284 !(offset = GetMach64HeaderOffset(module_addr))) {
285 return false;
286 }
287 return GetX64UUIDAt(module_addr, id, offset);
288 }
289
290 // Returns the hex encoding of a 16-byte ID for the binary loaded at
291 // |module_addr|. Returns an empty string if the UUID cannot be found at
292 // |module_addr|.
293 std::string GetUniqueId(const void* module_addr) {
294 unsigned char id[sizeof(uuid_t)];
295 if (!GetUUID(module_addr, id))
296 return "";
297 return HexEncode(id, sizeof(uuid_t));
298 }
299
300 // Gets the index for the Module containing |instruction_pointer| in
301 // |modules|, adding it if it's not already present. Returns
302 // StackSamplingProfiler::Frame::kUnknownModuleIndex if no Module can be
303 // determined for |module|.
304 size_t GetModuleIndex(const uintptr_t instruction_pointer,
305 std::vector<StackSamplingProfiler::Module>* modules,
306 std::map<const void*, size_t>* profile_module_index) {
307 Dl_info inf;
308 if (!dladdr(reinterpret_cast<const void*>(instruction_pointer), &inf))
309 return StackSamplingProfiler::Frame::kUnknownModuleIndex;
310
311 auto module_index = profile_module_index->find(inf.dli_fbase);
312 if (module_index == profile_module_index->end()) {
313 StackSamplingProfiler::Module module(
314 reinterpret_cast<uintptr_t>(inf.dli_fbase), GetUniqueId(inf.dli_fbase),
315 base::FilePath(inf.dli_fname));
316 modules->push_back(module);
317 module_index =
318 profile_module_index
319 ->insert(std::make_pair(inf.dli_fbase, modules->size() - 1))
320 .first;
321 }
322 return module_index->second;
323 }
324
325 // ScopedSuspendThread --------------------------------------------------------
326
327 // Suspends a thread for the lifetime of the object.
328 class ScopedSuspendThread {
329 public:
330 explicit ScopedSuspendThread(mach_port_t thread_port);
331 ~ScopedSuspendThread();
332
333 bool was_successful() const { return was_successful_; }
334
335 private:
336 mach_port_t thread_port_;
337 bool was_successful_;
338
339 DISALLOW_COPY_AND_ASSIGN(ScopedSuspendThread);
340 };
341
342 ScopedSuspendThread::ScopedSuspendThread(mach_port_t thread_port)
343 : thread_port_(thread_port),
344 was_successful_(thread_suspend(thread_port) == KERN_SUCCESS) {}
345
346 ScopedSuspendThread::~ScopedSuspendThread() {
347 if (!was_successful_)
348 return;
349
350 kern_return_t resume_result = thread_resume(thread_port_);
351 CHECK_EQ(KERN_SUCCESS, resume_result) << "thread_resume failed";
352 }
353
354 // NativeStackSamplerMac ------------------------------------------------------
355
// NativeStackSampler implementation that samples a single thread, identified
// by its Mach port, by suspending it, copying its stack into a private buffer
// and walking the copy.
//
// Data members are initialized in declaration order by the constructor's
// initializer list; keep the two in sync.
class NativeStackSamplerMac : public NativeStackSampler {
 public:
  // |thread_port| identifies the thread to sample. |annotator| must be
  // non-null (DCHECKed in the constructor). |test_delegate| may be null; when
  // set, its OnPreStackWalk() is invoked before each stack walk.
  NativeStackSamplerMac(mach_port_t thread_port,
                        AnnotateCallback annotator,
                        NativeStackSamplerTestDelegate* test_delegate);
  ~NativeStackSamplerMac() override;

  // StackSamplingProfiler::NativeStackSampler:
  void ProfileRecordingStarting(
      std::vector<StackSamplingProfiler::Module>* modules) override;
  void RecordStackSample(StackSamplingProfiler::Sample* sample) override;
  void ProfileRecordingStopped() override;

 private:
  enum {
    // Intended to hold the largest stack used by Chrome. The default macOS main
    // thread stack size is 8 MB, and this allows for expansion if it occurs.
    kStackCopyBufferSize = 12 * 1024 * 1024
  };

  // Suspends the thread with |thread_port_|, copies its stack and resumes the
  // thread, then records the stack frames and associated modules into |sample|.
  void SuspendThreadAndRecordStack(StackSamplingProfiler::Sample* sample);

  // Weak reference: Mach port for thread being profiled.
  mach_port_t thread_port_;

  // Invoked with the sample while the target thread is suspended.
  const AnnotateCallback annotator_;

  // Optional test hook; may be null.
  NativeStackSamplerTestDelegate* const test_delegate_;

  // The stack base address corresponding to |thread_port_|, obtained from
  // pthread_get_stackaddr_np() at construction. Used as the upper bound of the
  // range of stack memory that is copied.
  const void* const thread_stack_base_address_;

  // Buffer to use for copies of the stack. We use the same buffer for all the
  // samples to avoid the overhead of multiple allocations and frees.
  const std::unique_ptr<unsigned char[]> stack_copy_buffer_;

  // Weak. Points to the modules associated with the profile being recorded
  // between ProfileRecordingStarting() and ProfileRecordingStopped().
  std::vector<StackSamplingProfiler::Module>* current_modules_ = nullptr;

  // Maps a module's base address to the corresponding Module's index within
  // current_modules_.
  std::map<const void*, size_t> profile_module_index_;

  DISALLOW_COPY_AND_ASSIGN(NativeStackSamplerMac);
};
404
405 NativeStackSamplerMac::NativeStackSamplerMac(
406 mach_port_t thread_port,
407 AnnotateCallback annotator,
408 NativeStackSamplerTestDelegate* test_delegate)
409 : thread_port_(thread_port),
410 annotator_(annotator),
411 test_delegate_(test_delegate),
412 thread_stack_base_address_(
413 pthread_get_stackaddr_np(pthread_from_mach_thread_np(thread_port))),
414 stack_copy_buffer_(new unsigned char[kStackCopyBufferSize]) {
415 DCHECK(annotator_);
416
417 // This class suspends threads, and those threads might be suspended in dyld.
418 // Therefore, for all the system functions that might be linked in dynamically
419 // that are used while threads are suspended, make calls to them to make sure
420 // that they are linked up.
421 ThreadContext thread_context;
422 GetThreadContext(thread_port_, &thread_context);
423 }
424
425 NativeStackSamplerMac::~NativeStackSamplerMac() {}
426
427 void NativeStackSamplerMac::ProfileRecordingStarting(
428 std::vector<StackSamplingProfiler::Module>* modules) {
429 current_modules_ = modules;
430 profile_module_index_.clear();
431 }
432
433 void NativeStackSamplerMac::RecordStackSample(
434 StackSamplingProfiler::Sample* sample) {
435 DCHECK(current_modules_);
436
437 if (!stack_copy_buffer_)
438 return;
439
440 SuspendThreadAndRecordStack(sample);
441 }
442
443 void NativeStackSamplerMac::ProfileRecordingStopped() {
444 current_modules_ = nullptr;
445 }
446
447 void NativeStackSamplerMac::SuspendThreadAndRecordStack(
448 StackSamplingProfiler::Sample* sample) {
449 ThreadContext thread_context;
450
451 // Copy the stack.
452
453 {
454 ScopedSuspendThread suspend_thread(thread_port_);
455 if (!suspend_thread.was_successful())
456 return;
457
458 if (!GetThreadContext(thread_port_, &thread_context))
459 return;
460 uint64_t stack_top = reinterpret_cast<uint64_t>(thread_stack_base_address_);
461 uint64_t stack_bottom = thread_context.rsp;
462
463 if ((stack_top - stack_bottom) > kStackCopyBufferSize)
464 return;
465
466 (*annotator_)(sample);
467
468 CopyStackAndRewritePointers(
469 stack_copy_buffer_.get(), reinterpret_cast<void*>(stack_bottom),
470 reinterpret_cast<void*>(stack_top), &thread_context);
471 } // ScopedSuspendThread
472
473 if (test_delegate_)
474 test_delegate_->OnPreStackWalk();
475
476 // Walk the stack and record it.
477
478 auto current_modules = current_modules_;
479 auto profile_module_index = &profile_module_index_;
Mike Wittman 2017/02/16 21:51:34 Can we add a sample->frames.reserve() call here to
Avi (use Gerrit) 2017/02/17 03:41:09 Ah! Done.
480 WalkStack(thread_context, [sample, current_modules,
Mike Wittman 2017/02/16 21:51:34 Nice, using lambdas makes for a much cleaner solut
Avi (use Gerrit) 2017/02/17 03:41:09 Acknowledged.
481 profile_module_index](uintptr_t frame_ip) {
482 sample->frames.push_back(StackSamplingProfiler::Frame(
483 frame_ip,
484 GetModuleIndex(frame_ip, current_modules, profile_module_index)));
485 });
486 }
487
488 } // namespace
489
490 std::unique_ptr<NativeStackSampler> NativeStackSampler::Create(
491 PlatformThreadId thread_id,
492 AnnotateCallback annotator,
493 NativeStackSamplerTestDelegate* test_delegate) {
494 #if defined(__i386__)
495 return nullptr;
496 #endif
497 return base::MakeUnique<NativeStackSamplerMac>(thread_id, annotator,
498 test_delegate);
499 }
500
501 } // namespace base
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698