Chromium Code Reviews

Side by Side Diff: base/profiler/native_stack_sampler_mac.cc

Issue 2702463003: NativeStackSampler implementation for Mac. (Closed)
Patch Set: . Created 3 years, 8 months ago
1 // Copyright 2017 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/profiler/native_stack_sampler.h"
6
7 #include <dlfcn.h>
8 #include <libkern/OSByteOrder.h>
9 #include <libunwind.h>
10 #include <mach-o/swap.h>
11 #include <mach/kern_return.h>
12 #include <mach/mach.h>
13 #include <mach/thread_act.h>
14 #include <pthread.h>
15 #include <sys/resource.h>
16 #include <sys/syslimits.h>
17
18 #include <algorithm>
19 #include <map>
20 #include <memory>
21
22 #include "base/logging.h"
23 #include "base/mac/mach_logging.h"
24 #include "base/macros.h"
25 #include "base/memory/ptr_util.h"
26 #include "base/strings/string_number_conversions.h"
27
28 namespace base {
29
30 namespace {
31
32 // Miscellaneous --------------------------------------------------------------
33
34 size_t StackCopyBufferSize() {
35 static size_t stack_size = 0;
36 if (stack_size)
37 return stack_size;
38
39 // In platform_thread_mac's GetDefaultThreadStackSize(), RLIMIT_STACK is used
40 // for all stacks, not just the main thread's, so it is good for use here.
41 struct rlimit stack_rlimit;
42 if (getrlimit(RLIMIT_STACK, &stack_rlimit) == 0 &&
43 stack_rlimit.rlim_cur != RLIM_INFINITY) {
44 stack_size = stack_rlimit.rlim_cur;
45 return stack_size;
46 }
47
48 // If getrlimit somehow fails, return the default macOS main thread stack size
49 // of 8 MB with extra wiggle room.
Mark Mentovai 2017/03/30 02:33:26 Provide a comment saying where the default comes from.
Avi (use Gerrit) 2017/03/30 20:13:38 Should I just #include that file and use DFLSSIZ?
Mark Mentovai 2017/03/30 20:18:15 If you want. You don’t really have to because the
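For illustration only, a sketch of what Avi's DFLSSIZ suggestion might look like. It assumes that <machine/vmparam.h> is includable from userland Chromium code and that it defines DFLSSIZ as the BSD default stack size (roughly 8 MB); neither assumption is verified in this patch, and the code below keeps the fallback's extra "wiggle room" by adding 4 MB on top.

// Hypothetical alternative to the hard-coded 12 MB fallback, assuming
// <machine/vmparam.h> is visible to userland code and defines DFLSSIZ
// (the BSD default stack size, roughly 8 MB). Helper name is illustrative.
#include <machine/vmparam.h>

#include <cstddef>

size_t FallbackStackCopyBufferSize() {
  return static_cast<size_t>(DFLSSIZ) + 4 * 1024 * 1024;
}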
50 return 12 * 1024 * 1024;
51 }
52
53 // Stack walking --------------------------------------------------------------
54
55 // Fills |state| with |target_thread|'s context.
56 //
57 // Note that this is called while a thread is suspended. Make very very sure
58 // that no shared resources (e.g. memory allocators) are used for the duration
59 // of this function.
60 bool GetThreadState(thread_act_t target_thread, x86_thread_state64_t* state) {
61 mach_msg_type_number_t count =
62 static_cast<mach_msg_type_number_t>(x86_THREAD_STATE64_COUNT);
63 return thread_get_state(target_thread, x86_THREAD_STATE64,
64 reinterpret_cast<thread_state_t>(state),
65 &count) == KERN_SUCCESS;
66 }
67
68 // If the value at |pointer| points to the original stack, rewrites it to point
69 // to the corresponding location in the copied stack.
70 //
71 // Note that this is called while a thread is suspended. Make very very sure
72 // that no shared resources (e.g. memory allocators) are used for the duration
73 // of this function.
74 uintptr_t RewritePointerIfInOriginalStack(
75 const uintptr_t* original_stack_bottom,
76 const uintptr_t* original_stack_top,
77 uintptr_t* stack_copy_bottom,
78 uintptr_t pointer) {
79 uintptr_t original_stack_bottom_int =
80 reinterpret_cast<uintptr_t>(original_stack_bottom);
81 uintptr_t original_stack_top_int =
82 reinterpret_cast<uintptr_t>(original_stack_top);
83 uintptr_t stack_copy_bottom_int =
84 reinterpret_cast<uintptr_t>(stack_copy_bottom);
85
86 if ((pointer < original_stack_bottom_int) ||
87 (pointer >= original_stack_top_int)) {
88 return pointer;
89 }
90
91 return stack_copy_bottom_int + (pointer - original_stack_bottom_int);
92 }
93
94 // Copies the stack to a buffer while rewriting possible pointers to locations
95 // within the stack to point to the corresponding locations in the copy. This is
96 // necessary to handle stack frames with dynamic stack allocation, where a
97 // pointer to the beginning of the dynamic allocation area is stored on the
98 // stack and/or in a non-volatile register.
99 //
100 // Eager rewriting of anything that looks like a pointer to the stack, as done
101 // in this function, does not adversely affect the stack unwinding. The only
102 // other values on the stack the unwinding depends on are return addresses,
103 // which should not point within the stack memory. The rewriting is guaranteed
104 // to catch all pointers because the stacks are guaranteed by the ABI to be
105 // sizeof(void*) aligned.
106 //
107 // Note that this is called while a thread is suspended. Make very very sure
108 // that no shared resources (e.g. memory allocators) are used for the duration
109 // of this function.
110 void CopyStackAndRewritePointers(uintptr_t* stack_copy_bottom,
111 const uintptr_t* original_stack_bottom,
112 const uintptr_t* original_stack_top,
113 x86_thread_state64_t* thread_state)
114 NO_SANITIZE("address") {
115 size_t count = original_stack_top - original_stack_bottom;
116 for (size_t pos = 0; pos < count; ++pos) {
117 stack_copy_bottom[pos] = RewritePointerIfInOriginalStack(
118 original_stack_bottom, original_stack_top, stack_copy_bottom,
119 original_stack_bottom[pos]);
120 }
121
122 uint64_t* rewrite_registers[] = {&thread_state->__rbx, &thread_state->__rbp,
123 &thread_state->__rsp, &thread_state->__r12,
124 &thread_state->__r13, &thread_state->__r14,
125 &thread_state->__r15};
126 for (auto* reg : rewrite_registers) {
127 *reg = RewritePointerIfInOriginalStack(
128 original_stack_bottom, original_stack_top, stack_copy_bottom, *reg);
129 }
130 }
131
132 // Walks the stack represented by |unwind_context|, calling back to the provided
133 // lambda for each frame. Returns false if an error occurred, otherwise returns
134 // true.
135 template <typename StackFrameCallback>
136 bool WalkStackFromContext(unw_context_t* unwind_context,
137 size_t* frame_count,
138 const StackFrameCallback& callback) {
139 unw_cursor_t unwind_cursor;
140 unw_init_local(&unwind_cursor, unwind_context);
141
142 int step_result;
143 unw_word_t ip;
144 do {
145 ++(*frame_count);
146 unw_get_reg(&unwind_cursor, UNW_REG_IP, &ip);
147
148 callback(static_cast<uintptr_t>(ip));
149
150 step_result = unw_step(&unwind_cursor);
151 } while (step_result > 0);
152
153 if (step_result != 0)
154 return false;
155
156 return true;
157 }
158
159 bool IsIPInValidImage(unw_context_t* unwind_context) {
160 unw_cursor_t unwind_cursor;
161 unw_init_local(&unwind_cursor, unwind_context);
162 unw_proc_info_t proc_info;
163 unw_get_proc_info(&unwind_cursor, &proc_info);
164 return proc_info.extra != 0;
165 }
166
167 // Walks the stack represented by |thread_state|, calling back to the provided
168 // lambda for each frame.
169 template <typename StackFrameCallback>
170 void WalkStack(const x86_thread_state64_t& thread_state,
171 uintptr_t stack_top,
172 const StackFrameCallback& callback) {
173 size_t frame_count = 0;
174 // This uses libunwind to walk the stack. libunwind is designed to be used for
175 // a thread to walk its own stack. This creates two problems.
176
177 // Problem 1: There is no official way to create a unw_context other than to
178 // create it from the current state of the current thread's stack. To get
179 // around this, forge a context. A unw_context is just a copy of the 16 main
180 // registers followed by the instruction pointer, nothing more.
181 // Coincidentally, the first 17 items of the x86_thread_state64_t type are
182 // exactly those registers in exactly the same order, so just bulk copy them
183 // over.
184 unw_context_t unwind_context;
185 memcpy(&unwind_context, &thread_state, sizeof(uintptr_t) * 17);
186 bool result = WalkStackFromContext(&unwind_context, &frame_count, callback);
187
188 if (!result)
189 return;
190
191 if (frame_count == 1) {
192 // Problem 2: Because libunwind is designed to be triggered by user code on
193 // their own thread, if it hits a library that has no unwind info for the
194 // function that is being executed, it just stops. This isn't a problem in
195 // the normal case, but in this case, it's quite possible that the stack
196 // being walked is stopped in a function that bridges to the kernel and thus
197 // is missing the unwind info.
198 //
199 // If so, cheat by scanning the stack and trying again. Only do this if the
200 // first time using libunwind fails after one frame.
201 bool ip_in_valid_image = false;
202 auto& rsp = unwind_context.data[7];
203 auto& rip = unwind_context.data[16];
204 const uintptr_t kMaxScanDepth = 50;
Mike Wittman 2017/03/30 16:56:46 nit: call out that this is in bytes
Avi (use Gerrit) 2017/03/30 20:13:38 Done, though that's a really weird depth. Fifty?
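A hedged illustration of Mike's nit; the name below is editorial, not what the patch ultimately uses. Making the unit part of the name also highlights the oddity Avi points out: on x86_64, 50 bytes spans only six 8-byte stack slots.

// Hypothetical rename making the unit explicit. 50 bytes covers only six
// 8-byte slots; if the intent were 50 stack slots, the limit would instead
// be 50 * sizeof(uintptr_t) == 400 bytes.
const uintptr_t kMaxScanDepthBytes = 50;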
205 uintptr_t scan_limit = std::min<uintptr_t>(stack_top, rsp + kMaxScanDepth);
206 do {
207 rip = *reinterpret_cast<uintptr_t*>(rsp); // rip = *rsp
208 rsp += sizeof(uintptr_t); // rsp++
209 if (rsp % sizeof(uintptr_t)) {
210 // The "stack pointer" isn't aligned. Just give up.
211 return;
212 }
213
214 ip_in_valid_image = IsIPInValidImage(&unwind_context);
215 } while (!ip_in_valid_image && rsp < scan_limit);
216
217 if (ip_in_valid_image)
218 WalkStackFromContext(&unwind_context, &frame_count, callback);
219 }
220 }
221
222 // Module identifiers ---------------------------------------------------------
223
224 // Returns the hex encoding of a 16-byte ID for the binary loaded at
225 // |module_addr|. Returns an empty string if the UUID cannot be found at
226 // |module_addr|.
227 std::string GetUniqueId(const void* module_addr) {
228 const mach_header_64* mach_header =
229 reinterpret_cast<const mach_header_64*>(module_addr);
230 DCHECK_EQ(MH_MAGIC_64, mach_header->magic);
231
232 size_t offset = sizeof(mach_header_64);
233 size_t offset_limit = sizeof(mach_header_64) + mach_header->sizeofcmds;
234 for (uint32_t i = 0; (i < mach_header->ncmds) &&
235 (offset + sizeof(load_command) < offset_limit);
236 ++i) {
237 const load_command* current_cmd = reinterpret_cast<const load_command*>(
238 reinterpret_cast<const uint8_t*>(mach_header) + offset);
239
240 if (offset + current_cmd->cmdsize > offset_limit) {
241 // This command runs off the end of the command list. This is malformed.
242 return std::string();
243 }
244
245 if (current_cmd->cmd == LC_UUID) {
246 if (current_cmd->cmdsize < sizeof(uuid_command)) {
247 // This "UUID command" is too small. This is malformed.
248 return std::string();
249 }
250
251 const uuid_command* uuid_cmd =
252 reinterpret_cast<const uuid_command*>(current_cmd);
253 static_assert(sizeof(uuid_cmd->uuid) == sizeof(uuid_t),
254 "UUID field of UUID command should be 16 bytes.");
255 return HexEncode(&uuid_cmd->uuid, sizeof(uuid_cmd->uuid));
256 }
257 offset += current_cmd->cmdsize;
258 }
259 return std::string();
260 }
261
262 // Gets the index for the Module containing |instruction_pointer| in
263 // |modules|, adding it if it's not already present. Returns
264 // StackSamplingProfiler::Frame::kUnknownModuleIndex if no Module can be
265 // determined for |instruction_pointer|.
266 size_t GetModuleIndex(const uintptr_t instruction_pointer,
267 std::vector<StackSamplingProfiler::Module>* modules,
268 std::map<const void*, size_t>* profile_module_index) {
269 Dl_info inf;
270 if (!dladdr(reinterpret_cast<const void*>(instruction_pointer), &inf))
271 return StackSamplingProfiler::Frame::kUnknownModuleIndex;
272
273 auto module_index = profile_module_index->find(inf.dli_fbase);
274 if (module_index == profile_module_index->end()) {
275 StackSamplingProfiler::Module module(
276 reinterpret_cast<uintptr_t>(inf.dli_fbase), GetUniqueId(inf.dli_fbase),
277 base::FilePath(inf.dli_fname));
278 modules->push_back(module);
279 module_index =
280 profile_module_index
281 ->insert(std::make_pair(inf.dli_fbase, modules->size() - 1))
282 .first;
283 }
284 return module_index->second;
285 }
286
287 // ScopedSuspendThread --------------------------------------------------------
288
289 // Suspends a thread for the lifetime of the object.
290 class ScopedSuspendThread {
291 public:
292 explicit ScopedSuspendThread(mach_port_t thread_port)
293 : thread_port_(thread_suspend(thread_port) == KERN_SUCCESS
294 ? thread_port
295 : MACH_PORT_NULL) {}
296
297 ~ScopedSuspendThread() {
298 if (!was_successful())
299 return;
300
301 kern_return_t kr = thread_resume(thread_port_);
302 MACH_CHECK(kr == KERN_SUCCESS, kr) << "thread_resume";
303 }
304
305 bool was_successful() const { return thread_port_ != MACH_PORT_NULL; }
306
307 private:
308 mach_port_t thread_port_;
309
310 DISALLOW_COPY_AND_ASSIGN(ScopedSuspendThread);
311 };
312
313 // NativeStackSamplerMac ------------------------------------------------------
314
315 class NativeStackSamplerMac : public NativeStackSampler {
316 public:
317 NativeStackSamplerMac(mach_port_t thread_port,
318 AnnotateCallback annotator,
319 NativeStackSamplerTestDelegate* test_delegate);
320 ~NativeStackSamplerMac() override;
321
322 // StackSamplingProfiler::NativeStackSampler:
323 void ProfileRecordingStarting(
324 std::vector<StackSamplingProfiler::Module>* modules) override;
325 void RecordStackSample(StackSamplingProfiler::Sample* sample) override;
326 void ProfileRecordingStopped() override;
327
328 private:
329 // Suspends the thread with |thread_port_|, copies its stack and resumes the
330 // thread, then records the stack frames and associated modules into |sample|.
331 void SuspendThreadAndRecordStack(StackSamplingProfiler::Sample* sample);
332
333 // Weak reference: Mach port for thread being profiled.
334 mach_port_t thread_port_;
335
336 const AnnotateCallback annotator_;
337
338 NativeStackSamplerTestDelegate* const test_delegate_;
339
340 // The stack base address corresponding to |thread_port_|.
341 const void* const thread_stack_base_address_;
342
343 // Buffer to use for copies of the stack. We use the same buffer for all the
344 // samples to avoid the overhead of multiple allocations and frees.
345 const std::unique_ptr<unsigned char[]> stack_copy_buffer_;
346
347 // Weak. Points to the modules associated with the profile being recorded
348 // between ProfileRecordingStarting() and ProfileRecordingStopped().
349 std::vector<StackSamplingProfiler::Module>* current_modules_ = nullptr;
350
351 // Maps a module's base address to the corresponding Module's index within
352 // current_modules_.
353 std::map<const void*, size_t> profile_module_index_;
354
355 DISALLOW_COPY_AND_ASSIGN(NativeStackSamplerMac);
356 };
357
358 NativeStackSamplerMac::NativeStackSamplerMac(
359 mach_port_t thread_port,
360 AnnotateCallback annotator,
361 NativeStackSamplerTestDelegate* test_delegate)
362 : thread_port_(thread_port),
363 annotator_(annotator),
364 test_delegate_(test_delegate),
365 thread_stack_base_address_(
366 pthread_get_stackaddr_np(pthread_from_mach_thread_np(thread_port))),
367 stack_copy_buffer_(new unsigned char[StackCopyBufferSize()]) {
368 DCHECK(annotator_);
369
370 // This class suspends threads, and those threads might be suspended in dyld.
371 // Therefore, for all the system functions that might be linked in dynamically
372 // that are used while threads are suspended, make calls to them to make sure
373 // that they are linked up.
374 x86_thread_state64_t thread_state;
375 GetThreadState(thread_port_, &thread_state);
376
377 StackCopyBufferSize();
Mark Mentovai 2017/03/30 02:33:26 No need to prime this separately, since it was already called to size stack_copy_buffer_.
Mike Wittman 2017/03/30 16:56:46 I also endorse storing the value in a member variable.
Avi (use Gerrit) 2017/03/30 20:13:38 Done.
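For illustration, a stripped-down sketch of the suggestion in this thread: compute the buffer size once, keep it in a const member, and size the buffer from that member so later checks (such as the stack-size check in SuspendThreadAndRecordStack()) can reuse it without another call. Names are hypothetical and the stand-in StackCopyBufferSize() is simplified so the sketch compiles on its own.

#include <cstddef>
#include <memory>

namespace {

// Simplified stand-in for the real StackCopyBufferSize() defined earlier in
// this file; it exists only to keep this sketch self-contained.
size_t StackCopyBufferSize() {
  return 12 * 1024 * 1024;
}

// Hypothetical sketch of caching the size in a const member. The size member
// is declared before the buffer, so it is initialized first and can be used
// to size the allocation.
class SamplerSketch {
 public:
  SamplerSketch()
      : stack_copy_buffer_size_(StackCopyBufferSize()),
        stack_copy_buffer_(new unsigned char[stack_copy_buffer_size_]) {}

  size_t stack_copy_buffer_size() const { return stack_copy_buffer_size_; }

 private:
  const size_t stack_copy_buffer_size_;
  const std::unique_ptr<unsigned char[]> stack_copy_buffer_;
};

}  // namespace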
378 }
379
380 NativeStackSamplerMac::~NativeStackSamplerMac() {}
381
382 void NativeStackSamplerMac::ProfileRecordingStarting(
383 std::vector<StackSamplingProfiler::Module>* modules) {
384 current_modules_ = modules;
385 profile_module_index_.clear();
386 }
387
388 void NativeStackSamplerMac::RecordStackSample(
389 StackSamplingProfiler::Sample* sample) {
390 DCHECK(current_modules_);
391
392 SuspendThreadAndRecordStack(sample);
393 }
394
395 void NativeStackSamplerMac::ProfileRecordingStopped() {
396 current_modules_ = nullptr;
397 }
398
399 void NativeStackSamplerMac::SuspendThreadAndRecordStack(
400 StackSamplingProfiler::Sample* sample) {
401 x86_thread_state64_t thread_state;
402
403 // Copy the stack.
404
405 uintptr_t new_stack_top = 0;
406 {
407 // IMPORTANT NOTE: Do not do ANYTHING in this scope that might
408 // allocate memory, including indirectly via use of DCHECK/CHECK or other
409 // logging statements. Otherwise this code can deadlock on heap locks in the
410 // default heap acquired by the target thread before it was suspended.
411 ScopedSuspendThread suspend_thread(thread_port_);
412 if (!suspend_thread.was_successful())
413 return;
414
415 if (!GetThreadState(thread_port_, &thread_state))
416 return;
417 uintptr_t stack_top =
418 reinterpret_cast<uintptr_t>(thread_stack_base_address_);
419 uintptr_t stack_bottom = thread_state.__rsp;
420 if (stack_bottom >= stack_top)
421 return;
422 uintptr_t stack_size = stack_top - stack_bottom;
423
424 if (stack_size > StackCopyBufferSize())
425 return;
426
427 (*annotator_)(sample);
428
429 CopyStackAndRewritePointers(
430 reinterpret_cast<uintptr_t*>(stack_copy_buffer_.get()),
431 reinterpret_cast<uintptr_t*>(stack_bottom),
432 reinterpret_cast<uintptr_t*>(stack_top), &thread_state);
433
434 new_stack_top =
435 reinterpret_cast<uintptr_t>(stack_copy_buffer_.get()) + stack_size;
436 } // ScopedSuspendThread
437
438 if (test_delegate_)
439 test_delegate_->OnPreStackWalk();
440
441 // Walk the stack and record it.
442
443 // Reserve enough memory for most stacks, to avoid repeated allocations.
444 // Approximately 99.9% of recorded stacks are 128 frames or fewer.
445 sample->frames.reserve(128);
446
447 auto* current_modules = current_modules_;
448 auto* profile_module_index = &profile_module_index_;
449 WalkStack(
450 thread_state, new_stack_top,
451 [sample, current_modules, profile_module_index](uintptr_t frame_ip) {
452 sample->frames.push_back(StackSamplingProfiler::Frame(
453 frame_ip,
454 GetModuleIndex(frame_ip, current_modules, profile_module_index)));
455 });
456 }
457
458 } // namespace
459
460 std::unique_ptr<NativeStackSampler> NativeStackSampler::Create(
461 PlatformThreadId thread_id,
462 AnnotateCallback annotator,
463 NativeStackSamplerTestDelegate* test_delegate) {
464 return base::MakeUnique<NativeStackSamplerMac>(thread_id, annotator,
465 test_delegate);
466 }
467
468 } // namespace base
