Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(666)

Side by Side Diff: base/profiler/native_stack_sampler_mac.cc

Issue 2702463003: NativeStackSampler implementation for Mac. (Closed)
Patch Set: rev Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « base/BUILD.gn ('k') | base/profiler/native_stack_sampler_win.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright 2017 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/profiler/native_stack_sampler.h"
6
7 #include <dlfcn.h>
8 #include <libkern/OSByteOrder.h>
9 #include <libunwind.h>
10 #include <mach-o/swap.h>
11 #include <mach/kern_return.h>
12 #include <mach/mach.h>
13 #include <mach/thread_act.h>
14 #include <pthread.h>
15 #include <sys/syslimits.h>
16
17 #include <algorithm>
18 #include <map>
19 #include <memory>
20
21 #include "base/logging.h"
22 #include "base/mac/mach_logging.h"
23 #include "base/macros.h"
24 #include "base/memory/ptr_util.h"
25 #include "base/strings/string_number_conversions.h"
26
27 namespace base {
28
29 namespace {
30
31 // Stack walking --------------------------------------------------------------
32
33 // Fills |state| with |target_thread|'s context.
34 //
35 // Note that this is called while a thread is suspended. Make very very sure
36 // that no shared resources (e.g. memory allocators) are used for the duration
37 // of this function.
38 bool GetThreadState(thread_act_t target_thread, x86_thread_state64_t* state) {
39 mach_msg_type_number_t count =
40 static_cast<mach_msg_type_number_t>(x86_THREAD_STATE64_COUNT);
41 return thread_get_state(target_thread, x86_THREAD_STATE64,
42 reinterpret_cast<thread_state_t>(state),
43 &count) == KERN_SUCCESS;
44 }
45
// If |pointer| lies within the original stack's address range, returns the
// corresponding address in the copied stack; otherwise returns |pointer|
// unchanged.
//
// Note that this is called while a thread is suspended. Make very very sure
// that no shared resources (e.g. memory allocators) are used for the duration
// of this function.
uintptr_t RewritePointerIfInOriginalStack(uintptr_t* original_stack_bottom,
                                          uintptr_t* original_stack_top,
                                          uintptr_t* stack_copy_bottom,
                                          uintptr_t pointer) {
  const auto bottom = reinterpret_cast<uintptr_t>(original_stack_bottom);
  const auto top = reinterpret_cast<uintptr_t>(original_stack_top);

  const bool in_original_stack = pointer >= bottom && pointer < top;
  if (!in_original_stack)
    return pointer;

  // Preserve the offset from the stack bottom within the copy.
  return reinterpret_cast<uintptr_t>(stack_copy_bottom) + (pointer - bottom);
}
70
71 // Copies the stack to a buffer while rewriting possible pointers to locations
72 // within the stack to point to the corresponding locations in the copy. This is
73 // necessary to handle stack frames with dynamic stack allocation, where a
74 // pointer to the beginning of the dynamic allocation area is stored on the
75 // stack and/or in a non-volatile register.
76 //
77 // Eager rewriting of anything that looks like a pointer to the stack, as done
78 // in this function, does not adversely affect the stack unwinding. The only
79 // other values on the stack the unwinding depends on are return addresses,
80 // which should not point within the stack memory. The rewriting is guaranteed
81 // to catch all pointers because the stacks are guaranteed by the ABI to be
82 // sizeof(void*) aligned.
83 //
84 // Note that this is called while a thread is suspended. Make very very sure
85 // that no shared resources (e.g. memory allocators) are used for the duration
86 // of this function.
87 void CopyStackAndRewritePointers(uintptr_t* stack_copy_bottom,
88 uintptr_t* original_stack_bottom,
89 uintptr_t* original_stack_top,
90 x86_thread_state64_t* thread_state)
91 NO_SANITIZE("address") {
92 size_t count = original_stack_top - original_stack_bottom;
93 for (size_t pos = 0; pos < count; ++pos) {
94 stack_copy_bottom[pos] = RewritePointerIfInOriginalStack(
95 original_stack_bottom, original_stack_top, stack_copy_bottom,
96 original_stack_bottom[pos]);
97 }
98
99 uint64_t* rewrite_registers[] = {&thread_state->__rbx, &thread_state->__rbp,
100 &thread_state->__rsp, &thread_state->__r12,
101 &thread_state->__r13, &thread_state->__r14,
102 &thread_state->__r15};
103 for (auto* reg : rewrite_registers) {
104 *reg = RewritePointerIfInOriginalStack(
105 original_stack_bottom, original_stack_top, stack_copy_bottom, *reg);
106 }
107 }
108
109 // Walks the stack represented by |unwind_context|, calling back to the provided
110 // lambda for each frame. Returns false if an error occurred, otherwise returns
111 // true.
112 template <typename StackFrameCallback>
113 bool WalkStackFromContext(unw_context_t* unwind_context,
114 size_t* frame_count,
115 const StackFrameCallback& callback) {
116 unw_cursor_t unwind_cursor;
117 unw_init_local(&unwind_cursor, unwind_context);
118
119 int step_result;
120 unw_word_t ip;
121 do {
122 ++(*frame_count);
123 unw_get_reg(&unwind_cursor, UNW_REG_IP, &ip);
124
125 callback(static_cast<uintptr_t>(ip));
126
127 step_result = unw_step(&unwind_cursor);
128 } while (step_result > 0);
129
130 if (step_result != 0)
131 return false;
132
133 return true;
134 }
135
136 bool IsIPInValidImage(unw_context_t* unwind_context) {
137 unw_cursor_t unwind_cursor;
138 unw_init_local(&unwind_cursor, unwind_context);
139 unw_proc_info_t proc_info;
140 unw_get_proc_info(&unwind_cursor, &proc_info);
141 return proc_info.extra != 0;
142 }
143
// Walks the stack represented by |thread_state|, calling back to the provided
// lambda for each frame. |stack_top| bounds the scan-recovery loop below; it
// points one past the highest address of the (copied) stack.
template <typename StackFrameCallback>
void WalkStack(const x86_thread_state64_t& thread_state,
               uintptr_t stack_top,
               const StackFrameCallback& callback) {
  size_t frame_count = 0;
  // This uses libunwind to walk the stack. libunwind is designed to be used for
  // a thread to walk its own stack. This creates two problems.

  // Problem 1: There is no official way to create a unw_context other than to
  // create it from the current state of the current thread's stack. To get
  // around this, forge a context. A unw_context is just a copy of the 16 main
  // registers followed by the instruction pointer, nothing more.
  // Coincidentally, the first 17 items of the x86_thread_state64_t type are
  // exactly those registers in exactly the same order, so just bulk copy them
  // over.
  unw_context_t unwind_context;
  memcpy(&unwind_context, &thread_state, sizeof(uintptr_t) * 17);
  bool result = WalkStackFromContext(&unwind_context, &frame_count, callback);

  if (!result)
    return;

  if (frame_count == 1) {
    // Problem 2: Because libunwind is designed to be triggered by user code on
    // their own thread, if it hits a library that has no unwind info for the
    // function that is being executed, it just stops. This isn't a problem in
    // the normal case, but in this case, it's quite possible that the stack
    // being walked is stopped in a function that bridges to the kernel and thus
    // is missing the unwind info.
    //
    // If so, cheat by scanning the stack and trying again. Only do this if the
    // first time using libunwind fails after one frame.
    bool ip_in_valid_image = false;
    // Per the register order memcpy'd above, data[7] is RSP and data[16] is
    // RIP in the forged context.
    auto& rsp = unwind_context.data[7];
    auto& rip = unwind_context.data[16];
    // NOTE(review): kMaxScanDepth is added directly to |rsp|, so it limits the
    // scan to 50 *bytes* (~6 words), not 50 frames/words. Confirm whether the
    // intent was 50 * sizeof(uintptr_t).
    const uintptr_t kMaxScanDepth = 50;
    uintptr_t scan_limit = std::min<uintptr_t>(stack_top, rsp + kMaxScanDepth);
    do {
      // Simulate a "ret": pop a candidate return address off the stack.
      // NOTE(review): the dereference happens before the alignment check
      // below; x86-64 tolerates misaligned loads, but checking first would be
      // cleaner — confirm ordering is intentional.
      rip = *reinterpret_cast<uintptr_t*>(rsp); // rip = *rsp
      rsp += sizeof(uintptr_t); // rsp++
      if (rsp % sizeof(uintptr_t)) {
        // The "stack pointer" isn't aligned. Just give up.
        return;
      }

      ip_in_valid_image = IsIPInValidImage(&unwind_context);
    } while (!ip_in_valid_image && rsp < scan_limit);

    // Retry the unwind only if the scan landed on a plausible return address.
    if (ip_in_valid_image)
      WalkStackFromContext(&unwind_context, &frame_count, callback);
  }
}
198
199 // Module identifiers ---------------------------------------------------------
200
201 // Returns the hex encoding of a 16-byte ID for the binary loaded at
202 // |module_addr|. Returns an empty string if the UUID cannot be found at
203 // |module_addr|.
204 std::string GetUniqueId(const void* module_addr) {
205 const mach_header_64* mach_header =
206 reinterpret_cast<const mach_header_64*>(module_addr);
207 DCHECK_EQ(MH_MAGIC_64, mach_header->magic);
208
209 size_t offset = sizeof(mach_header_64);
210 size_t offset_limit = sizeof(mach_header_64) + mach_header->sizeofcmds;
211 for (uint32_t i = 0; (i < mach_header->ncmds) &&
212 (offset + sizeof(load_command) < offset_limit);
213 ++i) {
214 const load_command* current_cmd = reinterpret_cast<const load_command*>(
215 reinterpret_cast<const uint8_t*>(mach_header) + offset);
216
217 if (offset + current_cmd->cmdsize > offset_limit) {
218 // This command runs off the end of the command list. This is malformed.
219 return std::string();
220 }
221
222 if (current_cmd->cmd == LC_UUID) {
223 if (current_cmd->cmdsize < sizeof(uuid_command)) {
224 // This "UUID command" is too small. This is malformed.
225 return std::string();
226 }
227
228 const uuid_command* uuid_cmd =
229 reinterpret_cast<const uuid_command*>(current_cmd);
230 static_assert(sizeof(uuid_cmd->uuid) == sizeof(uuid_t),
231 "UUID field of UUID command should be 16 bytes.");
232 return HexEncode(&uuid_cmd->uuid, sizeof(uuid_cmd->uuid));
233 }
234 offset += current_cmd->cmdsize;
235 }
236 return std::string();
237 }
238
239 // Gets the index for the Module containing |instruction_pointer| in
240 // |modules|, adding it if it's not already present. Returns
241 // StackSamplingProfiler::Frame::kUnknownModuleIndex if no Module can be
242 // determined for |module|.
243 size_t GetModuleIndex(const uintptr_t instruction_pointer,
244 std::vector<StackSamplingProfiler::Module>* modules,
245 std::map<const void*, size_t>* profile_module_index) {
246 Dl_info inf;
247 if (!dladdr(reinterpret_cast<const void*>(instruction_pointer), &inf))
248 return StackSamplingProfiler::Frame::kUnknownModuleIndex;
249
250 auto module_index = profile_module_index->find(inf.dli_fbase);
251 if (module_index == profile_module_index->end()) {
252 StackSamplingProfiler::Module module(
253 reinterpret_cast<uintptr_t>(inf.dli_fbase), GetUniqueId(inf.dli_fbase),
254 base::FilePath(inf.dli_fname));
255 modules->push_back(module);
256 module_index =
257 profile_module_index
258 ->insert(std::make_pair(inf.dli_fbase, modules->size() - 1))
259 .first;
260 }
261 return module_index->second;
262 }
263
264 // ScopedSuspendThread --------------------------------------------------------
265
266 // Suspends a thread for the lifetime of the object.
267 class ScopedSuspendThread {
268 public:
269 explicit ScopedSuspendThread(mach_port_t thread_port)
270 : thread_port_(thread_suspend(thread_port) == KERN_SUCCESS
271 ? thread_port
272 : MACH_PORT_NULL) {}
273
274 ~ScopedSuspendThread() {
275 if (!was_successful())
276 return;
277
278 kern_return_t kr = thread_resume(thread_port_);
279 MACH_CHECK(kr == KERN_SUCCESS, kr) << "thread_resume";
280 }
281
282 bool was_successful() const { return thread_port_ != MACH_PORT_NULL; }
283
284 private:
285 mach_port_t thread_port_;
286
287 DISALLOW_COPY_AND_ASSIGN(ScopedSuspendThread);
288 };
289
290 // NativeStackSamplerMac ------------------------------------------------------
291
// Mac (x86-64) implementation of NativeStackSampler. Suspends the target
// thread, copies its stack into a reusable buffer, resumes the thread, then
// walks the copy with libunwind.
class NativeStackSamplerMac : public NativeStackSampler {
 public:
  NativeStackSamplerMac(mach_port_t thread_port,
                        AnnotateCallback annotator,
                        NativeStackSamplerTestDelegate* test_delegate);
  ~NativeStackSamplerMac() override;

  // StackSamplingProfiler::NativeStackSampler:
  void ProfileRecordingStarting(
      std::vector<StackSamplingProfiler::Module>* modules) override;
  void RecordStackSample(StackSamplingProfiler::Sample* sample) override;
  void ProfileRecordingStopped() override;

 private:
  // Intended to hold the largest stack used by Chrome. The default macOS main
  // thread stack size is 8 MB, and this allows for expansion if it occurs.
  // NOTE(review): this is a single fixed size used for every sampled thread;
  // deriving it from getrlimit(RLIMIT_STACK) was considered but only applies
  // to the main thread — confirm 12 MB remains sufficient.
  static constexpr size_t kStackCopyBufferSize = 12 * 1024 * 1024;

  // Suspends the thread with |thread_port_|, copies its stack and resumes the
  // thread, then records the stack frames and associated modules into |sample|.
  void SuspendThreadAndRecordStack(StackSamplingProfiler::Sample* sample);

  // Weak reference: Mach port for thread being profiled.
  mach_port_t thread_port_;

  // Called under suspension to annotate each sample; must not allocate.
  const AnnotateCallback annotator_;

  // May be null; notified before each stack walk for test synchronization.
  NativeStackSamplerTestDelegate* const test_delegate_;

  // The stack base address corresponding to |thread_port_|.
  const void* const thread_stack_base_address_;

  // Buffer to use for copies of the stack. We use the same buffer for all the
  // samples to avoid the overhead of multiple allocations and frees.
  const std::unique_ptr<unsigned char[]> stack_copy_buffer_;

  // Weak. Points to the modules associated with the profile being recorded
  // between ProfileRecordingStarting() and ProfileRecordingStopped().
  std::vector<StackSamplingProfiler::Module>* current_modules_ = nullptr;

  // Maps a module's base address to the corresponding Module's index within
  // current_modules_.
  std::map<const void*, size_t> profile_module_index_;

  DISALLOW_COPY_AND_ASSIGN(NativeStackSamplerMac);
};
338
// Captures the target thread's stack base address up front (it is fixed for
// the thread's lifetime) and allocates the single reusable stack-copy buffer.
NativeStackSamplerMac::NativeStackSamplerMac(
    mach_port_t thread_port,
    AnnotateCallback annotator,
    NativeStackSamplerTestDelegate* test_delegate)
    : thread_port_(thread_port),
      annotator_(annotator),
      test_delegate_(test_delegate),
      thread_stack_base_address_(
          pthread_get_stackaddr_np(pthread_from_mach_thread_np(thread_port))),
      stack_copy_buffer_(new unsigned char[kStackCopyBufferSize]) {
  DCHECK(annotator_);

  // This class suspends threads, and those threads might be suspended in dyld.
  // Therefore, for all the system functions that might be linked in dynamically
  // that are used while threads are suspended, make calls to them to make sure
  // that they are linked up.
  // The result is intentionally ignored: this call exists only to force the
  // dynamic linker to resolve thread_get_state before any thread is suspended.
  x86_thread_state64_t thread_state;
  GetThreadState(thread_port_, &thread_state);
}
358
359 NativeStackSamplerMac::~NativeStackSamplerMac() {}
360
361 void NativeStackSamplerMac::ProfileRecordingStarting(
362 std::vector<StackSamplingProfiler::Module>* modules) {
363 current_modules_ = modules;
364 profile_module_index_.clear();
365 }
366
367 void NativeStackSamplerMac::RecordStackSample(
368 StackSamplingProfiler::Sample* sample) {
369 DCHECK(current_modules_);
370
371 SuspendThreadAndRecordStack(sample);
372 }
373
374 void NativeStackSamplerMac::ProfileRecordingStopped() {
375 current_modules_ = nullptr;
376 }
377
// Suspends the target thread just long enough to snapshot its registers and
// copy its stack into |stack_copy_buffer_|, then walks the copy outside the
// suspension window and appends frames to |sample|.
void NativeStackSamplerMac::SuspendThreadAndRecordStack(
    StackSamplingProfiler::Sample* sample) {
  x86_thread_state64_t thread_state;

  // Copy the stack.

  uintptr_t new_stack_top = 0;
  {
    // IMPORTANT NOTE: Do not do ANYTHING in this scope that might
    // allocate memory, including indirectly via use of DCHECK/CHECK or other
    // logging statements. Otherwise this code can deadlock on heap locks in the
    // default heap acquired by the target thread before it was suspended.
    ScopedSuspendThread suspend_thread(thread_port_);
    if (!suspend_thread.was_successful())
      return;

    if (!GetThreadState(thread_port_, &thread_state))
      return;
    // The stack grows down: base address is the top, rsp is the live bottom.
    uintptr_t stack_top =
        reinterpret_cast<uintptr_t>(thread_stack_base_address_);
    uintptr_t stack_bottom = thread_state.__rsp;
    if (stack_bottom >= stack_top)
      return;
    uintptr_t stack_size = stack_top - stack_bottom;

    // Silently drop the sample rather than overflow the fixed-size buffer.
    if (stack_size > kStackCopyBufferSize)
      return;

    // The annotator runs under suspension and therefore must not allocate.
    (*annotator_)(sample);

    CopyStackAndRewritePointers(
        reinterpret_cast<uintptr_t*>(stack_copy_buffer_.get()),
        reinterpret_cast<uintptr_t*>(stack_bottom),
        reinterpret_cast<uintptr_t*>(stack_top), &thread_state);

    new_stack_top =
        reinterpret_cast<uintptr_t>(stack_copy_buffer_.get()) + stack_size;
  } // ScopedSuspendThread

  if (test_delegate_)
    test_delegate_->OnPreStackWalk();

  // Walk the stack and record it.

  // Reserve enough memory for most stacks, to avoid repeated allocations.
  // Approximately 99.9% of recorded stacks are 128 frames or fewer.
  sample->frames.reserve(128);

  // Capture raw pointers rather than |this| so the lambda stays decoupled
  // from the sampler's lifetime during the walk.
  auto* current_modules = current_modules_;
  auto* profile_module_index = &profile_module_index_;
  WalkStack(
      thread_state, new_stack_top,
      [sample, current_modules, profile_module_index](uintptr_t frame_ip) {
        sample->frames.push_back(StackSamplingProfiler::Frame(
            frame_ip,
            GetModuleIndex(frame_ip, current_modules, profile_module_index)));
      });
}
436
437 } // namespace
438
439 std::unique_ptr<NativeStackSampler> NativeStackSampler::Create(
440 PlatformThreadId thread_id,
441 AnnotateCallback annotator,
442 NativeStackSamplerTestDelegate* test_delegate) {
443 return base::MakeUnique<NativeStackSamplerMac>(thread_id, annotator,
444 test_delegate);
445 }
446
447 } // namespace base
OLDNEW
« no previous file with comments | « base/BUILD.gn ('k') | base/profiler/native_stack_sampler_win.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698