Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(143)

Side by Side Diff: base/profiler/native_stack_sampler_mac.cc

Issue 2702463003: NativeStackSampler implementation for Mac. (Closed)
Patch Set: guess not re ios Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « base/BUILD.gn ('k') | base/profiler/stack_sampling_profiler_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright 2017 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/profiler/native_stack_sampler.h"
6
7 #include <dlfcn.h>
8 #include <libkern/OSByteOrder.h>
9 #include <libunwind.h>
10 #include <mach-o/swap.h>
11 #include <mach/kern_return.h>
12 #include <mach/mach.h>
13 #include <mach/thread_act.h>
14 #include <pthread.h>
15 #include <sys/syslimits.h>
16
17 #include <map>
18 #include <memory>
19
20 #include "base/logging.h"
21 #include "base/macros.h"
22 #include "base/memory/ptr_util.h"
23 #include "base/strings/string_number_conversions.h"
24
25 namespace base {
26
27 namespace {
28
29 // Stack walking --------------------------------------------------------------
30
31 // Fills |state| with |target_thread|'s context.
32 //
33 // Note that this is called while a thread is suspended. Make very very sure
34 // that no shared resources (e.g. memory allocators) are used for the duration
35 // of this function.
36 bool GetThreadState(thread_act_t target_thread, x86_thread_state64_t* state) {
37 mach_msg_type_number_t count =
38 static_cast<mach_msg_type_number_t>(x86_THREAD_STATE64_COUNT);
39 return thread_get_state(target_thread, x86_THREAD_STATE64,
40 reinterpret_cast<thread_state_t>(state),
41 &count) == KERN_SUCCESS;
42 }
43
// If |pointer| falls within [original_stack_bottom, original_stack_top),
// translates it to the corresponding location in the copied stack; otherwise
// returns it unchanged.
//
// Note that this is called while a thread is suspended. Make very very sure
// that no shared resources (e.g. memory allocators) are used for the duration
// of this function.
uint64_t RewritePointerIfInOriginalStack(uint64_t* original_stack_bottom,
                                         uint64_t* original_stack_top,
                                         uint64_t* stack_copy_bottom,
                                         uint64_t pointer) {
  const uint64_t bottom = reinterpret_cast<uint64_t>(original_stack_bottom);
  const uint64_t top = reinterpret_cast<uint64_t>(original_stack_top);

  if (pointer >= bottom && pointer < top) {
    // Preserve the pointer's offset from the stack bottom within the copy.
    return reinterpret_cast<uint64_t>(stack_copy_bottom) + (pointer - bottom);
  }
  return pointer;
}
68
69 // Copies the stack to a buffer while rewriting possible pointers to locations
70 // within the stack to point to the corresponding locations in the copy. This is
71 // necessary to handle stack frames with dynamic stack allocation, where a
72 // pointer to the beginning of the dynamic allocation area is stored on the
73 // stack and/or in a non-volatile register.
74 //
75 // Returns the top of the stack in the stack copy.
76 //
77 // Eager rewriting of anything that looks like a pointer to the stack, as done
78 // in this function, does not adversely affect the stack unwinding. The only
79 // other values on the stack the unwinding depends on are return addresses,
80 // which should not point within the stack memory. The rewriting is guaranteed
81 // to catch all pointers because the stacks are guaranteed by the ABI to be
82 // sizeof(void*) aligned.
83 //
84 // Note that this is called while a thread is suspended. Make very very sure
85 // that no shared resources (e.g. memory allocators) are used for the duration
86 // of this function.
87 uint64_t CopyStackAndRewritePointers(void* dest,
88 void* from,
89 void* to,
90 x86_thread_state64_t* thread_state)
91 NO_SANITIZE("address") {
92 uint64_t* original_stack_bottom = static_cast<uint64_t*>(from);
93 uint64_t* original_stack_top = static_cast<uint64_t*>(to);
94 uint64_t* stack_copy_bottom = static_cast<uint64_t*>(dest);
95
96 size_t count = original_stack_top - original_stack_bottom;
97 for (size_t pos = 0; pos < count; ++pos) {
98 stack_copy_bottom[pos] = RewritePointerIfInOriginalStack(
99 original_stack_bottom, original_stack_top, stack_copy_bottom,
100 original_stack_bottom[pos]);
101 }
102
103 thread_state->__rbp =
104 RewritePointerIfInOriginalStack(original_stack_bottom, original_stack_top,
105 stack_copy_bottom, thread_state->__rbp);
106 thread_state->__rsp =
107 RewritePointerIfInOriginalStack(original_stack_bottom, original_stack_top,
108 stack_copy_bottom, thread_state->__rsp);
109
110 return reinterpret_cast<uint64_t>(stack_copy_bottom + count);
111 }
112
113 // Walks the stack represented by |unwind_context|, calling back to the provided
114 // lambda for each frame. Returns false if an error occurred, otherwise returns
115 // true.
116 template <typename StackFrameCallback>
117 bool WalkStackFromContext(unw_context_t* unwind_context,
118 size_t* frame_count,
119 const StackFrameCallback& callback) {
120 unw_cursor_t unwind_cursor;
121 unw_init_local(&unwind_cursor, unwind_context);
122
123 int step_result;
124 unw_word_t ip;
125 do {
126 ++(*frame_count);
127 unw_get_reg(&unwind_cursor, UNW_REG_IP, &ip);
128
129 callback(static_cast<uintptr_t>(ip));
130
131 step_result = unw_step(&unwind_cursor);
132 } while (step_result > 0);
133
134 if (step_result != 0)
135 return false;
136
137 return true;
138 }
139
140 // Walks the stack represented by |thread_state|, calling back to the provided
141 // lambda for each frame.
142 template <typename StackFrameCallback>
143 void WalkStack(const x86_thread_state64_t& thread_state,
144 uint64_t stack_top,
145 const StackFrameCallback& callback) {
146 size_t frame_count = 0;
147 // This uses libunwind to walk the stack. libunwind is designed to be used for
148 // a thread to walk its own stack. This creates two problems.
149
150 // Problem 1: There is no official way to create a unw_context other than to
151 // create it from the current state of the current thread's stack. To get
152 // around this, forge a context. A unw_context is just a copy of the 16 main
153 // registers followed by the instruction pointer, nothing more.
154 // Coincidentally, the first 17 items of the x86_thread_state64_t type are
155 // exactly those registers in exactly the same order, so just bulk copy them
156 // over.
157 unw_context_t unwind_context;
158 memcpy(&unwind_context, &thread_state, sizeof(uint64_t) * 17);
159 bool result = WalkStackFromContext(&unwind_context, &frame_count, callback);
160
161 if (!result)
162 return;
163
164 if (frame_count == 1) {
165 // Problem 2: Because libunwind is designed to be triggered by user code on
166 // their own thread, if it hits a library that has no unwind info for the
167 // function that is being executed, it just stops. This isn't a problem in
168 // the normal case, but in this case, it's quite possible that the stack
169 // being walked is stopped in a function that bridges to the kernel and thus
170 // is missing the unwind info.
171 //
172 // If so, cheat by scanning the stack and trying again. Only do this once,
173 // and only if the first time using libunwind fails after one frame.
174 bool ip_in_valid_image = false;
175 do {
Mike Wittman 2017/03/02 16:35:04 How about using aliases for the register state, to
Avi (use Gerrit) 2017/03/02 19:50:44 Done.
176 unwind_context.data[16] =
177 *reinterpret_cast<uint64_t*>(unwind_context.data[7]); // rip = *rsp
178 unwind_context.data[7] = unwind_context.data[7] + 8; // rsp++
179 unw_cursor_t unwind_cursor;
180 unw_init_local(&unwind_cursor, &unwind_context);
181 unw_proc_info_t proc_info;
182 unw_get_proc_info(&unwind_cursor, &proc_info);
183 ip_in_valid_image = proc_info.extra != 0;
Mike Wittman 2017/03/02 16:35:04 Move all these calls into an IsIPInValidImage(cons
Avi (use Gerrit) 2017/03/02 19:50:44 Done.
184 } while (!ip_in_valid_image && unwind_context.data[16] < stack_top);
Mike Wittman 2017/03/02 16:35:04 Shouldn't this be unwind_context.data[7] < stack_t
Avi (use Gerrit) 2017/03/02 19:50:44 Yes. Done.
185
186 if (ip_in_valid_image)
187 WalkStackFromContext(&unwind_context, &frame_count, callback);
188 }
189 }
190
191 // Module identifiers ---------------------------------------------------------
192
193 // Fills |id| with the UUID of the x86_64 Mach-O binary with the header
194 // |mach_header|. Returns false if the binary is malformed or does not contain
195 // the UUID load command.
196 bool GetUUID(const mach_header_64* mach_header, unsigned char* id) {
197 size_t offset = sizeof(mach_header_64);
198 size_t offset_limit = sizeof(mach_header_64) + mach_header->sizeofcmds;
199 for (uint32_t i = 0; (i < mach_header->ncmds) &&
200 (offset + sizeof(load_command) < offset_limit);
201 ++i) {
202 const load_command* current_cmd = reinterpret_cast<const load_command*>(
203 reinterpret_cast<const uint8_t*>(mach_header) + offset);
204
205 if (offset + current_cmd->cmdsize > offset_limit) {
206 // This command runs off the end of the command list. This is malformed.
207 return false;
208 }
209
210 if (current_cmd->cmd == LC_UUID) {
211 if (current_cmd->cmdsize < sizeof(uuid_command)) {
212 // This "UUID command" is too small. This is malformed.
213 return false;
214 }
215
216 const uuid_command* uuid_cmd =
217 reinterpret_cast<const uuid_command*>(current_cmd);
218 static_assert(sizeof(uuid_cmd->uuid) == sizeof(uuid_t),
219 "UUID field of UUID command should be 16 bytes.");
220 memcpy(id, &uuid_cmd->uuid, sizeof(uuid_t));
221 return true;
222 }
223 offset += current_cmd->cmdsize;
224 }
225 return false;
226 }
227
228 // Returns the hex encoding of a 16-byte ID for the binary loaded at
229 // |module_addr|. Returns an empty string if the UUID cannot be found at
230 // |module_addr|.
231 std::string GetUniqueId(const void* module_addr) {
232 const mach_header_64* mach_header =
233 reinterpret_cast<const mach_header_64*>(module_addr);
234 DCHECK_EQ(MH_MAGIC_64, mach_header->magic);
235
236 unsigned char id[sizeof(uuid_t)];
237 if (!GetUUID(mach_header, id))
238 return "";
239 return HexEncode(id, sizeof(uuid_t));
240 }
241
242 // Gets the index for the Module containing |instruction_pointer| in
243 // |modules|, adding it if it's not already present. Returns
244 // StackSamplingProfiler::Frame::kUnknownModuleIndex if no Module can be
245 // determined for |module|.
246 size_t GetModuleIndex(const uintptr_t instruction_pointer,
247 std::vector<StackSamplingProfiler::Module>* modules,
248 std::map<const void*, size_t>* profile_module_index) {
249 Dl_info inf;
250 if (!dladdr(reinterpret_cast<const void*>(instruction_pointer), &inf))
251 return StackSamplingProfiler::Frame::kUnknownModuleIndex;
252
253 auto module_index = profile_module_index->find(inf.dli_fbase);
254 if (module_index == profile_module_index->end()) {
255 StackSamplingProfiler::Module module(
256 reinterpret_cast<uintptr_t>(inf.dli_fbase), GetUniqueId(inf.dli_fbase),
257 base::FilePath(inf.dli_fname));
258 modules->push_back(module);
259 module_index =
260 profile_module_index
261 ->insert(std::make_pair(inf.dli_fbase, modules->size() - 1))
262 .first;
263 }
264 return module_index->second;
265 }
266
267 // ScopedSuspendThread --------------------------------------------------------
268
269 // Suspends a thread for the lifetime of the object.
270 class ScopedSuspendThread {
271 public:
272 explicit ScopedSuspendThread(mach_port_t thread_port);
273 ~ScopedSuspendThread();
274
275 bool was_successful() const { return was_successful_; }
276
277 private:
278 mach_port_t thread_port_;
279 bool was_successful_;
280
281 DISALLOW_COPY_AND_ASSIGN(ScopedSuspendThread);
282 };
283
284 ScopedSuspendThread::ScopedSuspendThread(mach_port_t thread_port)
285 : thread_port_(thread_port),
286 was_successful_(thread_suspend(thread_port) == KERN_SUCCESS) {}
287
288 ScopedSuspendThread::~ScopedSuspendThread() {
289 if (!was_successful_)
290 return;
291
292 kern_return_t resume_result = thread_resume(thread_port_);
293 CHECK_EQ(KERN_SUCCESS, resume_result) << "thread_resume failed";
294 }
295
// NativeStackSamplerMac ------------------------------------------------------

// NativeStackSampler implementation for x86-64 macOS. Each sample is taken by
// suspending the target thread, copying its stack into |stack_copy_buffer_|,
// resuming the thread, and then walking the copied stack with libunwind.
class NativeStackSamplerMac : public NativeStackSampler {
 public:
  NativeStackSamplerMac(mach_port_t thread_port,
                        AnnotateCallback annotator,
                        NativeStackSamplerTestDelegate* test_delegate);
  ~NativeStackSamplerMac() override;

  // StackSamplingProfiler::NativeStackSampler:
  void ProfileRecordingStarting(
      std::vector<StackSamplingProfiler::Module>* modules) override;
  void RecordStackSample(StackSamplingProfiler::Sample* sample) override;
  void ProfileRecordingStopped() override;

 private:
  enum {
    // Intended to hold the largest stack used by Chrome. The default macOS main
    // thread stack size is 8 MB, and this allows for expansion if it occurs.
    kStackCopyBufferSize = 12 * 1024 * 1024
  };

  // Suspends the thread with |thread_port_|, copies its stack and resumes the
  // thread, then records the stack frames and associated modules into |sample|.
  void SuspendThreadAndRecordStack(StackSamplingProfiler::Sample* sample);

  // Weak reference: Mach port for thread being profiled.
  mach_port_t thread_port_;

  // Invoked on each sample while the target thread is suspended; must be
  // async-signal-safe in the sense that it cannot allocate.
  const AnnotateCallback annotator_;

  // May be null. Notified via OnPreStackWalk() before each stack walk.
  NativeStackSamplerTestDelegate* const test_delegate_;

  // The stack base address corresponding to |thread_handle_|.
  const void* const thread_stack_base_address_;

  // Buffer to use for copies of the stack. We use the same buffer for all the
  // samples to avoid the overhead of multiple allocations and frees.
  const std::unique_ptr<unsigned char[]> stack_copy_buffer_;

  // Weak. Points to the modules associated with the profile being recorded
  // between ProfileRecordingStarting() and ProfileRecordingStopped().
  std::vector<StackSamplingProfiler::Module>* current_modules_ = nullptr;

  // Maps a module's base address to the corresponding Module's index within
  // current_modules_.
  std::map<const void*, size_t> profile_module_index_;

  DISALLOW_COPY_AND_ASSIGN(NativeStackSamplerMac);
};
346
// Caches the target thread's stack base address and allocates the reusable
// stack-copy buffer up front so no allocation happens while sampling.
NativeStackSamplerMac::NativeStackSamplerMac(
    mach_port_t thread_port,
    AnnotateCallback annotator,
    NativeStackSamplerTestDelegate* test_delegate)
    : thread_port_(thread_port),
      annotator_(annotator),
      test_delegate_(test_delegate),
      thread_stack_base_address_(
          pthread_get_stackaddr_np(pthread_from_mach_thread_np(thread_port))),
      stack_copy_buffer_(new unsigned char[kStackCopyBufferSize]) {
  DCHECK(annotator_);

  // This class suspends threads, and those threads might be suspended in dyld.
  // Therefore, for all the system functions that might be linked in dynamically
  // that are used while threads are suspended, make calls to them to make sure
  // that they are linked up.
  x86_thread_state64_t thread_state;
  // The result is deliberately ignored; this call exists only to force the
  // dynamic linking described above before any thread is suspended.
  GetThreadState(thread_port_, &thread_state);
}
366
367 NativeStackSamplerMac::~NativeStackSamplerMac() {}
368
369 void NativeStackSamplerMac::ProfileRecordingStarting(
370 std::vector<StackSamplingProfiler::Module>* modules) {
371 current_modules_ = modules;
372 profile_module_index_.clear();
373 }
374
375 void NativeStackSamplerMac::RecordStackSample(
376 StackSamplingProfiler::Sample* sample) {
377 DCHECK(current_modules_);
378
379 if (!stack_copy_buffer_)
380 return;
381
382 SuspendThreadAndRecordStack(sample);
383 }
384
385 void NativeStackSamplerMac::ProfileRecordingStopped() {
386 current_modules_ = nullptr;
387 }
388
389 void NativeStackSamplerMac::SuspendThreadAndRecordStack(
390 StackSamplingProfiler::Sample* sample) {
391 x86_thread_state64_t thread_state;
392
393 // Copy the stack.
394
395 uint64_t new_stack_top;
Mike Wittman 2017/03/02 16:35:03 initialize to 0
Avi (use Gerrit) 2017/03/02 19:50:44 Done.
396 {
397 // IMPORTANT NOTE: Do not do ANYTHING in this in this scope that might
398 // allocate memory, including indirectly via use of DCHECK/CHECK or other
399 // logging statements. Otherwise this code can deadlock on heap locks in the
400 // default heap acquired by the target thread before it was suspended.
401 ScopedSuspendThread suspend_thread(thread_port_);
402 if (!suspend_thread.was_successful())
403 return;
404
405 if (!GetThreadState(thread_port_, &thread_state))
406 return;
407 uint64_t stack_top = reinterpret_cast<uint64_t>(thread_stack_base_address_);
408 uint64_t stack_bottom = thread_state.__rsp;
409
410 if ((stack_top - stack_bottom) > kStackCopyBufferSize)
411 return;
412
413 (*annotator_)(sample);
414
415 new_stack_top = CopyStackAndRewritePointers(
416 stack_copy_buffer_.get(), reinterpret_cast<void*>(stack_bottom),
417 reinterpret_cast<void*>(stack_top), &thread_state);
418 } // ScopedSuspendThread
Mike Wittman 2017/03/02 16:35:03 nit: new_stack_top could be computed directly here
Avi (use Gerrit) 2017/03/02 19:50:44 Done.
419
420 if (test_delegate_)
421 test_delegate_->OnPreStackWalk();
422
423 // Walk the stack and record it.
424
425 // Reserve enough memory for most stacks, to avoid repeated allocations.
426 // Approximately 99.9% of recorded stacks are 128 frames or fewer.
427 sample->frames.reserve(128);
428
429 auto current_modules = current_modules_;
430 auto profile_module_index = &profile_module_index_;
431 WalkStack(
432 thread_state, new_stack_top,
433 [sample, current_modules, profile_module_index](uintptr_t frame_ip) {
434 sample->frames.push_back(StackSamplingProfiler::Frame(
435 frame_ip,
436 GetModuleIndex(frame_ip, current_modules, profile_module_index)));
437 });
438 }
439
440 } // namespace
441
442 std::unique_ptr<NativeStackSampler> NativeStackSampler::Create(
443 PlatformThreadId thread_id,
444 AnnotateCallback annotator,
445 NativeStackSamplerTestDelegate* test_delegate) {
446 #if !defined(__x86_64__)
447 // No.
448 return nullptr;
449 #endif
450 return base::MakeUnique<NativeStackSamplerMac>(thread_id, annotator,
451 test_delegate);
452 }
453
454 } // namespace base
OLDNEW
« no previous file with comments | « base/BUILD.gn ('k') | base/profiler/stack_sampling_profiler_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698