OLD | NEW |
1 // Copyright 2017 The Chromium Authors. All rights reserved. | 1 // Copyright 2017 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "base/profiler/native_stack_sampler.h" | 5 #include "base/profiler/native_stack_sampler.h" |
6 | 6 |
7 #include <dlfcn.h> | 7 #include <dlfcn.h> |
8 #include <libkern/OSByteOrder.h> | 8 #include <libkern/OSByteOrder.h> |
9 #include <libunwind.h> | 9 #include <libunwind.h> |
10 #include <mach-o/swap.h> | 10 #include <mach-o/swap.h> |
(...skipping 11 matching lines...) Expand all Loading... |
22 #include "base/logging.h" | 22 #include "base/logging.h" |
23 #include "base/mac/mach_logging.h" | 23 #include "base/mac/mach_logging.h" |
24 #include "base/macros.h" | 24 #include "base/macros.h" |
25 #include "base/memory/ptr_util.h" | 25 #include "base/memory/ptr_util.h" |
26 #include "base/strings/string_number_conversions.h" | 26 #include "base/strings/string_number_conversions.h" |
27 | 27 |
28 namespace base { | 28 namespace base { |
29 | 29 |
30 namespace { | 30 namespace { |
31 | 31 |
32 // Miscellaneous -------------------------------------------------------------- | |
33 | |
// Returns the size, in bytes, used for the buffer that receives a copy of the
// sampled thread's stack. This helper exists in the OLD column only; the NEW
// column performs the same getrlimit() query in
// NativeStackSampler::GetStackBufferSize().
34 size_t StackCopyBufferSize() { | |
// Function-local cache: once a non-zero size has been stored it is returned
// directly on every later call.
35 static size_t stack_size = 0; | |
36 if (stack_size) | |
37 return stack_size; | |
38 | |
39 // In platform_thread_mac's GetDefaultThreadStackSize(), RLIMIT_STACK is used | |
40 // for all stacks, not just the main thread's, so it is good for use here. | |
41 struct rlimit stack_rlimit; | |
42 if (getrlimit(RLIMIT_STACK, &stack_rlimit) == 0 && | |
43 stack_rlimit.rlim_cur != RLIM_INFINITY) { | |
44 stack_size = stack_rlimit.rlim_cur; | |
45 return stack_size; | |
46 } | |
47 | |
48 // If getrlimit somehow fails, return the default macOS main thread stack size | |
49 // of 8 MB (DFLSSIZ in <i386/vmparam.h>) with extra wiggle room. | |
// This path is also taken when the soft limit is RLIM_INFINITY. The fallback
// value is returned without being stored in |stack_size|, so the getrlimit()
// query above is repeated on the next call.
50 return 12 * 1024 * 1024; | |
51 } | |
52 | |
53 // Stack walking -------------------------------------------------------------- | 32 // Stack walking -------------------------------------------------------------- |
54 | 33 |
55 // Fills |state| with |target_thread|'s context. | 34 // Fills |state| with |target_thread|'s context. |
56 // | 35 // |
57 // Note that this is called while a thread is suspended. Make very very sure | 36 // Note that this is called while a thread is suspended. Make very very sure |
58 // that no shared resources (e.g. memory allocators) are used for the duration | 37 // that no shared resources (e.g. memory allocators) are used for the duration |
59 // of this function. | 38 // of this function. |
60 bool GetThreadState(thread_act_t target_thread, x86_thread_state64_t* state) { | 39 bool GetThreadState(thread_act_t target_thread, x86_thread_state64_t* state) { |
61 mach_msg_type_number_t count = | 40 mach_msg_type_number_t count = |
62 static_cast<mach_msg_type_number_t>(x86_THREAD_STATE64_COUNT); | 41 static_cast<mach_msg_type_number_t>(x86_THREAD_STATE64_COUNT); |
(...skipping 253 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
// Mac implementation of NativeStackSampler. Samples the thread identified by a
// Mach port: the thread is suspended, its stack is copied, and the copy is then
// walked to record frames and their modules.
// OLD column: the class owns the stack copy buffer. NEW column: the buffer is
// supplied by the caller as a StackBuffer on each call.
316 class NativeStackSamplerMac : public NativeStackSampler { | 295 class NativeStackSamplerMac : public NativeStackSampler { |
317 public: | 296 public: |
318 NativeStackSamplerMac(mach_port_t thread_port, | 297 NativeStackSamplerMac(mach_port_t thread_port, |
319 AnnotateCallback annotator, | 298 AnnotateCallback annotator, |
320 NativeStackSamplerTestDelegate* test_delegate); | 299 NativeStackSamplerTestDelegate* test_delegate); |
321 ~NativeStackSamplerMac() override; | 300 ~NativeStackSamplerMac() override; |
322 | 301 |
323 // StackSamplingProfiler::NativeStackSampler: | 302 // StackSamplingProfiler::NativeStackSampler: |
324 void ProfileRecordingStarting( | 303 void ProfileRecordingStarting( |
325 std::vector<StackSamplingProfiler::Module>* modules) override; | 304 std::vector<StackSamplingProfiler::Module>* modules) override; |
326 void RecordStackSample(StackSamplingProfiler::Sample* sample) override; | 305 void RecordStackSample(StackBuffer* stack_buffer, |
327 void ProfileRecordingStopped() override; | 306 StackSamplingProfiler::Sample* sample) override; |
| 307 void ProfileRecordingStopped(StackBuffer* stack_buffer) override; |
328 | 308 |
329 private: | 309 private: |
330 // Suspends the thread with |thread_port_|, copies its stack and resumes the | 310 // Suspends the thread with |thread_port_|, copies its stack and resumes the |
331 // thread, then records the stack frames and associated modules into |sample|. | 311 // thread, then records the stack frames and associated modules into |sample|. |
332 void SuspendThreadAndRecordStack(StackSamplingProfiler::Sample* sample); | 312 void SuspendThreadAndRecordStack(StackBuffer* stack_buffer, |
| 313 StackSamplingProfiler::Sample* sample); |
333 | 314 |
334 // Weak reference: Mach port for thread being profiled. | 315 // Weak reference: Mach port for thread being profiled. |
335 mach_port_t thread_port_; | 316 mach_port_t thread_port_; |
336 | 317 |
// Callback invoked with the sample while the target thread is suspended, just
// before its stack is copied. The constructor DCHECKs that it is non-null.
337 const AnnotateCallback annotator_; | 318 const AnnotateCallback annotator_; |
338 | 319 |
// May be null. When set, OnPreStackWalk() is called on it after the stack has
// been copied and before the copy is walked.
339 NativeStackSamplerTestDelegate* const test_delegate_; | 320 NativeStackSamplerTestDelegate* const test_delegate_; |
340 | 321 |
341 // The stack base address corresponding to |thread_port_|. | 322 // The stack base address corresponding to |thread_port_|. |
342 const void* const thread_stack_base_address_; | 323 const void* const thread_stack_base_address_; |
343 | 324 |
344 // The size of the |stack_copy_buffer_|. | |
345 const size_t stack_copy_buffer_size_; | |
346 | |
347 // Buffer to use for copies of the stack. We use the same buffer for all the | |
348 // samples to avoid the overhead of multiple allocations and frees. | |
349 const std::unique_ptr<unsigned char[]> stack_copy_buffer_; | |
350 | |
351 // Weak. Points to the modules associated with the profile being recorded | 325 // Weak. Points to the modules associated with the profile being recorded |
352 // between ProfileRecordingStarting() and ProfileRecordingStopped(). | 326 // between ProfileRecordingStarting() and ProfileRecordingStopped(). |
353 std::vector<StackSamplingProfiler::Module>* current_modules_ = nullptr; | 327 std::vector<StackSamplingProfiler::Module>* current_modules_ = nullptr; |
354 | 328 |
355 // Maps a module's base address to the corresponding Module's index within | 329 // Maps a module's base address to the corresponding Module's index within |
356 // current_modules_. | 330 // current_modules_. |
357 std::map<const void*, size_t> profile_module_index_; | 331 std::map<const void*, size_t> profile_module_index_; |
358 | 332 |
359 DISALLOW_COPY_AND_ASSIGN(NativeStackSamplerMac); | 333 DISALLOW_COPY_AND_ASSIGN(NativeStackSamplerMac); |
360 }; | 334 }; |
361 | 335 |
// Stores the Mach port of the thread to sample together with the annotator and
// test delegate, and looks up that thread's stack base address through
// pthread_get_stackaddr_np(). |annotator| is DCHECKed to be non-null.
// OLD column: also allocates the stack copy buffer, sized by
// StackCopyBufferSize(). NEW column: no buffer is allocated here.
362 NativeStackSamplerMac::NativeStackSamplerMac( | 336 NativeStackSamplerMac::NativeStackSamplerMac( |
363 mach_port_t thread_port, | 337 mach_port_t thread_port, |
364 AnnotateCallback annotator, | 338 AnnotateCallback annotator, |
365 NativeStackSamplerTestDelegate* test_delegate) | 339 NativeStackSamplerTestDelegate* test_delegate) |
366 : thread_port_(thread_port), | 340 : thread_port_(thread_port), |
367 annotator_(annotator), | 341 annotator_(annotator), |
368 test_delegate_(test_delegate), | 342 test_delegate_(test_delegate), |
369 thread_stack_base_address_( | 343 thread_stack_base_address_( |
370 pthread_get_stackaddr_np(pthread_from_mach_thread_np(thread_port))), | 344 pthread_get_stackaddr_np(pthread_from_mach_thread_np(thread_port))) { |
371 stack_copy_buffer_size_(StackCopyBufferSize()), | |
372 stack_copy_buffer_(new unsigned char[stack_copy_buffer_size_]) { | |
373 DCHECK(annotator_); | 345 DCHECK(annotator_); |
374 | 346 |
375 // This class suspends threads, and those threads might be suspended in dyld. | 347 // This class suspends threads, and those threads might be suspended in dyld. |
376 // Therefore, for all the system functions that might be linked in dynamically | 348 // Therefore, for all the system functions that might be linked in dynamically |
377 // that are used while threads are suspended, make calls to them to make sure | 349 // that are used while threads are suspended, make calls to them to make sure |
378 // that they are linked up. | 350 // that they are linked up. |
// Warm-up call only: neither the return value nor |thread_state| is used.
379 x86_thread_state64_t thread_state; | 351 x86_thread_state64_t thread_state; |
380 GetThreadState(thread_port_, &thread_state); | 352 GetThreadState(thread_port_, &thread_state); |
381 } | 353 } |
382 | 354 |
// Empty body: |thread_port_| is held as a weak reference and is not released
// here.
383 NativeStackSamplerMac::~NativeStackSamplerMac() {} | 355 NativeStackSamplerMac::~NativeStackSamplerMac() {} |
384 | 356 |
// Begins a profile: keeps a weak pointer to |modules| for use while samples are
// recorded, and clears the module-index map left over from any earlier profile.
385 void NativeStackSamplerMac::ProfileRecordingStarting( | 357 void NativeStackSamplerMac::ProfileRecordingStarting( |
386 std::vector<StackSamplingProfiler::Module>* modules) { | 358 std::vector<StackSamplingProfiler::Module>* modules) { |
387 current_modules_ = modules; | 359 current_modules_ = modules; |
388 profile_module_index_.clear(); | 360 profile_module_index_.clear(); |
389 } | 361 } |
390 | 362 |
// Records one sample of the profiled thread's stack into |sample| by
// delegating to SuspendThreadAndRecordStack(). DCHECKs that a profile is in
// progress, i.e. that ProfileRecordingStarting() has set |current_modules_|.
// NEW column: |stack_buffer| is forwarded as the memory that receives the
// stack copy.
391 void NativeStackSamplerMac::RecordStackSample( | 363 void NativeStackSamplerMac::RecordStackSample( |
| 364 StackBuffer* stack_buffer, |
392 StackSamplingProfiler::Sample* sample) { | 365 StackSamplingProfiler::Sample* sample) { |
393 DCHECK(current_modules_); | 366 DCHECK(current_modules_); |
394 | 367 |
395 SuspendThreadAndRecordStack(sample); | 368 SuspendThreadAndRecordStack(stack_buffer, sample); |
396 } | 369 } |
397 | 370 |
// Ends the profile by dropping the weak pointer to the profile's modules.
// NEW column: |stack_buffer| is accepted but not used by this body.
398 void NativeStackSamplerMac::ProfileRecordingStopped() { | 371 void NativeStackSamplerMac::ProfileRecordingStopped(StackBuffer* stack_buffer) { |
399 current_modules_ = nullptr; | 372 current_modules_ = nullptr; |
400 } | 373 } |
401 | 374 |
402 void NativeStackSamplerMac::SuspendThreadAndRecordStack( | 375 void NativeStackSamplerMac::SuspendThreadAndRecordStack( |
| 376 StackBuffer* stack_buffer, |
403 StackSamplingProfiler::Sample* sample) { | 377 StackSamplingProfiler::Sample* sample) { |
404 x86_thread_state64_t thread_state; | 378 x86_thread_state64_t thread_state; |
405 | 379 |
406 // Copy the stack. | 380 // Copy the stack. |
407 | 381 |
408 uintptr_t new_stack_top = 0; | 382 uintptr_t new_stack_top = 0; |
409 { | 383 { |
410 // IMPORTANT NOTE: Do not do ANYTHING in this scope that might | 384 // IMPORTANT NOTE: Do not do ANYTHING in this scope that might |
411 // allocate memory, including indirectly via use of DCHECK/CHECK or other | 385 // allocate memory, including indirectly via use of DCHECK/CHECK or other |
412 // logging statements. Otherwise this code can deadlock on heap locks in the | 386 // logging statements. Otherwise this code can deadlock on heap locks in the |
413 // default heap acquired by the target thread before it was suspended. | 387 // default heap acquired by the target thread before it was suspended. |
414 ScopedSuspendThread suspend_thread(thread_port_); | 388 ScopedSuspendThread suspend_thread(thread_port_); |
415 if (!suspend_thread.was_successful()) | 389 if (!suspend_thread.was_successful()) |
416 return; | 390 return; |
417 | 391 |
418 if (!GetThreadState(thread_port_, &thread_state)) | 392 if (!GetThreadState(thread_port_, &thread_state)) |
419 return; | 393 return; |
420 uintptr_t stack_top = | 394 uintptr_t stack_top = |
421 reinterpret_cast<uintptr_t>(thread_stack_base_address_); | 395 reinterpret_cast<uintptr_t>(thread_stack_base_address_); |
422 uintptr_t stack_bottom = thread_state.__rsp; | 396 uintptr_t stack_bottom = thread_state.__rsp; |
423 if (stack_bottom >= stack_top) | 397 if (stack_bottom >= stack_top) |
424 return; | 398 return; |
425 uintptr_t stack_size = stack_top - stack_bottom; | 399 uintptr_t stack_size = stack_top - stack_bottom; |
426 | 400 |
427 if (stack_size > stack_copy_buffer_size_) | 401 if (stack_size > stack_buffer->size()) |
428 return; | 402 return; |
429 | 403 |
430 (*annotator_)(sample); | 404 (*annotator_)(sample); |
431 | 405 |
432 CopyStackAndRewritePointers( | 406 CopyStackAndRewritePointers( |
433 reinterpret_cast<uintptr_t*>(stack_copy_buffer_.get()), | 407 reinterpret_cast<uintptr_t*>(stack_buffer->buffer()), |
434 reinterpret_cast<uintptr_t*>(stack_bottom), | 408 reinterpret_cast<uintptr_t*>(stack_bottom), |
435 reinterpret_cast<uintptr_t*>(stack_top), &thread_state); | 409 reinterpret_cast<uintptr_t*>(stack_top), &thread_state); |
436 | 410 |
437 new_stack_top = | 411 new_stack_top = |
438 reinterpret_cast<uintptr_t>(stack_copy_buffer_.get()) + stack_size; | 412 reinterpret_cast<uintptr_t>(stack_buffer->buffer()) + stack_size; |
439 } // ScopedSuspendThread | 413 } // ScopedSuspendThread |
440 | 414 |
441 if (test_delegate_) | 415 if (test_delegate_) |
442 test_delegate_->OnPreStackWalk(); | 416 test_delegate_->OnPreStackWalk(); |
443 | 417 |
444 // Walk the stack and record it. | 418 // Walk the stack and record it. |
445 | 419 |
446 // Reserve enough memory for most stacks, to avoid repeated allocations. | 420 // Reserve enough memory for most stacks, to avoid repeated allocations. |
447 // Approximately 99.9% of recorded stacks are 128 frames or fewer. | 421 // Approximately 99.9% of recorded stacks are 128 frames or fewer. |
448 sample->frames.reserve(128); | 422 sample->frames.reserve(128); |
(...skipping 12 matching lines...) Expand all Loading... |
461 } // namespace | 435 } // namespace |
462 | 436 |
// Factory returning the Mac sampler as a NativeStackSampler. |thread_id| is
// passed straight through as the constructor's mach_port_t |thread_port|
// argument; |annotator| and |test_delegate| are forwarded unchanged.
463 std::unique_ptr<NativeStackSampler> NativeStackSampler::Create( | 437 std::unique_ptr<NativeStackSampler> NativeStackSampler::Create( |
464 PlatformThreadId thread_id, | 438 PlatformThreadId thread_id, |
465 AnnotateCallback annotator, | 439 AnnotateCallback annotator, |
466 NativeStackSamplerTestDelegate* test_delegate) { | 440 NativeStackSamplerTestDelegate* test_delegate) { |
467 return base::MakeUnique<NativeStackSamplerMac>(thread_id, annotator, | 441 return base::MakeUnique<NativeStackSamplerMac>(thread_id, annotator, |
468 test_delegate); | 442 test_delegate); |
469 } | 443 } |
470 | 444 |
// Returns a stack buffer size in bytes: the current RLIMIT_STACK soft limit
// when it can be read and is finite, otherwise a fixed 12 MB.
// NEW column only. It performs the same query as the OLD column's file-local
// StackCopyBufferSize(), but without that helper's static cache.
// NOTE(review): presumably callers use this to size the StackBuffer passed to
// RecordStackSample() -- confirm against native_stack_sampler.h.
| 445 size_t NativeStackSampler::GetStackBufferSize() { |
| 446 // In platform_thread_mac's GetDefaultThreadStackSize(), RLIMIT_STACK is used |
| 447 // for all stacks, not just the main thread's, so it is good for use here. |
| 448 struct rlimit stack_rlimit; |
| 449 if (getrlimit(RLIMIT_STACK, &stack_rlimit) == 0 && |
| 450 stack_rlimit.rlim_cur != RLIM_INFINITY) { |
| 451 return stack_rlimit.rlim_cur; |
| 452 } |
| 453 |
| 454 // If getrlimit somehow fails, return the default macOS main thread stack size |
| 455 // of 8 MB (DFLSSIZ in <i386/vmparam.h>) with extra wiggle room. |
// This path is also taken when the soft limit is RLIM_INFINITY.
| 456 return 12 * 1024 * 1024; |
| 457 } |
| 458 |
471 } // namespace base | 459 } // namespace base |
OLD | NEW |