Index: content/renderer/devtools/v8_sampling_profiler.cc
diff --git a/content/renderer/devtools/v8_sampling_profiler.cc b/content/renderer/devtools/v8_sampling_profiler.cc
index 17d7556559b7be9622c12feb2d573c0bd702fdd6..2f97e2098c233e2da60923d8e630f9feb2e6a05b 100644
--- a/content/renderer/devtools/v8_sampling_profiler.cc
+++ b/content/renderer/devtools/v8_sampling_profiler.cc
@@ -4,8 +4,13 @@
 #include "content/renderer/devtools/v8_sampling_profiler.h"
+#if defined(OS_POSIX)
+#include <signal.h>
  yurys (2015/03/18 12:49:03): You'll likely need <ucontext.h> and <mach/mach.h>…
  alph (2015/03/18 13:37:49): It compiles so far. I'll add these includes when i…
+#define USE_SIGNALS
+#endif
+
 #include "base/format_macros.h"
-#include "base/strings/string_util.h"
+#include "base/strings/stringprintf.h"
 #include "base/synchronization/cancellation_flag.h"
 #include "base/threading/platform_thread.h"
 #include "base/trace_event/trace_event.h"
@@ -13,6 +18,7 @@
 #include "content/renderer/render_thread_impl.h"
 #include "v8/include/v8.h"
+using base::trace_event::ConvertableToTraceFormat;
 using base::trace_event::TraceLog;
 using base::trace_event::TracedValue;
 using v8::Isolate;
@@ -22,10 +28,173 @@ namespace content {
 namespace {
 std::string PtrToString(const void* value) {
-  char buffer[20];
-  base::snprintf(buffer, sizeof(buffer), "0x%" PRIx64,
-                 static_cast<uint64>(reinterpret_cast<intptr_t>(value)));
-  return buffer;
+  return base::StringPrintf(
+      "0x%" PRIx64, static_cast<uint64>(reinterpret_cast<intptr_t>(value)));
+}
+
+// Lock-free, cache-friendly sampling circular queue for large
+// records. Intended for fast transfer of large records between a
+// single producer and a single consumer. If the queue is full,
+// StartEnqueue will return nullptr. The queue is designed to avoid
+// cache-line thrashing by preventing simultaneous reads and writes
+// to adjacent memory locations.
+template <typename T, unsigned Length>
+class SamplingCircularQueue {
  yurys (2015/03/18 12:49:04): Let's move it in a separate file.
  alph (2015/03/18 13:37:49): Done.
+ public:
+  // Executed on the application thread.
+  SamplingCircularQueue();
+  ~SamplingCircularQueue();
+
+  // StartEnqueue returns a pointer to a memory location for storing the next
+  // record, or nullptr if all entries are full at the moment.
+  T* StartEnqueue();
+  // Notifies the queue that the producer has completed writing data into the
+  // memory returned by StartEnqueue, so it can be passed to the consumer.
+  void FinishEnqueue();
+
+  // Executed on the consumer (analyzer) thread.
+  // Retrieves, but does not remove, the head of this queue, returning nullptr
+  // if this queue is empty. After the record has been read by a consumer,
+  // Remove must be called.
+  T* Peek();
+  void Remove();
+
+ private:
+  // Reserved values for the entry marker.
+  enum {
+    kEmpty,  // Marks clean (processed) entries.
+    kFull    // Marks entries already filled by the producer but not yet
+             // completely processed by the consumer.
+  };
+
+  struct /*V8_ALIGNED(PROCESSOR_CACHE_LINE_SIZE)*/ Entry {
+    Entry() : marker(kEmpty) {}
+    T record;
+    base::subtle::Atomic32 marker;
+  };
+
+  Entry* Next(Entry* entry);
+
+  Entry buffer_[Length];
+  /*V8_ALIGNED(PROCESSOR_CACHE_LINE_SIZE)*/ Entry* enqueue_pos_;
+  /*V8_ALIGNED(PROCESSOR_CACHE_LINE_SIZE)*/ Entry* dequeue_pos_;
+
+  DISALLOW_COPY_AND_ASSIGN(SamplingCircularQueue);
+};
+
+template <typename T, unsigned L>
+SamplingCircularQueue<T, L>::SamplingCircularQueue()
+    : enqueue_pos_(buffer_), dequeue_pos_(buffer_) {
+}
+
+template <typename T, unsigned L>
+SamplingCircularQueue<T, L>::~SamplingCircularQueue() {
+}
+
+template <typename T, unsigned L>
+T* SamplingCircularQueue<T, L>::Peek() {
+  base::subtle::MemoryBarrier();
+  if (base::subtle::Acquire_Load(&dequeue_pos_->marker) == kFull) {
+    return &dequeue_pos_->record;
+  }
+  return nullptr;
+}
+
+template <typename T, unsigned L>
+void SamplingCircularQueue<T, L>::Remove() {
+  base::subtle::Release_Store(&dequeue_pos_->marker, kEmpty);
+  dequeue_pos_ = Next(dequeue_pos_);
+}
+
+template <typename T, unsigned L>
+T* SamplingCircularQueue<T, L>::StartEnqueue() {
+  base::subtle::MemoryBarrier();
+  if (base::subtle::Acquire_Load(&enqueue_pos_->marker) == kEmpty) {
+    return &enqueue_pos_->record;
+  }
+  return nullptr;
+}
+
+template <typename T, unsigned L>
+void SamplingCircularQueue<T, L>::FinishEnqueue() {
+  base::subtle::Release_Store(&enqueue_pos_->marker, kFull);
+  enqueue_pos_ = Next(enqueue_pos_);
+}
+
+template <typename T, unsigned L>
+typename SamplingCircularQueue<T, L>::Entry* SamplingCircularQueue<T, L>::Next(
+    Entry* entry) {
+  Entry* next = entry + 1;
+  if (next == &buffer_[L])
+    return buffer_;
+  return next;
+}
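
The marker protocol above deserves a closer look: each entry carries its own kEmpty/kFull flag, the producer only ever advances enqueue_pos_, and the consumer only dequeue_pos_, so the two sides never contend on a shared index. Below is a minimal, self-contained sketch of the same single-producer/single-consumer protocol in standard C++ atomics (illustrative only; the patch uses base::subtle primitives, and the commented-out V8_ALIGNED annotations would additionally pad each entry to a cache line):

// Sketch: the SPSC marker protocol with std::atomic (not the patch's code).
#include <atomic>
#include <cstdio>

template <typename T, unsigned Length>
class SpscQueue {
 public:
  T* StartEnqueue() {
    Entry* e = &buffer_[enqueue_pos_];
    // Acquire pairs with the consumer's release in Remove(): the slot must
    // really be free before the producer reuses it.
    return e->full.load(std::memory_order_acquire) ? nullptr : &e->record;
  }
  void FinishEnqueue() {
    // Release: the record contents must be visible before the marker flips.
    buffer_[enqueue_pos_].full.store(true, std::memory_order_release);
    enqueue_pos_ = (enqueue_pos_ + 1) % Length;
  }
  T* Peek() {
    Entry* e = &buffer_[dequeue_pos_];
    return e->full.load(std::memory_order_acquire) ? &e->record : nullptr;
  }
  void Remove() {
    buffer_[dequeue_pos_].full.store(false, std::memory_order_release);
    dequeue_pos_ = (dequeue_pos_ + 1) % Length;
  }

 private:
  struct Entry {
    T record;
    std::atomic<bool> full{false};
  };
  Entry buffer_[Length];
  unsigned enqueue_pos_ = 0;  // Touched only by the producer.
  unsigned dequeue_pos_ = 0;  // Touched only by the consumer.
};

int main() {
  SpscQueue<int, 4> q;
  if (int* slot = q.StartEnqueue()) {  // Producer side.
    *slot = 42;
    q.FinishEnqueue();
  }
  if (int* rec = q.Peek()) {  // Consumer side.
    std::printf("dequeued %d\n", *rec);
    q.Remove();
  }
}

The release store publishes the record contents to the other side; the acquire load before reuse guarantees the other side has finished with the slot.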
+
+class SampleRecord {
+ public:
+  static const int kMaxFramesCountLog2 = 8;
+  static const unsigned kMaxFramesCount = (1u << kMaxFramesCountLog2) - 1;
+
+  SampleRecord() {}
+
+  base::TimeTicks timestamp() const { return timestamp_; }
+  void Collect(v8::Isolate* isolate,
+               base::TimeTicks timestamp,
+               const v8::RegisterState& state);
+  scoped_refptr<ConvertableToTraceFormat> ToTraceFormat() const;
+
+ private:
+  base::TimeTicks timestamp_;
+  unsigned vm_state_ : 4;
+  unsigned frames_count_ : kMaxFramesCountLog2;
+  const void* frames_[kMaxFramesCount];
+
+  DISALLOW_COPY_AND_ASSIGN(SampleRecord);
+};
+
+void SampleRecord::Collect(v8::Isolate* isolate,
+                           base::TimeTicks timestamp,
+                           const v8::RegisterState& state) {
+  v8::SampleInfo sample_info;
+  isolate->GetStackSample(state, (void**)frames_, kMaxFramesCount,
+                          &sample_info);
+  timestamp_ = timestamp;
+  frames_count_ = sample_info.frames_count;
+  vm_state_ = sample_info.vm_state;
+}
+
+scoped_refptr<ConvertableToTraceFormat> SampleRecord::ToTraceFormat() const {
+  scoped_refptr<TracedValue> data(new TracedValue());
+  const char* vm_state = nullptr;
+  switch (vm_state_) {
+    case v8::StateTag::JS:
+      vm_state = "js";
+      break;
+    case v8::StateTag::GC:
+      vm_state = "gc";
+      break;
+    case v8::StateTag::COMPILER:
+      vm_state = "compiler";
+      break;
+    case v8::StateTag::OTHER:
+      vm_state = "other";
+      break;
+    case v8::StateTag::EXTERNAL:
+      vm_state = "external";
+      break;
+    case v8::StateTag::IDLE:
+      vm_state = "idle";
+      break;
+    default:
+      NOTREACHED();
+  }
+  data->SetString("vm_state", vm_state);
+  data->BeginArray("stack");
+  for (unsigned i = 0; i < frames_count_; ++i) {
+    data->AppendString(PtrToString(frames_[i]));
+  }
+  data->EndArray();
+  return data;
 }
 }  // namespace
@@ -33,30 +202,78 @@ std::string PtrToString(const void* value) {
 // The class implements a sampler responsible for sampling a single thread.
 class Sampler {
  public:
-  explicit Sampler(Isolate* isolate) : isolate_(isolate) { DCHECK(isolate_); }
+  Sampler(base::PlatformThreadHandle thread_handle,
+          base::PlatformThreadId thread_id,
+          Isolate* isolate);
+  ~Sampler();
   static scoped_ptr<Sampler> CreateForCurrentThread();
+  static Sampler* GetInstance() { return tls_instance_.Pointer()->Get(); }
   // These methods are called from the sampling thread.
   void Start();
   void Stop();
   void Sample();
+  void DoSample(const v8::RegisterState& state);
+
+  bool EventsCollectedForTest() const {
+    return base::subtle::NoBarrier_Load(&code_added_events_count_) != 0 ||
+           base::subtle::NoBarrier_Load(&samples_count_) != 0;
+  }
+
  private:
   static void InstallJitCodeEventHandler(Isolate* isolate, void* data);
   static void HandleJitCodeEvent(const v8::JitCodeEvent* event);
-  static scoped_refptr<base::trace_event::ConvertableToTraceFormat>
-  JitCodeEventToTraceFormat(const v8::JitCodeEvent* event);
+  static scoped_refptr<ConvertableToTraceFormat> JitCodeEventToTraceFormat(
+      const v8::JitCodeEvent* event);
+  void InjectPendingEvents();
+
+  static const unsigned kNumberOfSamples = 10;
+  typedef SamplingCircularQueue<SampleRecord, kNumberOfSamples> SamplingQueue;
+
+  base::PlatformThreadId thread_id_;
+  base::PlatformThreadHandle thread_handle_;
   Isolate* isolate_;
+  scoped_ptr<SamplingQueue> samples_data_;
+  base::subtle::Atomic32 code_added_events_count_;
+  base::subtle::Atomic32 samples_count_;
+
+  static base::LazyInstance<base::ThreadLocalPointer<Sampler>>::Leaky
+      tls_instance_;
 };
+base::LazyInstance<base::ThreadLocalPointer<Sampler>>::Leaky
+    Sampler::tls_instance_ = LAZY_INSTANCE_INITIALIZER;
+
+Sampler::Sampler(base::PlatformThreadHandle thread_handle,
+                 base::PlatformThreadId thread_id,
+                 Isolate* isolate)
+    : thread_id_(thread_id),
+      thread_handle_(thread_handle),
+      isolate_(isolate),
+      code_added_events_count_(0),
+      samples_count_(0) {
+  DCHECK(isolate_);
+  DCHECK(!GetInstance());
+  tls_instance_.Pointer()->Set(this);
+}
+
+Sampler::~Sampler() {
+  DCHECK(GetInstance());
+  tls_instance_.Pointer()->Set(nullptr);
  yurys (2015/03/18 12:49:03): It is not safe to access TLS storage in signal handlers…
  alph (2015/03/18 13:37:49): It should be safe provided that:
    - the tls variable…
+}
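
On yurys's TLS concern: the safety argument alph begins hinges on the thread-local slot being fully created on the sampled thread before any SIGPROF can arrive, because first-touch TLS creation may allocate. A sketch of that pattern using the GCC/Clang __thread extension, where a read is a plain load (an illustration of the principle only; the patch itself goes through base::ThreadLocalPointer, whose signal-safety is exactly what is being debated):

// Sketch only (assumption: GCC/Clang __thread, i.e. initial-exec TLS).
// The profiled thread publishes its sampler pointer from normal context;
// the signal handler merely reads the already-initialized slot, which
// performs no allocation and so is async-signal-safe in practice.
#include <signal.h>
#include <stddef.h>

struct Sampler;                          // Opaque for this sketch.
static __thread Sampler* g_tls_sampler;  // Zero-initialized per thread.

void RegisterCurrentThreadSampler(Sampler* s) {
  g_tls_sampler = s;  // Runs on the profiled thread before profiling starts.
}

void ProfilerSignalHandler(int /*sig*/, siginfo_t*, void*) {
  Sampler* s = g_tls_sampler;  // Plain read; no locks, no malloc.
  if (!s)
    return;  // Thread never registered, or sampler already torn down.
  // ... collect the sample via s ...
}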
+
 // static
 scoped_ptr<Sampler> Sampler::CreateForCurrentThread() {
-  return scoped_ptr<Sampler>(new Sampler(Isolate::GetCurrent()));
+  return scoped_ptr<Sampler>(new Sampler(base::PlatformThread::CurrentHandle(),
+                                         base::PlatformThread::CurrentId(),
+                                         Isolate::GetCurrent()));
 }
 void Sampler::Start() {
+  samples_data_.reset(new SamplingQueue());
   v8::JitCodeEventHandler handler = &HandleJitCodeEvent;
   isolate_->RequestInterrupt(&InstallJitCodeEventHandler,
                              reinterpret_cast<void*>(handler));
@@ -64,9 +281,44 @@ void Sampler::Start() {
 void Sampler::Stop() {
   isolate_->RequestInterrupt(&InstallJitCodeEventHandler, nullptr);
+  samples_data_.reset();
 }
 void Sampler::Sample() {
+#if defined(USE_SIGNALS)
+  int error = pthread_kill(thread_handle_.platform_handle(), SIGPROF);
+  if (error) {
+    LOG(ERROR) << "pthread_kill failed with error " << error << " "
+               << strerror(error);
+  }
+  InjectPendingEvents();
+#endif
+}
+
+void Sampler::DoSample(const v8::RegisterState& state) {
+  // Called from the sampled thread's signal handler, so no memory
+  // allocation is allowed here.
+  base::TimeTicks timestamp = base::TimeTicks::NowFromSystemTraceTime();
+  SampleRecord* record = samples_data_->StartEnqueue();
+  if (!record) {
+    LOG(ERROR) << "No space left in the sampling buffer";
  yurys (2015/03/18 12:49:04): I'm pretty much sure it will allocate.
  alph (2015/03/18 13:37:49): Acknowledged.
+    return;
+  }
+  record->Collect(isolate_, timestamp, state);
+  samples_data_->FinishEnqueue();
+  base::subtle::NoBarrier_AtomicIncrement(&samples_count_, 1);
+}
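
yurys's objection is well taken: LOG(ERROR) may allocate, and allocating inside a signal handler can deadlock if the signal interrupted malloc. A common async-signal-safe alternative (a sketch of one option, not what this patch does) is to count overflows in a lock-free atomic and report them later from the sampling thread:

// Sketch: async-signal-safe handling of a full sampling buffer.
#include <atomic>
#include <cstdio>

static std::atomic<int> g_dropped_samples{0};

// Signal-handler side: bumping a lock-free atomic does not allocate.
void OnBufferFull() {
  g_dropped_samples.fetch_add(1, std::memory_order_relaxed);
}

// Sampling-thread side, where logging and allocation are safe again.
void ReportDroppedSamples() {
  int dropped = g_dropped_samples.exchange(0, std::memory_order_relaxed);
  if (dropped > 0)
    std::fprintf(stderr, "sampling buffer overflowed %d time(s)\n", dropped);
}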
+
+void Sampler::InjectPendingEvents() {
+  SampleRecord* record = samples_data_->Peek();
+  while (record) {
+    TRACE_EVENT_SAMPLE_WITH_TID_AND_TIMESTAMP1(
+        TRACE_DISABLED_BY_DEFAULT("v8.cpu_profile"), "V8Sample", thread_id_,
+        (record->timestamp() - base::TimeTicks()).InMicroseconds(), "data",
+        record->ToTraceFormat());
+    samples_data_->Remove();
+    record = samples_data_->Peek();
+  }
+}
 // static
@@ -83,11 +335,18 @@ void Sampler::InstallJitCodeEventHandler(Isolate* isolate, void* data) {
 // static
 void Sampler::HandleJitCodeEvent(const v8::JitCodeEvent* event) {
   // Called on the sampled V8 thread.
+  Sampler* sampler = GetInstance();
+  // The sampler may have already been destroyed.
+  // That's fine, we're not interested in these events anymore.
+  if (!sampler)
+    return;
   switch (event->type) {
     case v8::JitCodeEvent::CODE_ADDED:
       TRACE_EVENT_INSTANT1(TRACE_DISABLED_BY_DEFAULT("v8.cpu_profile"),
                            "JitCodeAdded", TRACE_EVENT_SCOPE_THREAD, "data",
                            JitCodeEventToTraceFormat(event));
+      base::subtle::NoBarrier_AtomicIncrement(
+          &sampler->code_added_events_count_, 1);
       break;
     case v8::JitCodeEvent::CODE_MOVED:
@@ -110,9 +369,8 @@ void Sampler::HandleJitCodeEvent(const v8::JitCodeEvent* event) {
 }
 // static
-scoped_refptr<base::trace_event::ConvertableToTraceFormat>
-Sampler::JitCodeEventToTraceFormat(const v8::JitCodeEvent* event) {
-  // Called on the sampled thread.
+scoped_refptr<ConvertableToTraceFormat> Sampler::JitCodeEventToTraceFormat(
+    const v8::JitCodeEvent* event) {
   switch (event->type) {
     case v8::JitCodeEvent::CODE_ADDED: {
       scoped_refptr<TracedValue> data(new TracedValue());
@@ -161,6 +419,13 @@ class V8SamplingThread : public base::PlatformThread::Delegate {
   void RemoveSamplers();
   void StartSamplers();
   void StopSamplers();
+
+  static void InstallSignalHandler();
+  static void RestoreSignalHandler();
+#ifdef USE_SIGNALS
+  static void HandleProfilerSignal(int signal, siginfo_t* info, void* context);
+#endif
+
   static void HandleJitCodeEvent(const v8::JitCodeEvent* event);
   Sampler* render_thread_sampler_;
@@ -169,9 +434,19 @@ class V8SamplingThread : public base::PlatformThread::Delegate {
   base::PlatformThreadHandle sampling_thread_handle_;
   std::vector<Sampler*> samplers_;
+  static bool signal_handler_installed_;
  yurys (2015/03/18 12:49:03): Should be behind #ifdef
  alph (2015/03/18 13:37:49): Done.
+#ifdef USE_SIGNALS
+  static struct sigaction old_signal_handler_;
+#endif
+
   DISALLOW_COPY_AND_ASSIGN(V8SamplingThread);
 };
+bool V8SamplingThread::signal_handler_installed_;
+#ifdef USE_SIGNALS
+struct sigaction V8SamplingThread::old_signal_handler_;
+#endif
+
 V8SamplingThread::V8SamplingThread(Sampler* render_thread_sampler,
                                    base::WaitableEvent* event)
     : render_thread_sampler_(render_thread_sampler),
@@ -182,15 +457,20 @@ void V8SamplingThread::ThreadMain() {
   base::PlatformThread::SetName("V8SamplingProfilerThread");
   InstallSamplers();
   StartSamplers();
+  InstallSignalHandler();
   const int kSamplingFrequencyMicroseconds = 1000;
   while (!cancellation_flag_.IsSet()) {
     Sample();
-    if (waitable_event_for_testing_) {
+    if (waitable_event_for_testing_ &&
+        render_thread_sampler_->EventsCollectedForTest()) {
       waitable_event_for_testing_->Signal();
     }
+    // TODO(alph): Make the sampling interval independent of how long it
+    // takes to take a sample; see the sketch after this function.
     base::PlatformThread::Sleep(
         base::TimeDelta::FromMicroseconds(kSamplingFrequencyMicroseconds));
   }
+  RestoreSignalHandler();
   StopSamplers();
   RemoveSamplers();
 }
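
On the TODO above: sleeping a fixed duration after each sample makes the real period 1000 microseconds plus whatever Sample() took. Scheduling against absolute deadlines keeps the cadence fixed. A sketch with std::chrono (an assumption about one way to resolve the TODO, not code from the patch):

// Sketch: fixed-rate sampling loop driven by absolute deadlines.
#include <atomic>
#include <chrono>
#include <thread>

void SamplingLoop(const std::atomic<bool>& cancelled) {
  using Clock = std::chrono::steady_clock;
  constexpr std::chrono::microseconds kPeriod(1000);
  Clock::time_point next_sample = Clock::now() + kPeriod;
  while (!cancelled.load(std::memory_order_relaxed)) {
    // Sample();  // However long this takes...
    std::this_thread::sleep_until(next_sample);  // ...the cadence is fixed.
    next_sample += kPeriod;
  }
}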
@@ -223,9 +503,74 @@ void V8SamplingThread::StopSamplers() {
   }
 }
+// static
+void V8SamplingThread::InstallSignalHandler() {
+#ifdef USE_SIGNALS
+  // There must be only one!
+  DCHECK(!signal_handler_installed_);
+  struct sigaction sa;
+  sa.sa_sigaction = &HandleProfilerSignal;
+  sigemptyset(&sa.sa_mask);
+  sa.sa_flags = SA_RESTART | SA_SIGINFO;
+  signal_handler_installed_ =
+      (sigaction(SIGPROF, &sa, &old_signal_handler_) == 0);
+#endif
+}
+
+// static
+void V8SamplingThread::RestoreSignalHandler() {
+#ifdef USE_SIGNALS
+  if (!signal_handler_installed_)
+    return;
+  sigaction(SIGPROF, &old_signal_handler_, 0);
+  signal_handler_installed_ = false;
+#endif
+}
+
+#ifdef USE_SIGNALS
+// static
+void V8SamplingThread::HandleProfilerSignal(int signal,
+                                            siginfo_t* info,
+                                            void* context) {
+  if (signal != SIGPROF)
+    return;
+  ucontext_t* ucontext = reinterpret_cast<ucontext_t*>(context);
+  mcontext_t& mcontext = ucontext->uc_mcontext;
+  v8::RegisterState state;
+
+#if defined(OS_ANDROID)
+  // TODO(alph): Add support for Android.
+  ALLOW_UNUSED_LOCAL(mcontext);
+
+#elif defined(OS_MACOSX)
+#if ARCH_CPU_64_BITS
+  state.pc = reinterpret_cast<void*>(mcontext->__ss.__rip);
+  state.sp = reinterpret_cast<void*>(mcontext->__ss.__rsp);
+  state.fp = reinterpret_cast<void*>(mcontext->__ss.__rbp);
+#elif ARCH_CPU_32_BITS
+  state.pc = reinterpret_cast<void*>(mcontext->__ss.__eip);
+  state.sp = reinterpret_cast<void*>(mcontext->__ss.__esp);
+  state.fp = reinterpret_cast<void*>(mcontext->__ss.__ebp);
+#endif  // ARCH_CPU_32_BITS
+
+#else
+#if ARCH_CPU_64_BITS
+  state.pc = reinterpret_cast<void*>(mcontext.gregs[REG_RIP]);
+  state.sp = reinterpret_cast<void*>(mcontext.gregs[REG_RSP]);
+  state.fp = reinterpret_cast<void*>(mcontext.gregs[REG_RBP]);
+#elif ARCH_CPU_32_BITS
+  state.pc = reinterpret_cast<void*>(mcontext.gregs[REG_EIP]);
+  state.sp = reinterpret_cast<void*>(mcontext.gregs[REG_ESP]);
+  state.fp = reinterpret_cast<void*>(mcontext.gregs[REG_EBP]);
+#endif  // ARCH_CPU_32_BITS
+#endif
+  Sampler::GetInstance()->DoSample(state);
+}
+#endif
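
Taken together, the mechanism is: the sampling thread interrupts the profiled thread with pthread_kill(SIGPROF), and the handler reads the interrupted register state out of the ucontext, as the REG_RIP branch above does. The following self-contained sketch shows just that plumbing with no V8 involved (assumptions: Linux, x86-64, glibc; build with g++ -pthread; names like BusyWork are hypothetical):

// Standalone sketch of signal-based PC sampling (assumptions: Linux,
// x86-64, glibc). Not the patch's code; it only mirrors the plumbing.
#include <pthread.h>
#include <signal.h>
#include <ucontext.h>
#include <unistd.h>
#include <atomic>
#include <cstdio>

static std::atomic<void*> g_last_pc{nullptr};

static void HandleProf(int, siginfo_t*, void* context) {
  ucontext_t* uc = static_cast<ucontext_t*>(context);
  // Program counter of the interrupted thread; a lock-free atomic store
  // keeps the handler async-signal-safe.
  g_last_pc.store(reinterpret_cast<void*>(uc->uc_mcontext.gregs[REG_RIP]),
                  std::memory_order_relaxed);
}

static void* BusyWork(void*) {
  volatile unsigned long counter = 0;
  while (true)
    counter = counter + 1;  // Something for SIGPROF to interrupt.
  return nullptr;
}

int main() {
  struct sigaction sa = {};
  sa.sa_sigaction = HandleProf;
  sigemptyset(&sa.sa_mask);
  sa.sa_flags = SA_RESTART | SA_SIGINFO;
  sigaction(SIGPROF, &sa, nullptr);

  pthread_t worker;
  pthread_create(&worker, nullptr, BusyWork, nullptr);
  for (int i = 0; i < 5; ++i) {
    usleep(1000);                   // ~1 ms period, as in the patch.
    pthread_kill(worker, SIGPROF);  // Interrupt the profiled thread.
    usleep(100);                    // Give the handler time to run.
    std::printf("pc sample: %p\n", g_last_pc.load(std::memory_order_relaxed));
  }
  return 0;
}

SA_RESTART keeps interrupted syscalls from failing with EINTR, and SA_SIGINFO selects the three-argument handler form that receives the ucontext, matching the flags the patch installs.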
+
 void V8SamplingThread::Start() {
   if (!base::PlatformThread::Create(0, this, &sampling_thread_handle_)) {
-    DCHECK(false) << "failed to create thread";
+    DCHECK(false) << "failed to create sampling thread";
   }
 }
@@ -236,7 +581,8 @@ void V8SamplingThread::Stop() {
 V8SamplingProfiler::V8SamplingProfiler(bool underTest)
     : sampling_thread_(nullptr),
-      render_thread_sampler_(Sampler::CreateForCurrentThread()) {
+      render_thread_sampler_(Sampler::CreateForCurrentThread()),
+      message_loop_proxy_(base::MessageLoopProxy::current()) {
   DCHECK(underTest || RenderThreadImpl::current());
   // Force the "v8.cpu_profile" category to show up in the trace viewer.
   TraceLog::GetCategoryGroupEnabled(
@@ -249,6 +595,13 @@ V8SamplingProfiler::~V8SamplingProfiler() {
   DCHECK(!sampling_thread_.get());
 }
+void V8SamplingProfiler::StartSamplingThread() {
+  DCHECK(!sampling_thread_.get());
+  sampling_thread_.reset(new V8SamplingThread(
+      render_thread_sampler_.get(), waitable_event_for_testing_.get()));
+  sampling_thread_->Start();
+}
+
 void V8SamplingProfiler::OnTraceLogEnabled() {
   bool enabled;
   TRACE_EVENT_CATEGORY_GROUP_ENABLED(
@@ -258,15 +611,15 @@ void V8SamplingProfiler::OnTraceLogEnabled() {
   // Do not enable the sampling profiler in continuous mode, since
   // losing JIT code events cannot be afforded.
+  // TODO(alph): Add support for infinite recording of meta trace events.
   base::trace_event::TraceRecordMode record_mode =
       TraceLog::GetInstance()->GetCurrentTraceOptions().record_mode;
   if (record_mode == base::trace_event::TraceRecordMode::RECORD_CONTINUOUSLY)
     return;
-  DCHECK(!sampling_thread_.get());
-  sampling_thread_.reset(new V8SamplingThread(
-      render_thread_sampler_.get(), waitable_event_for_testing_.get()));
-  sampling_thread_->Start();
+  message_loop_proxy_->PostTask(
+      FROM_HERE, base::Bind(&V8SamplingProfiler::StartSamplingThread,
+                            base::Unretained(this)));
 }
 void V8SamplingProfiler::OnTraceLogDisabled() {