| Index: services/gfx/compositor/backend/gpu_output.cc |
| diff --git a/services/gfx/compositor/backend/gpu_output.cc b/services/gfx/compositor/backend/gpu_output.cc |
| index 37a51e4a264736907239b7b4dc824e07670937d3..cca044b12198c99fc855b68bf78539b9cd0a4fbe 100644 |
| --- a/services/gfx/compositor/backend/gpu_output.cc |
| +++ b/services/gfx/compositor/backend/gpu_output.cc |
| @@ -7,159 +7,294 @@ |
| #include <utility> |
| #include "base/bind.h" |
| +#include "base/command_line.h" |
| #include "base/location.h" |
| #include "base/logging.h" |
| #include "base/message_loop/message_loop.h" |
| +#include "base/strings/string_number_conversions.h" |
| #include "base/trace_event/trace_event.h" |
| -#include "services/gfx/compositor/backend/gpu_rasterizer.h" |
| #include "services/gfx/compositor/render/render_frame.h" |
| namespace compositor { |
| namespace { |
| -// Maximum number of frames to hold in the queue for rendering. |
| -constexpr size_t kMaxPipelineDepth = 1; |
| -} |
| - |
| -template <typename T> |
| -static void Drop(scoped_ptr<T> ptr) {} |
| +constexpr const char* kPipelineDepthSwitch = "pipeline-depth"; |
| +constexpr uint32_t kDefaultPipelineDepth = 2u; // ideally should be 1 |
| +constexpr uint32_t kMinPipelineDepth = 1u; |
| +constexpr uint32_t kMaxPipelineDepth = 10u; |
|
mikejurka 2016/05/19 23:26:18: why should we ever be more than 3 frames ahead?
jeffbrown 2016/05/20 01:09:47: This is sort of arbitrary. The limit only exists
|
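For context on the bound discussed above: each unit of pipeline depth adds roughly one vsync interval of output latency, so very deep pipelines quickly become pointless. A minimal sketch of the arithmetic, assuming a 60 Hz display (the interval constant and helper name below are illustrative, not part of this CL):

    #include <cstdint>

    // Illustrative only: latency added by the pipeline, mirroring the
    // presentation_phase = vsync_interval * pipeline_depth formula used in
    // OnRasterizerReady(). Assumes a 60 Hz display.
    constexpr int64_t kVsyncIntervalUs = 16667;

    constexpr int64_t PipelineLatencyUs(uint32_t depth) {
      return kVsyncIntervalUs * depth;
    }

    static_assert(PipelineLatencyUs(2) == 33334, "default depth ~2 frames");
    static_assert(PipelineLatencyUs(10) == 166670, "max depth ~167 ms");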
| -static scoped_ptr<base::MessagePump> CreateMessagePumpMojo() { |
| +scoped_ptr<base::MessagePump> CreateMessagePumpMojo() { |
| return base::MessageLoop::CreateMessagePumpForType( |
| base::MessageLoop::TYPE_DEFAULT); |
| } |
| +} // namespace |
| GpuOutput::GpuOutput( |
| mojo::InterfaceHandle<mojo::ContextProvider> context_provider, |
| const SchedulerCallbacks& scheduler_callbacks, |
| const base::Closure& error_callback) |
| - : scheduler_(new VsyncScheduler(base::MessageLoop::current()->task_runner(), |
| - scheduler_callbacks)), |
| - rasterizer_delegate_(make_scoped_ptr(new RasterizerDelegate())) { |
| + : compositor_task_runner_(base::MessageLoop::current()->task_runner()), |
| + vsync_scheduler_( |
| + new VsyncScheduler(compositor_task_runner_, scheduler_callbacks)), |
| + rasterizer_thread_(new base::Thread("gpu_rasterizer")), |
| + rasterizer_initialized_(true, false) { |
| DCHECK(context_provider); |
| - rasterizer_delegate_->PostInitialize( |
| - std::move(context_provider), scheduler_, |
| - base::MessageLoop::current()->task_runner(), error_callback); |
| + pipeline_depth_ = kDefaultPipelineDepth; |
| + auto command_line = base::CommandLine::ForCurrentProcess(); |
| + if (command_line->HasSwitch(kPipelineDepthSwitch)) { |
| + std::string str(command_line->GetSwitchValueASCII(kPipelineDepthSwitch)); |
| + unsigned value; |
| + if (base::StringToUint(str, &value) && value >= kMinPipelineDepth && |
| + value <= kMaxPipelineDepth) { |
| + pipeline_depth_ = value; |
| + } else { |
| + LOG(ERROR) << "Invalid value for --" << kPipelineDepthSwitch << ": \"" |
| + << str << "\""; |
|
mikejurka 2016/05/19 23:26:18: is there a way to exit/crash here? should we call
jeffbrown 2016/05/20 01:09:47: Hmm. Not a bad idea.
|
| + } |
| + } |
| + DVLOG(2) << "Using pipeline depth " << pipeline_depth_; |
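Regarding the exit/crash question above, a minimal sketch of a fail-fast variant, assuming LOG(FATAL) is acceptable in this process (a possible direction, not what the CL currently does):

    if (base::StringToUint(str, &value) && value >= kMinPipelineDepth &&
        value <= kMaxPipelineDepth) {
      pipeline_depth_ = value;
    } else {
      // LOG(FATAL) aborts the process, surfacing the bad flag immediately
      // instead of silently falling back to the default depth.
      LOG(FATAL) << "Invalid value for --" << kPipelineDepthSwitch << ": \""
                 << str << "\"";
    }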
| + |
| + base::Thread::Options options; |
| + options.message_pump_factory = base::Bind(&CreateMessagePumpMojo); |
| + rasterizer_thread_->StartWithOptions(options); |
| + rasterizer_task_runner_ = rasterizer_thread_->message_loop()->task_runner(); |
| + rasterizer_task_runner_->PostTask( |
| + FROM_HERE, |
| + base::Bind(&GpuOutput::InitializeRasterizer, base::Unretained(this), |
| + base::Passed(std::move(context_provider)))); |
| + rasterizer_initialized_.Wait(); |
| + DCHECK(rasterizer_); |
| } |
| GpuOutput::~GpuOutput() { |
| - // Ensure destruction happens on the correct thread. |
| - rasterizer_delegate_->PostDestroy(rasterizer_delegate_.Pass()); |
| + // Ensure rasterizer destruction happens on the rasterizer thread. |
| + rasterizer_task_runner_->PostTask( |
| + FROM_HERE, |
| + base::Bind(&GpuOutput::DestroyRasterizer, base::Unretained(this))); |
| + rasterizer_thread_->Stop(); |
|
mikejurka 2016/05/19 23:26:18: does ->Stop() block until the thread is done?
jeffbrown 2016/05/20 01:09:47: Yes.
|
| + DCHECK(!rasterizer_); |
| } |
| Scheduler* GpuOutput::GetScheduler() { |
| - return scheduler_.get(); |
| + return vsync_scheduler_.get(); |
| } |
| void GpuOutput::SubmitFrame(const scoped_refptr<RenderFrame>& frame) { |
| - rasterizer_delegate_->PostFrame(frame); |
| + DCHECK(frame); |
| + TRACE_EVENT0("gfx", "GpuOutput::SubmitFrame"); |
| + |
| + const int64_t submit_time = MojoGetTimeTicksNow(); |
| + scoped_refptr<FrameData> frame_data( |
|
mikejurka 2016/05/19 23:26:18: is there a way we can avoid using a scoped_refptr
jeffbrown 2016/05/20 01:09:47: These two things go together. There's no wrapping
|
| + new FrameData(frame, submit_time)); // drop outside lock |
| + { |
| + std::lock_guard<std::mutex> lock(shared_state_.mutex); |
| + |
| + TRACE_EVENT_FLOW_BEGIN0("gfx", "Frame Queued", frame_data.get()); |
| + shared_state_.current_frame_data.swap(frame_data); |
| + if (frame_data && !frame_data->draw_time) { |
|
mikejurka 2016/05/19 23:26:18: what does it mean if draw_time is 0? does it mean
jeffbrown 2016/05/20 01:09:47: Added a comment to the header file.
|
| + // Dropped an undrawn frame. |
| + DVLOG(2) << "Rasterizer stalled, dropping frame to catch up."; |
| + TRACE_EVENT_FLOW_END1("gfx", "Frame Queued", frame_data.get(), "drawn", |
| + false); |
| + } |
| + |
| + // TODO(jeffbrown): If the draw queue is full, we should pause |
| + // scheduling until the queue drains. |
| + if (shared_state_.rasterizer_ready && |
| + shared_state_.drawn_frames_awaiting_finish.size() < pipeline_depth_) |
| + ScheduleDrawLocked(); |
| + } |
| } |
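On the draw_time and ref-counting questions above: the header is not part of this diff, but judging from its uses in this file, FrameData is presumably a small thread-safe ref-counted record along these lines (the base class and default initializers are assumptions, reconstructed for illustration only):

    // Reconstructed sketch of GpuOutput::FrameData (see gpu_output.h for the
    // real definition). It travels between the compositor and rasterizer
    // threads, hence the thread-safe ref count.
    struct FrameData : public base::RefCountedThreadSafe<FrameData> {
      FrameData(const scoped_refptr<RenderFrame>& frame, int64_t submit_time);

      // Clears the timestamps so a retained frame can be drawn again.
      void Recycle();

      scoped_refptr<RenderFrame> frame;
      int64_t submit_time;
      int64_t draw_time = 0;  // 0 until OnDraw() hands the frame to DrawFrame()
      int64_t wait_time = 0;  // 0 until DrawFrame() returns
    };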
| -GpuOutput::RasterizerDelegate::RasterizerDelegate() { |
| - base::Thread::Options options; |
| - options.message_pump_factory = base::Bind(&CreateMessagePumpMojo); |
| +void GpuOutput::OnRasterizerReady(int64_t vsync_timebase, |
| + int64_t vsync_interval) { |
| + DCHECK(rasterizer_task_runner_->RunsTasksOnCurrentThread()); |
| + |
| + // TODO(jeffbrown): This shouldn't be hardcoded. |
| + // Need to do some real tuning and possibly determine values adaptively. |
| + // We should probably split the Start() method in two to separate the |
| + // process of setting parameters from starting / stopping scheduling. |
| + const int64_t update_phase = -vsync_interval; |
| + const int64_t snapshot_phase = -vsync_interval / 6; |
| + // TODO(jeffbrown): Determine the presentation phase based on queue depth. |
| + const int64_t presentation_phase = vsync_interval * pipeline_depth_; |
| + if (!vsync_scheduler_->Start(vsync_timebase, vsync_interval, update_phase, |
| + snapshot_phase, presentation_phase)) { |
| + LOG(ERROR) << "Received invalid vsync parameters: timebase=" |
| + << vsync_timebase << ", interval=" << vsync_interval; |
| + PostErrorCallback(); |
| + return; |
| + } |
| + |
| + { |
| + std::lock_guard<std::mutex> lock(shared_state_.mutex); |
| + |
| + if (shared_state_.rasterizer_ready) |
| + return; |
| + |
| + DCHECK(shared_state_.drawn_frames_awaiting_finish.empty()); |
| + shared_state_.rasterizer_ready = true; |
| - thread_.reset(new base::Thread("gpu_rasterizer")); |
| - thread_->StartWithOptions(options); |
| - task_runner_ = thread_->message_loop()->task_runner(); |
| + if (!shared_state_.current_frame_data) |
| + return; |
| + |
| + shared_state_.current_frame_data->Recycle(); |
| + TRACE_EVENT_FLOW_BEGIN0("gfx", "Frame Queued", |
| + shared_state_.current_frame_data.get()); |
| + ScheduleDrawLocked(); |
| + } |
| } |
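To put rough numbers on the phase computation at the top of OnRasterizerReady(), assuming a 60 Hz display (vsync_interval ≈ 16,667 us, an assumption for illustration) and the default pipeline depth of 2:

    // Worked example, assuming 60 Hz and pipeline_depth_ == 2:
    //   update_phase       = -vsync_interval         ≈ -16.7 ms
    //   snapshot_phase     = -vsync_interval / 6     ≈  -2.8 ms
    //   presentation_phase =  vsync_interval * depth ≈ +33.3 ms

Under that assumption, snapshots are presumably taken shortly before each vsync and presentation is expected about two frames later.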
| -GpuOutput::RasterizerDelegate::~RasterizerDelegate() {} |
| +void GpuOutput::OnRasterizerSuspended() { |
| + DCHECK(rasterizer_task_runner_->RunsTasksOnCurrentThread()); |
| -void GpuOutput::RasterizerDelegate::PostInitialize( |
| - mojo::InterfaceHandle<mojo::ContextProvider> context_provider, |
| - const scoped_refptr<VsyncScheduler>& scheduler, |
| - const scoped_refptr<base::TaskRunner>& task_runner, |
| - const base::Closure& error_callback) { |
| - task_runner_->PostTask( |
| - FROM_HERE, |
| - base::Bind(&RasterizerDelegate::InitializeTask, base::Unretained(this), |
| - base::Passed(std::move(context_provider)), scheduler, |
| - base::MessageLoop::current()->task_runner(), error_callback)); |
| + vsync_scheduler_->Stop(); |
| + |
| + { |
| + std::lock_guard<std::mutex> lock(shared_state_.mutex); |
| + |
| + if (!shared_state_.rasterizer_ready) |
| + return; |
| + |
| + shared_state_.rasterizer_ready = false; |
| + } |
| +} |
| + |
| +void GpuOutput::OnRasterizerError() { |
| + DCHECK(rasterizer_task_runner_->RunsTasksOnCurrentThread()); |
| + |
| + PostErrorCallback(); |
| } |
| -void GpuOutput::RasterizerDelegate::PostDestroy( |
| - scoped_ptr<RasterizerDelegate> self) { |
| - task_runner_->PostTask( |
| - FROM_HERE, base::Bind(&Drop<RasterizerDelegate>, base::Passed(&self))); |
| +void GpuOutput::ScheduleDrawLocked() { |
| + DCHECK(shared_state_.current_frame_data); |
| + DCHECK(!shared_state_.current_frame_data->draw_time); |
| + |
| + if (shared_state_.draw_scheduled) |
| + return; |
| + |
| + shared_state_.draw_scheduled = true; |
| + rasterizer_task_runner_->PostTask( |
| + FROM_HERE, base::Bind(&GpuOutput::OnDraw, base::Unretained(this))); |
| } |
| -void GpuOutput::RasterizerDelegate::PostFrame( |
| - const scoped_refptr<RenderFrame>& frame) { |
| - bool was_empty; |
| - scoped_refptr<RenderFrame> dropped_frame; |
| +void GpuOutput::OnDraw() { |
| + DCHECK(rasterizer_task_runner_->RunsTasksOnCurrentThread()); |
| + TRACE_EVENT0("gfx", "GpuOutput::OnDraw"); |
| + |
| + scoped_refptr<FrameData> frame_data; // used outside lock |
| { |
| - std::lock_guard<std::mutex> lock(mutex_); |
| - was_empty = frames_.empty(); |
| - if (frames_.size() == kMaxPipelineDepth) { |
| - // TODO(jeffbrown): Adjust scheduler behavior to compensate. |
| - LOG(ERROR) << "Renderer pipeline stalled, dropping a frame to catch up."; |
| - dropped_frame = frames_.front(); // drop an old frame outside the lock |
| - frames_.pop(); |
| - } |
| - frames_.push(frame); |
| + std::lock_guard<std::mutex> lock(shared_state_.mutex); |
| + |
| + DCHECK(shared_state_.draw_scheduled); |
| + DCHECK(shared_state_.current_frame_data); |
| + DCHECK(!shared_state_.current_frame_data->draw_time); |
| + |
| + shared_state_.draw_scheduled = false; |
| + |
| + if (!shared_state_.rasterizer_ready || |
| + shared_state_.drawn_frames_awaiting_finish.size() >= pipeline_depth_) |
| + return; |
| + |
| + frame_data = shared_state_.current_frame_data; |
| + frame_data->draw_time = MojoGetTimeTicksNow(); |
| + TRACE_EVENT_FLOW_END1("gfx", "Frame Queued", frame_data.get(), "drawn", |
| + true); |
| + |
| + TRACE_EVENT_ASYNC_BEGIN0("gfx", "Rasterize", frame_data.get()); |
| + shared_state_.drawn_frames_awaiting_finish.emplace(frame_data); |
| } |
| - if (was_empty) |
| - PostSubmit(); |
| + rasterizer_->DrawFrame(frame_data->frame); |
| + frame_data->wait_time = MojoGetTimeTicksNow(); |
| } |
| -void GpuOutput::RasterizerDelegate::PostSubmit() { |
| - TRACE_EVENT0("gfx", "GpuOutput::RasterizerDelegate::PostSubmit"); |
| - task_runner_->PostTask(FROM_HERE, base::Bind(&RasterizerDelegate::SubmitTask, |
| - base::Unretained(this))); |
| +void GpuOutput::OnRasterizerFinishedDraw(bool presented) { |
| + DCHECK(rasterizer_task_runner_->RunsTasksOnCurrentThread()); |
| + TRACE_EVENT0("gfx", "GpuOutput::OnRasterizerFinishedDraw"); |
| + |
| + const int64_t finish_time = MojoGetTimeTicksNow(); |
| + scoped_refptr<FrameData> frame_data; // drop outside lock |
| + { |
| + std::lock_guard<std::mutex> lock(shared_state_.mutex); |
| + |
| + DCHECK(shared_state_.rasterizer_ready); |
| + DCHECK(!shared_state_.drawn_frames_awaiting_finish.empty()); |
| + size_t draw_queue_depth = shared_state_.drawn_frames_awaiting_finish.size(); |
| + shared_state_.drawn_frames_awaiting_finish.front().swap(frame_data); |
| + shared_state_.drawn_frames_awaiting_finish.pop(); |
| + DCHECK(frame_data); |
| + DCHECK(frame_data->draw_time); |
| + TRACE_EVENT_ASYNC_END1("gfx", "Rasterize", frame_data.get(), "presented", |
| + presented); |
| + |
| + // TODO(jeffbrown): Adjust scheduler behavior based on observed timing. |
| + // Note: These measurements don't account for systematic downstream delay |
| + // in the display pipeline (how long it takes pixels to actually light up). |
| + if (presented) { |
| + const RenderFrame::Metadata& frame_metadata = |
| + frame_data->frame->metadata(); |
| + const mojo::gfx::composition::FrameInfo& frame_info = |
| + frame_metadata.frame_info(); |
| + const int64_t frame_time = frame_info.frame_time; |
| + const int64_t presentation_time = frame_info.presentation_time; |
| + const int64_t composition_time = frame_metadata.composition_time(); |
| + const int64_t draw_time = frame_data->draw_time; |
| + const int64_t wait_time = frame_data->wait_time; |
| + const int64_t submit_time = frame_data->submit_time; |
| + |
| + DVLOG(2) << "Presented frame: composition latency " |
| + << (composition_time - frame_time) << " us, submission latency " |
| + << (submit_time - composition_time) << " us, queue latency " |
| + << (draw_time - submit_time) << " us, draw latency " |
| + << (wait_time - draw_time) << " us, GPU latency " |
| + << (finish_time - wait_time) << " us, total latency " |
| + << (finish_time - frame_time) << " us, presentation time error " |
| + << (finish_time - presentation_time) << " us" |
| + << ", draw queue depth " << draw_queue_depth; |
| + } else { |
| + DVLOG(2) << "Rasterizer dropped frame."; |
| + } |
| + |
| + DCHECK(shared_state_.current_frame_data); |
| + if (!shared_state_.current_frame_data->draw_time) |
|
mikejurka 2016/05/19 23:26:18: again, what does it mean if draw_time is 0? how do
jeffbrown 2016/05/20 01:09:47: It's set in OnDraw() above. Basically means DrawF
|
| + ScheduleDrawLocked(); |
| + } |
| } |
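The per-stage latencies in the DVLOG above partition the total latency exactly; a small illustrative check (the helper name and sample timestamps are made up for the sketch):

    #include <cstdint>

    // The five stage latencies logged in OnRasterizerFinishedDraw() telescope
    // to the total: composition + submission + queue + draw + GPU == total.
    constexpr int64_t TotalLatencyUs(int64_t frame_time, int64_t composition_time,
                                     int64_t submit_time, int64_t draw_time,
                                     int64_t wait_time, int64_t finish_time) {
      return (composition_time - frame_time) + (submit_time - composition_time) +
             (draw_time - submit_time) + (wait_time - draw_time) +
             (finish_time - wait_time);
    }

    static_assert(TotalLatencyUs(0, 5000, 9000, 12000, 20000, 33000) == 33000,
                  "stage latencies sum to finish_time - frame_time");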
| -void GpuOutput::RasterizerDelegate::InitializeTask( |
| - mojo::InterfaceHandle<mojo::ContextProvider> context_provider, |
| - const scoped_refptr<VsyncScheduler>& scheduler, |
| - const scoped_refptr<base::TaskRunner>& task_runner, |
| - const base::Closure& error_callback) { |
| +void GpuOutput::InitializeRasterizer( |
| + mojo::InterfaceHandle<mojo::ContextProvider> context_provider) { |
| + DCHECK(rasterizer_task_runner_->RunsTasksOnCurrentThread()); |
| + DCHECK(!rasterizer_); |
| + TRACE_EVENT0("gfx", "GpuOutput::InitializeRasterizer"); |
| + |
| rasterizer_.reset(new GpuRasterizer( |
| - mojo::ContextProviderPtr::Create(std::move(context_provider)), scheduler, |
| - task_runner, error_callback)); |
| + mojo::ContextProviderPtr::Create(std::move(context_provider)), this)); |
| + rasterizer_initialized_.Signal(); |
| } |
| -void GpuOutput::RasterizerDelegate::SubmitTask() { |
| - TRACE_EVENT0("gfx", "GpuOutput::RasterizerDelegate::SubmitTask"); |
| - bool have_more; |
| - scoped_refptr<RenderFrame> frame; |
| - { |
| - std::lock_guard<std::mutex> lock(mutex_); |
| - DCHECK(!frames_.empty()); |
| - frame = frames_.front(); |
| - frames_.pop(); |
| - have_more = !frames_.empty(); |
| - } |
| +void GpuOutput::DestroyRasterizer() { |
| + DCHECK(rasterizer_task_runner_->RunsTasksOnCurrentThread()); |
| + DCHECK(rasterizer_); |
| + TRACE_EVENT0("gfx", "GpuOutput::DestroyRasterizer"); |
| - if (have_more) |
| - PostSubmit(); |
| + rasterizer_.reset(); |
| + rasterizer_initialized_.Reset(); |
| +} |
| - int64_t submit_time = MojoGetTimeTicksNow(); |
| - rasterizer_->SubmitFrame( |
| - frame, base::Bind(&RasterizerDelegate::OnFrameSubmitted, |
| - base::Unretained(this), frame->frame_info().frame_time, |
| - frame->frame_info().presentation_time, submit_time)); |
| +void GpuOutput::PostErrorCallback() { |
| + compositor_task_runner_->PostTask(FROM_HERE, error_callback_); |
| } |
| -void GpuOutput::RasterizerDelegate::OnFrameSubmitted(int64_t frame_time, |
| - int64_t presentation_time, |
| - int64_t submit_time, |
| - bool presented) { |
| - TRACE_EVENT0("gfx", "GpuOutput::RasterizerDelegate::OnFrameSubmitted"); |
| - // TODO(jeffbrown): Adjust scheduler behavior based on observed timing. |
| - // Note: These measurements don't account for systematic downstream delay |
| - // in the display pipeline (how long it takes pixels to actually light up). |
| - int64_t complete_time = MojoGetTimeTicksNow(); |
| - if (presented) { |
| - DVLOG(3) << "Frame presented: submission latency " |
| - << (submit_time - frame_time) << " us, rasterization latency " |
| - << (complete_time - submit_time) << " us, total latency " |
| - << (complete_time - frame_time) << " us, presentation time error " |
| - << (complete_time - presentation_time); |
| - } else { |
| - DVLOG(3) << "Frame deferred."; |
| - } |
| +GpuOutput::FrameData::FrameData(const scoped_refptr<RenderFrame>& frame, |
| + int64_t submit_time) |
| + : frame(frame), submit_time(submit_time), draw_time(0), wait_time(0) {} |
|
mikejurka 2016/05/20 01:49:23: shouldn't we still initialize these guys to 0?
jeffbrown 2016/05/20 19:31:35: It's done in the header file.
|
| + |
| +GpuOutput::FrameData::~FrameData() {} |
| + |
| +void GpuOutput::FrameData::Recycle() { |
| + draw_time = 0; |
| + wait_time = 0; |
| } |
| } // namespace compositor |