services/gfx/compositor/backend/gpu_output.cc - Issue 1995873002: Mozart: Improve tracing and backpressure.

Side by Side Diff: services/gfx/compositor/backend/gpu_output.cc

Issue 1995873002: Mozart: Improve tracing and backpressure. (Closed) Base URL: git@github.com:domokit/mojo.git@master

Patch Set: Created 4 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« services/gfx/compositor/backend/gpu_output.h ('K') | « services/gfx/compositor/backend/gpu_output.h ('k') | services/gfx/compositor/backend/gpu_rasterizer.h » ('j') | services/gfx/compositor/backend/gpu_rasterizer.cc » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 // Copyright 2015 The Chromium Authors. All rights reserved.	1 // Copyright 2015 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "services/gfx/compositor/backend/gpu_output.h"	5 #include "services/gfx/compositor/backend/gpu_output.h"

6	6

7 #include <utility>	7 #include <utility>

8	8

9 #include "base/bind.h"	9 #include "base/bind.h"

	10 #include "base/command_line.h"

10 #include "base/location.h"	11 #include "base/location.h"

11 #include "base/logging.h"	12 #include "base/logging.h"

12 #include "base/message_loop/message_loop.h"	13 #include "base/message_loop/message_loop.h"

	14 #include "base/strings/string_number_conversions.h"

13 #include "base/trace_event/trace_event.h"	15 #include "base/trace_event/trace_event.h"

14 #include "services/gfx/compositor/backend/gpu_rasterizer.h"

15 #include "services/gfx/compositor/render/render_frame.h"	16 #include "services/gfx/compositor/render/render_frame.h"

16	17

17 namespace compositor {	18 namespace compositor {

18 namespace {	19 namespace {

19 // Maximum number of frames to hold in the queue for rendering.	20 constexpr const char* kPipelineDepthSwitch = "pipeline-depth";

20 constexpr size_t kMaxPipelineDepth = 1;	21 constexpr uint32_t kDefaultPipelineDepth = 2u; // ideally should be 1

21 }	22 constexpr uint32_t kMinPipelineDepth = 1u;

22	23 constexpr uint32_t kMaxPipelineDepth = 10u;
	mikejurka 2016/05/19 23:26:18 why should we ever be more than 3 frames ahead? why should we ever be more than 3 frames ahead? jeffbrown 2016/05/20 01:09:47 This is sort of arbitrary. The limit only exists Show quoted text On 2016/05/19 23:26:18, mikejurka wrote: > why should we ever be more than 3 frames ahead? This is sort of arbitrary. The limit only exists for experimentation. A depth of 10 would be ludicrous!
23 template <typename T>	24

24 static void Drop(scoped_ptr<T> ptr) {}	25 scoped_ptr<base::MessagePump> CreateMessagePumpMojo() {

25

26 static scoped_ptr<base::MessagePump> CreateMessagePumpMojo() {

27 return base::MessageLoop::CreateMessagePumpForType(	26 return base::MessageLoop::CreateMessagePumpForType(

28 base::MessageLoop::TYPE_DEFAULT);	27 base::MessageLoop::TYPE_DEFAULT);

29 }	28 }

	29 } // namespace

30	30

31 GpuOutput::GpuOutput(	31 GpuOutput::GpuOutput(

32 mojo::InterfaceHandle<mojo::ContextProvider> context_provider,	32 mojo::InterfaceHandle<mojo::ContextProvider> context_provider,

33 const SchedulerCallbacks& scheduler_callbacks,	33 const SchedulerCallbacks& scheduler_callbacks,

34 const base::Closure& error_callback)	34 const base::Closure& error_callback)

35 : scheduler_(new VsyncScheduler(base::MessageLoop::current()->task_runner(),	35 : compositor_task_runner_(base::MessageLoop::current()->task_runner()),

36 scheduler_callbacks)),	36 vsync_scheduler_(

37 rasterizer_delegate_(make_scoped_ptr(new RasterizerDelegate())) {	37 new VsyncScheduler(compositor_task_runner_, scheduler_callbacks)),

	38 rasterizer_thread_(new base::Thread("gpu_rasterizer")),

	39 rasterizer_initialized_(true, false) {

38 DCHECK(context_provider);	40 DCHECK(context_provider);

39	41

40 rasterizer_delegate_->PostInitialize(	42 pipeline_depth_ = kDefaultPipelineDepth;

41 std::move(context_provider), scheduler_,	43 auto command_line = base::CommandLine::ForCurrentProcess();

42 base::MessageLoop::current()->task_runner(), error_callback);	44 if (command_line->HasSwitch(kPipelineDepthSwitch)) {

43 }	45 std::string str(command_line->GetSwitchValueASCII(kPipelineDepthSwitch));

44	46 unsigned value;

45 GpuOutput::~GpuOutput() {	47 if (base::StringToUint(str, &value) && value >= kMinPipelineDepth &&

46 // Ensure destruction happens on the correct thread.	48 value <= kMaxPipelineDepth) {

47 rasterizer_delegate_->PostDestroy(rasterizer_delegate_.Pass());	49 pipeline_depth_ = value;

48 }	50 } else {

49	51 LOG(ERROR) << "Invalid value for --" << kPipelineDepthSwitch << ": \""

50 Scheduler* GpuOutput::GetScheduler() {	52 << str << "\"";
	mikejurka 2016/05/19 23:26:18 is there a way to exit/crash here? should we call is there a way to exit/crash here? should we call PostErrorCallback() here? jeffbrown 2016/05/20 01:09:47 Hmm. Not a bad idea. Show quoted text On 2016/05/19 23:26:18, mikejurka wrote: > is there a way to exit/crash here? should we call PostErrorCallback() here? Hmm. Not a bad idea.
51 return scheduler_.get();	53 }

52 }	54 }

53	55 DVLOG(2) << "Using pipeline depth " << pipeline_depth_;

54 void GpuOutput::SubmitFrame(const scoped_refptr<RenderFrame>& frame) {	56

55 rasterizer_delegate_->PostFrame(frame);

56 }

57

58 GpuOutput::RasterizerDelegate::RasterizerDelegate() {

59 base::Thread::Options options;	57 base::Thread::Options options;

60 options.message_pump_factory = base::Bind(&CreateMessagePumpMojo);	58 options.message_pump_factory = base::Bind(&CreateMessagePumpMojo);

61	59 rasterizer_thread_->StartWithOptions(options);

62 thread_.reset(new base::Thread("gpu_rasterizer"));	60 rasterizer_task_runner_ = rasterizer_thread_->message_loop()->task_runner();

63 thread_->StartWithOptions(options);	61 rasterizer_task_runner_->PostTask(

64 task_runner_ = thread_->message_loop()->task_runner();

65 }

66

67 GpuOutput::RasterizerDelegate::~RasterizerDelegate() {}

68

69 void GpuOutput::RasterizerDelegate::PostInitialize(

70 mojo::InterfaceHandle<mojo::ContextProvider> context_provider,

71 const scoped_refptr<VsyncScheduler>& scheduler,

72 const scoped_refptr<base::TaskRunner>& task_runner,

73 const base::Closure& error_callback) {

74 task_runner_->PostTask(

75 FROM_HERE,	62 FROM_HERE,

76 base::Bind(&RasterizerDelegate::InitializeTask, base::Unretained(this),	63 base::Bind(&GpuOutput::InitializeRasterizer, base::Unretained(this),

77 base::Passed(std::move(context_provider)), scheduler,	64 base::Passed(std::move(context_provider))));

78 base::MessageLoop::current()->task_runner(), error_callback));	65 rasterizer_initialized_.Wait();

79 }	66 DCHECK(rasterizer_);

80	67 }

81 void GpuOutput::RasterizerDelegate::PostDestroy(	68

82 scoped_ptr<RasterizerDelegate> self) {	69 GpuOutput::~GpuOutput() {

83 task_runner_->PostTask(	70 // Ensure rasterizer destruction happens on the rasterizer thread.

84 FROM_HERE, base::Bind(&Drop<RasterizerDelegate>, base::Passed(&self)));	71 rasterizer_task_runner_->PostTask(

85 }	72 FROM_HERE,

86	73 base::Bind(&GpuOutput::DestroyRasterizer, base::Unretained(this)));

87 void GpuOutput::RasterizerDelegate::PostFrame(	74 rasterizer_thread_->Stop();
	mikejurka 2016/05/19 23:26:18 does ->Stop() block until the thread is done? does ->Stop() block until the thread is done? jeffbrown 2016/05/20 01:09:47 Yes. Show quoted text On 2016/05/19 23:26:18, mikejurka wrote: > does ->Stop() block until the thread is done? Yes.
88 const scoped_refptr<RenderFrame>& frame) {	75 DCHECK(!rasterizer_);

89 bool was_empty;	76 }

90 scoped_refptr<RenderFrame> dropped_frame;	77

91 {	78 Scheduler* GpuOutput::GetScheduler() {

92 std::lock_guard<std::mutex> lock(mutex_);	79 return vsync_scheduler_.get();

93 was_empty = frames_.empty();	80 }

94 if (frames_.size() == kMaxPipelineDepth) {	81

95 // TODO(jeffbrown): Adjust scheduler behavior to compensate.	82 void GpuOutput::SubmitFrame(const scoped_refptr<RenderFrame>& frame) {

96 LOG(ERROR) << "Renderer pipeline stalled, dropping a frame to catch up.";	83 DCHECK(frame);

97 dropped_frame = frames_.front(); // drop an old frame outside the lock	84 TRACE_EVENT0("gfx", "GpuOutput::SubmitFrame");

98 frames_.pop();	85

	86 const int64_t submit_time = MojoGetTimeTicksNow();

	87 scoped_refptr<FrameData> frame_data(
	mikejurka 2016/05/19 23:26:18 is there a way we can avoid using a scoped_refptr is there a way we can avoid using a scoped_refptr here (using scoped_ptr instead) ? also, like mentioned before, FrameData already inherits from RefCountedThreadSafe -- why do we need to wrap it again? jeffbrown 2016/05/20 01:09:47 These two things go together. There's no wrapping Show quoted text On 2016/05/19 23:26:18, mikejurka wrote: > is there a way we can avoid using a scoped_refptr here (using scoped_ptr > instead) ? > > also, like mentioned before, FrameData already inherits from > RefCountedThreadSafe -- why do we need to wrap it again? These two things go together. There's no wrapping involved.
	88 new FrameData(frame, submit_time)); // drop outside lock

	89 {

	90 std::lock_guard<std::mutex> lock(shared_state_.mutex);

	91

	92 TRACE_EVENT_FLOW_BEGIN0("gfx", "Frame Queued", frame_data.get());

	93 shared_state_.current_frame_data.swap(frame_data);

	94 if (frame_data && !frame_data->draw_time) {
	mikejurka 2016/05/19 23:26:18 what does it mean if draw_time is 0? does it mean what does it mean if draw_time is 0? does it mean this frame was not drawn? a comment would be helpful. jeffbrown 2016/05/20 01:09:47 Added a comment to the header file. Show quoted text On 2016/05/19 23:26:18, mikejurka wrote: > what does it mean if draw_time is 0? does it mean this frame was not drawn? a > comment would be helpful. Added a comment to the header file.
	95 // Dropped an undrawn frame.

	96 DVLOG(2) << "Rasterizer stalled, dropping frame to catch up.";

	97 TRACE_EVENT_FLOW_END1("gfx", "Frame Queued", frame_data.get(), "drawn",

	98 false);

99 }	99 }

100 frames_.push(frame);	100

101 }	101 // TODO(jeffbrown): If the draw queue is full, we should pause

102	102 // scheduling until the queue drains.

103 if (was_empty)	103 if (shared_state_.rasterizer_ready &&

104 PostSubmit();	104 shared_state_.drawn_frames_awaiting_finish.size() < pipeline_depth_)

105 }	105 ScheduleDrawLocked();

106	106 }

107 void GpuOutput::RasterizerDelegate::PostSubmit() {	107 }

108 TRACE_EVENT0("gfx", "GpuOutput::RasterizerDelegate::PostSubmit");	108

109 task_runner_->PostTask(FROM_HERE, base::Bind(&RasterizerDelegate::SubmitTask,	109 void GpuOutput::OnRasterizerReady(int64_t vsync_timebase,

110 base::Unretained(this)));	110 int64_t vsync_interval) {

111 }	111 DCHECK(rasterizer_task_runner_->RunsTasksOnCurrentThread());

112	112

113 void GpuOutput::RasterizerDelegate::InitializeTask(	113 // TODO(jeffbrown): This shouldn't be hardcoded.

114 mojo::InterfaceHandle<mojo::ContextProvider> context_provider,	114 // Need to do some real tuning and possibly determine values adaptively.

115 const scoped_refptr<VsyncScheduler>& scheduler,	115 // We should probably split the Start() method in two to separate the

116 const scoped_refptr<base::TaskRunner>& task_runner,	116 // process of setting parameters from starting / stopping scheduling.

117 const base::Closure& error_callback) {	117 const int64_t update_phase = -vsync_interval;

	118 const int64_t snapshot_phase = -vsync_interval / 6;

	119 // TODO(jeffbrown): Determine the presentation phase based on queue depth.

	120 const int64_t presentation_phase = vsync_interval * pipeline_depth_;

	121 if (!vsync_scheduler_->Start(vsync_timebase, vsync_interval, update_phase,

	122 snapshot_phase, presentation_phase)) {

	123 LOG(ERROR) << "Received invalid vsync parameters: timebase="

	124 << vsync_timebase << ", interval=" << vsync_interval;

	125 PostErrorCallback();

	126 return;

	127 }

	128

	129 {

	130 std::lock_guard<std::mutex> lock(shared_state_.mutex);

	131

	132 if (shared_state_.rasterizer_ready)

	133 return;

	134

	135 DCHECK(shared_state_.drawn_frames_awaiting_finish.empty());

	136 shared_state_.rasterizer_ready = true;

	137

	138 if (!shared_state_.current_frame_data)

	139 return;

	140

	141 shared_state_.current_frame_data->Recycle();

	142 TRACE_EVENT_FLOW_BEGIN0("gfx", "Frame Queued",

	143 shared_state_.current_frame_data.get());

	144 ScheduleDrawLocked();

	145 }

	146 }

	147

	148 void GpuOutput::OnRasterizerSuspended() {

	149 DCHECK(rasterizer_task_runner_->RunsTasksOnCurrentThread());

	150

	151 vsync_scheduler_->Stop();

	152

	153 {

	154 std::lock_guard<std::mutex> lock(shared_state_.mutex);

	155

	156 if (!shared_state_.rasterizer_ready)

	157 return;

	158

	159 shared_state_.rasterizer_ready = false;

	160 }

	161 }

	162

	163 void GpuOutput::OnRasterizerError() {

	164 DCHECK(rasterizer_task_runner_->RunsTasksOnCurrentThread());

	165

	166 PostErrorCallback();

	167 }

	168

	169 void GpuOutput::ScheduleDrawLocked() {

	170 DCHECK(shared_state_.current_frame_data);

	171 DCHECK(!shared_state_.current_frame_data->draw_time);

	172

	173 if (shared_state_.draw_scheduled)

	174 return;

	175

	176 shared_state_.draw_scheduled = true;

	177 rasterizer_task_runner_->PostTask(

	178 FROM_HERE, base::Bind(&GpuOutput::OnDraw, base::Unretained(this)));

	179 }

	180

	181 void GpuOutput::OnDraw() {

	182 DCHECK(rasterizer_task_runner_->RunsTasksOnCurrentThread());

	183 TRACE_EVENT0("gfx", "GpuOutput::OnDraw");

	184

	185 scoped_refptr<FrameData> frame_data; // used outside lock

	186 {

	187 std::lock_guard<std::mutex> lock(shared_state_.mutex);

	188

	189 DCHECK(shared_state_.draw_scheduled);

	190 DCHECK(shared_state_.current_frame_data);

	191 DCHECK(!shared_state_.current_frame_data->draw_time);

	192

	193 shared_state_.draw_scheduled = false;

	194

	195 if (!shared_state_.rasterizer_ready \|\|

	196 shared_state_.drawn_frames_awaiting_finish.size() >= pipeline_depth_)

	197 return;

	198

	199 frame_data = shared_state_.current_frame_data;

	200 frame_data->draw_time = MojoGetTimeTicksNow();

	201 TRACE_EVENT_FLOW_END1("gfx", "Frame Queued", frame_data.get(), "drawn",

	202 true);

	203

	204 TRACE_EVENT_ASYNC_BEGIN0("gfx", "Rasterize", frame_data.get());

	205 shared_state_.drawn_frames_awaiting_finish.emplace(frame_data);

	206 }

	207

	208 rasterizer_->DrawFrame(frame_data->frame);

	209 frame_data->wait_time = MojoGetTimeTicksNow();

	210 }

	211

	212 void GpuOutput::OnRasterizerFinishedDraw(bool presented) {

	213 DCHECK(rasterizer_task_runner_->RunsTasksOnCurrentThread());

	214 TRACE_EVENT0("gfx", "GpuOutput::OnRasterizerFinishedDraw");

	215

	216 const int64_t finish_time = MojoGetTimeTicksNow();

	217 scoped_refptr<FrameData> frame_data; // drop outside lock

	218 {

	219 std::lock_guard<std::mutex> lock(shared_state_.mutex);

	220

	221 DCHECK(shared_state_.rasterizer_ready);

	222 DCHECK(!shared_state_.drawn_frames_awaiting_finish.empty());

	223 size_t draw_queue_depth = shared_state_.drawn_frames_awaiting_finish.size();

	224 shared_state_.drawn_frames_awaiting_finish.front().swap(frame_data);

	225 shared_state_.drawn_frames_awaiting_finish.pop();

	226 DCHECK(frame_data);

	227 DCHECK(frame_data->draw_time);

	228 TRACE_EVENT_ASYNC_END1("gfx", "Rasterize", frame_data.get(), "presented",

	229 presented);

	230

	231 // TODO(jeffbrown): Adjust scheduler behavior based on observed timing.

	232 // Note: These measurements don't account for systematic downstream delay

	233 // in the display pipeline (how long it takes pixels to actually light up).

	234 if (presented) {

	235 const RenderFrame::Metadata& frame_metadata =

	236 frame_data->frame->metadata();

	237 const mojo::gfx::composition::FrameInfo& frame_info =

	238 frame_metadata.frame_info();

	239 const int64_t frame_time = frame_info.frame_time;

	240 const int64_t presentation_time = frame_info.presentation_time;

	241 const int64_t composition_time = frame_metadata.composition_time();

	242 const int64_t draw_time = frame_data->draw_time;

	243 const int64_t wait_time = frame_data->wait_time;

	244 const int64_t submit_time = frame_data->submit_time;

	245

	246 DVLOG(2) << "Presented frame: composition latency "

	247 << (composition_time - frame_time) << " us, submission latency "

	248 << (submit_time - composition_time) << " us, queue latency "

	249 << (draw_time - submit_time) << " us, draw latency "

	250 << (wait_time - draw_time) << " us, GPU latency "

	251 << (finish_time - wait_time) << " us, total latency "

	252 << (finish_time - frame_time) << " us, presentation time error "

	253 << (finish_time - presentation_time) << " us"

	254 << ", draw queue depth " << draw_queue_depth;

	255 } else {

	256 DVLOG(2) << "Rasterizer dropped frame.";

	257 }

	258

	259 DCHECK(shared_state_.current_frame_data);

	260 if (!shared_state_.current_frame_data->draw_time)
	mikejurka 2016/05/19 23:26:18 again, what does it mean if draw_time is 0? how do again, what does it mean if draw_time is 0? how does that differ from "presented"? jeffbrown 2016/05/20 01:09:47 It's set in OnDraw() above. Basically means DrawF Show quoted text On 2016/05/19 23:26:18, mikejurka wrote: > again, what does it mean if draw_time is 0? how does that differ from > "presented"? It's set in OnDraw() above. Basically means DrawFrame was called. It seemed expedient to rely on the fact that time ticks are essentially always positive after boot but I guess it wouldn't hurt to make the flag explicit so I'll do that for clarity.
	261 ScheduleDrawLocked();

	262 }

	263 }

	264

	265 void GpuOutput::InitializeRasterizer(

	266 mojo::InterfaceHandle<mojo::ContextProvider> context_provider) {

	267 DCHECK(rasterizer_task_runner_->RunsTasksOnCurrentThread());

	268 DCHECK(!rasterizer_);

	269 TRACE_EVENT0("gfx", "GpuOutput::InitializeRasterizer");

	270

118 rasterizer_.reset(new GpuRasterizer(	271 rasterizer_.reset(new GpuRasterizer(

119 mojo::ContextProviderPtr::Create(std::move(context_provider)), scheduler,	272 mojo::ContextProviderPtr::Create(std::move(context_provider)), this));

120 task_runner, error_callback));	273 rasterizer_initialized_.Signal();

121 }	274 }

122	275

123 void GpuOutput::RasterizerDelegate::SubmitTask() {	276 void GpuOutput::DestroyRasterizer() {

124 TRACE_EVENT0("gfx", "GpuOutput::RasterizerDelegate::SubmitTask");	277 DCHECK(rasterizer_task_runner_->RunsTasksOnCurrentThread());

125 bool have_more;	278 DCHECK(rasterizer_);

126 scoped_refptr<RenderFrame> frame;	279 TRACE_EVENT0("gfx", "GpuOutput::DestroyRasterizer");

127 {	280

128 std::lock_guard<std::mutex> lock(mutex_);	281 rasterizer_.reset();

129 DCHECK(!frames_.empty());	282 rasterizer_initialized_.Reset();

130 frame = frames_.front();	283 }

131 frames_.pop();	284

132 have_more = !frames_.empty();	285 void GpuOutput::PostErrorCallback() {

133 }	286 compositor_task_runner_->PostTask(FROM_HERE, error_callback_);

134	287 }

135 if (have_more)	288

136 PostSubmit();	289 GpuOutput::FrameData::FrameData(const scoped_refptr<RenderFrame>& frame,

137	290 int64_t submit_time)

138 int64_t submit_time = MojoGetTimeTicksNow();	291 : frame(frame), submit_time(submit_time), draw_time(0), wait_time(0) {}
	mikejurka 2016/05/20 01:49:23 shouldn't we still initialize these guys to 0? shouldn't we still initialize these guys to 0? jeffbrown 2016/05/20 19:31:35 It's done in the header file. Show quoted text On 2016/05/20 01:49:23, mikejurka wrote: > shouldn't we still initialize these guys to 0? It's done in the header file.
139 rasterizer_->SubmitFrame(	292

140 frame, base::Bind(&RasterizerDelegate::OnFrameSubmitted,	293 GpuOutput::FrameData::~FrameData() {}

141 base::Unretained(this), frame->frame_info().frame_time,	294

142 frame->frame_info().presentation_time, submit_time));	295 void GpuOutput::FrameData::Recycle() {

143 }	296 draw_time = 0;

144	297 wait_time = 0;

145 void GpuOutput::RasterizerDelegate::OnFrameSubmitted(int64_t frame_time,

146 int64_t presentation_time,

147 int64_t submit_time,

148 bool presented) {

149 TRACE_EVENT0("gfx", "GpuOutput::RasterizerDelegate::OnFrameSubmitted");

150 // TODO(jeffbrown): Adjust scheduler behavior based on observed timing.

151 // Note: These measurements don't account for systematic downstream delay

152 // in the display pipeline (how long it takes pixels to actually light up).

153 int64_t complete_time = MojoGetTimeTicksNow();

154 if (presented) {

155 DVLOG(3) << "Frame presented: submission latency "

156 << (submit_time - frame_time) << " us, rasterization latency "

157 << (complete_time - submit_time) << " us, total latency "

158 << (complete_time - frame_time) << " us, presentation time error "

159 << (complete_time - presentation_time);

160 } else {

161 DVLOG(3) << "Frame deferred.";

162 }

163 }	298 }

164	299

165 } // namespace compositor	300 } // namespace compositor

OLD	NEW