content/child/child_thread_impl_perftest.cc - Issue 1187793006: content: perf tests for GpuMemoryBuffers mapping and data coherency

Unified Diff: content/child/child_thread_impl_perftest.cc

Issue 1187793006: content: perf tests for GpuMemoryBuffers mapping and data coherency (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: add description, move from DRAFT to a real CL Created 5 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: content/child/child_thread_impl_perftest.cc

diff --git a/content/child/child_thread_impl_perftest.cc b/content/child/child_thread_impl_perftest.cc

new file mode 100644

index 0000000000000000000000000000000000000000..abe88032fa5b8c51b9e83cf72de94ffdad5a0aef

--- /dev/null

+++ b/content/child/child_thread_impl_perftest.cc

@@ -0,0 +1,222 @@

+// Use of this source code is governed by a BSD-style license that can be

+// found in the LICENSE file.

+// The idea is to benchmark how the hardware, on different usages of

+// GpuMemoryBuffer, performs when the native buffer object (bo) is mapped into

+// the CPU. In particular this test aims to capture the effects of data

+// coherency and answer the following:

+//

+// - measure memory mapping performance of GpuMemoryBuffer using shared memory

+// (fallback case) and also native implementation of it.

+// - what if the Renderer process (client) just writes into the buffer object?

+// - what's the effect of reading from write-combining (WC) memory? Can we

+// avoid read-backs?

+// - should it be UC and/or WC mapped to get faster access?

+// - what's the effect of clients doing sequential versus non-sequential

+// writes? In the latter case, a WC mapping may end up being very slow.

+#include "base/bind.h"

+#include "base/command_line.h"

+#include "base/memory/scoped_vector.h"

+#include "base/time/time.h"

+#include "content/child/child_gpu_memory_buffer_manager.h"

+#include "content/child/child_thread_impl.h"

+#include "content/common/gpu/client/gpu_memory_buffer_impl.h"

+#include "content/public/common/content_switches.h"

+#include "content/public/test/content_browser_test.h"

+#include "content/public/test/content_browser_test_utils.h"

+#include "content/shell/browser/shell.h"

+#include "testing/perf/perf_test.h"

+#include "url/gurl.h"

+namespace content {

+namespace {

+// Buffer manager used by BufferPerfTest::Allocate(); assigned in
+// ChildThreadImplBrowserTest::SetUpOnChildThread().
+ChildGpuMemoryBufferManager* child_gpu_memory_buffer_manager_ = nullptr;

dshwang 2015/06/25 10:52:11 nullptr. why don't BufferPerfTest have this as mem

+// Number of Map()/Unmap() iterations each perf test performs. Internal
+// linkage already comes from the enclosing anonymous namespace.
+const int kNumRuns = 30;

dshwang 2015/06/25 10:52:11 'static' is not needed because of anonymous namesp

+// Test parameter selecting whether the native GpuMemoryBuffer switch is
+// appended to the command line (see SetUpCommandLine in the perf fixture).
+enum NativeBufferFlag {
+  kDisableNativeBuffers,
+  kEnableNativeBuffers
+};

+// Returns the label associated with |flag|: an empty string when native
+// buffers are disabled and "_native" when they are enabled. BufferPerfTest
+// passes this label to perf_test::PrintResult().
+std::string NativeBufferFlagName(NativeBufferFlag flag) {
+  switch (flag) {
+    case kDisableNativeBuffers:
+      return "";
+    case kEnableNativeBuffers:
+      return "_native";
+  }
+  // Only reachable if |flag| holds a value outside the enum.
+  NOTREACHED();
+  return "";
+}

+// Parameter of the currently running test; written in
+// ChildThreadImplGpuMemoryBufferPerfTest::SetUpCommandLine() and read when
+// labelling results. Explicitly starts out as kDisableNativeBuffers, which is
+// the value zero-initialization previously produced.
+NativeBufferFlag native_buffer_flag_ = kDisableNativeBuffers;

dshwang 2015/06/25 10:52:11 why don't ChildThreadImplGpuMemoryBufferPerfTest h

+// NOTE(review): nothing in the visible part of this file references this
+// enum; confirm whether it is still needed.
+enum MemoryOperation {
+  kMemoryOperationWrite,
+  kMemoryOperationNoop
+};

dshwang 2015/06/25 10:52:11 unused

+// Owns a single 4x4 BGRA_8888 GpuMemoryBuffer and exposes the individual
+// steps (allocate, map, write, unmap) that the perf tests in this file drive.
+class BufferPerfTest {
+ public:
+  BufferPerfTest()
+      : gpu_memory_buffer_(nullptr),
+        num_planes_(0),
+        format_(gfx::GpuMemoryBuffer::BGRA_8888),
+        buffer_size_(4, 4) {}
+
+  // Allocates the buffer through |child_gpu_memory_buffer_manager_| and
+  // records how many planes |format_| has. A failed allocation raises a
+  // fatal gtest failure, which only returns from this function; callers
+  // must check for it themselves.
+  void Allocate() {
+    gpu_memory_buffer_ =
+        child_gpu_memory_buffer_manager_->AllocateGpuMemoryBuffer(
+            buffer_size_, format_, gfx::GpuMemoryBuffer::MAP);
+    ASSERT_TRUE(gpu_memory_buffer_);
+    EXPECT_EQ(format_, gpu_memory_buffer_->GetFormat());
+    num_planes_ =
+        GpuMemoryBufferImpl::NumberOfPlanesForGpuMemoryBufferFormat(format_);
+  }
+
+  // Maps the buffer into |planes| (one pointer per plane) and prints how long
+  // the Map() call alone took, in microseconds, using |operation_name| as the
+  // trace name.
+  void Map(const scoped_ptr<void*[]>& planes,
+           const std::string& operation_name) {
+    const std::string flag_name = NativeBufferFlagName(native_buffer_flag_);
+
+    // Only the Map() call itself sits between the two timestamps.
+    const base::TimeTicks start = base::TimeTicks::Now();
+    const bool rv = gpu_memory_buffer_->Map(planes.get());
+    const base::TimeTicks end = base::TimeTicks::Now();
+
+    ASSERT_TRUE(rv);
+    EXPECT_TRUE(gpu_memory_buffer_->IsMapped());
+
+    // TODO(vignatti): compute the mean time and print to stdout only once.
+    // At the moment printing individual runs is useful because, for example,
+    // VGEM performs far worse on its first runs (need to check why).
+    perf_test::PrintResult(
dshwang 2015/06/25 10:52:11 This test measures only Map time even in "time_to_
+        "time_to_execute_map",
+        flag_name,
+        operation_name,
+        static_cast<size_t>((end - start).InMicroseconds()),
+        "us", true);
+  }
+
+  // Unmaps the buffer and checks that it no longer reports being mapped.
+  void Unmap() {
+    gpu_memory_buffer_->Unmap();
+    EXPECT_FALSE(gpu_memory_buffer_->IsMapped());
+  }
+
+  // Fills every row of every plane in |planes| with a per-plane byte pattern
+  // (0x2a + plane index), copying one row at a time at the plane's stride.
+  void Write(const scoped_ptr<void*[]>& planes) {
+    // Get stride.
+    scoped_ptr<int[]> strides(new int[num_planes_]);
+    gpu_memory_buffer_->GetStride(strides.get());
+
+    for (size_t plane = 0; plane < num_planes_; ++plane) {
+      size_t row_size_in_bytes = 0;
+      EXPECT_TRUE(GpuMemoryBufferImpl::RowSizeInBytes(
+          buffer_size_.width(), format_, plane, &row_size_in_bytes));
+
+      // Source row holding the pattern for this plane.
+      scoped_ptr<char[]> data(new char[row_size_in_bytes]);
+      memset(data.get(), static_cast<int>(0x2a + plane), row_size_in_bytes);
+
+      const size_t height =
+          buffer_size_.height() /
+          GpuMemoryBufferImpl::SubsamplingFactor(format_, plane);
+      for (size_t y = 0; y < height; ++y) {
+        char* row = static_cast<char*>(planes[plane]) + y * strides[plane];
+        memcpy(row, data.get(), row_size_in_bytes);
+        // NOTE(review): this read-back check is compiled only when NDEBUG is
+        // defined; confirm that is intended rather than !defined(NDEBUG).
+#if defined(NDEBUG)
+        EXPECT_EQ(memcmp(row, data.get(), row_size_in_bytes), 0);
+#endif
+      }
+    }
+  }
+
+  // Number of planes recorded by Allocate(); 0 before it has run.
+  size_t GetNumPlanes() const { return num_planes_; }
+
+ private:
+  scoped_ptr<gfx::GpuMemoryBuffer> gpu_memory_buffer_;
+  size_t num_planes_;
+  gfx::GpuMemoryBuffer::Format format_;
+  gfx::Size buffer_size_;
+};

+// Browser-test fixture that appends the single-process switch and, after
+// navigating the shell to about:blank, stores the child thread's
+// GpuMemoryBuffer manager in |child_gpu_memory_buffer_manager_|.
+class ChildThreadImplBrowserTest : public ContentBrowserTest {
+ public:
+  ChildThreadImplBrowserTest() {}
+
+  // BrowserTestBase implementation.
+  void SetUpCommandLine(base::CommandLine* command_line) override {
+    command_line->AppendSwitch(switches::kSingleProcess);
+  }
+
+  void SetUpOnMainThread() override {
+    const GURL blank_page(url::kAboutBlankURL);
+    NavigateToURL(shell(), blank_page);
+    // Fetch the manager via a task posted to the in-process renderer.
+    PostTaskToInProcessRendererAndWait(base::Bind(
+        &ChildThreadImplBrowserTest::SetUpOnChildThread, this));
+  }
+
+ private:
+  // Body of the task posted by SetUpOnMainThread().
+  void SetUpOnChildThread() {
+    ChildThreadImpl* const child_thread = ChildThreadImpl::current();
+    child_gpu_memory_buffer_manager_ =
+        child_thread->gpu_memory_buffer_manager();
+  }
+};

+// Value-parameterized fixture: the parameter decides whether the native
+// GpuMemoryBuffer switch is added to the command line for the test run.
+class ChildThreadImplGpuMemoryBufferPerfTest
+    : public ChildThreadImplBrowserTest,
+      public ::testing::WithParamInterface<
+          ::testing::tuple<NativeBufferFlag>> {
+ public:
+  ChildThreadImplGpuMemoryBufferPerfTest() {}
+
+  // Overridden from BrowserTestBase:
+  void SetUpCommandLine(base::CommandLine* command_line) override {
+    ChildThreadImplBrowserTest::SetUpCommandLine(command_line);
+    // Remember the parameter so BufferPerfTest can label its results.
+    native_buffer_flag_ = ::testing::get<0>(GetParam());
+    switch (native_buffer_flag_) {
+      case kEnableNativeBuffers:
+        command_line->AppendSwitch(switches::kEnableNativeGpuMemoryBuffers);
+        break;
+      case kDisableNativeBuffers:
+        break;
+    }
+  }
+
+ protected:
+  // Buffer under test; created by each test body.
+  scoped_ptr<BufferPerfTest> buffer_;
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(ChildThreadImplGpuMemoryBufferPerfTest);
+};

+// Maps the buffer, writes every plane, and unmaps it, kNumRuns times. Each
+// iteration prints the time taken by the Map() call.
+IN_PROC_BROWSER_TEST_P(ChildThreadImplGpuMemoryBufferPerfTest, Write) {
+  buffer_.reset(new BufferPerfTest());
+  // Allocate() and Map() report fatal failures from inside a helper, which
+  // would not stop this test body by itself; stop here instead of continuing
+  // with a null buffer or unmapped planes.
+  ASSERT_NO_FATAL_FAILURE(buffer_->Allocate());
+  scoped_ptr<void*[]> planes(new void*[buffer_->GetNumPlanes()]);
+  for (int i = 0; i < kNumRuns; ++i) {
+    ASSERT_NO_FATAL_FAILURE(buffer_->Map(planes, "Write"));
+    buffer_->Write(planes);
dshwang 2015/06/25 10:52:11 I think Map and Unmap should be out of 'for block'
+    buffer_->Unmap();
+  }
+}

+// Maps and immediately unmaps the buffer kNumRuns times. Each iteration
+// prints the time taken by the Map() call.
+IN_PROC_BROWSER_TEST_P(ChildThreadImplGpuMemoryBufferPerfTest, Map) {
+  buffer_.reset(new BufferPerfTest());
+  // Allocate() and Map() report fatal failures from inside a helper, which
+  // would not stop this test body by itself; stop here instead of continuing
+  // with a null or unmapped buffer.
+  ASSERT_NO_FATAL_FAILURE(buffer_->Allocate());
+  scoped_ptr<void*[]> planes(new void*[buffer_->GetNumPlanes()]);
+  for (int i = 0; i < kNumRuns; ++i) {
+    ASSERT_NO_FATAL_FAILURE(buffer_->Map(planes, "Map"));
+    buffer_->Unmap();
+  }
+}

+// Instantiate every test once per NativeBufferFlag value.
+INSTANTIATE_TEST_CASE_P(ChildThreadImplGpuMemoryBufferPerfTests,
+                        ChildThreadImplGpuMemoryBufferPerfTest,
+                        ::testing::Values(kDisableNativeBuffers,
+                                          kEnableNativeBuffers));

+} // namespace

+} // namespace content

« no previous file with comments | « no previous file | content/content_tests.gypi » ('j') | no next file with comments »