Chromium Code Reviews| Index: content/child/child_thread_impl_perftest.cc |
| diff --git a/content/child/child_thread_impl_perftest.cc b/content/child/child_thread_impl_perftest.cc |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..abe88032fa5b8c51b9e83cf72de94ffdad5a0aef |
| --- /dev/null |
| +++ b/content/child/child_thread_impl_perftest.cc |
| @@ -0,0 +1,222 @@ |
| +// Copyright 2015 The Chromium Authors. All rights reserved. |
| +// Use of this source code is governed by a BSD-style license that can be |
| +// found in the LICENSE file. |
| + |
| +// The idea is to benchmark how the hardware, on different usages of |
| +// GpuMemoryBuffer, performs when the native buffer object (bo) is mapped into |
| +// the CPU. In particular this test aims to capture the effects of data |
| +// coherency and answer the following: |
| +// |
| +// - measure memory mapping performance of GpuMemoryBuffer using shared memory |
| +// (fallback case) and also native implementation of it. |
| +// - what if the Renderer process (client) just writes into the buffer object? |
| +// - what's the effect of reading from a write-combining (WC) memory? can we |
| +// avoid read backs? |
| +// - should it be UC and/or WC mapped, to get a faster access? |
| +// - what's the effect of clients doing sequential versus non-sequential |
| +// writes? If the latter, a WC mapping may end up being very slow. |
| + |
| +#include "base/bind.h" |
| +#include "base/command_line.h" |
| +#include "base/memory/scoped_vector.h" |
| +#include "base/time/time.h" |
| +#include "content/child/child_gpu_memory_buffer_manager.h" |
| +#include "content/child/child_thread_impl.h" |
| +#include "content/common/gpu/client/gpu_memory_buffer_impl.h" |
| +#include "content/public/common/content_switches.h" |
| +#include "content/public/test/content_browser_test.h" |
| +#include "content/public/test/content_browser_test_utils.h" |
| +#include "content/shell/browser/shell.h" |
| +#include "testing/perf/perf_test.h" |
| +#include "url/gurl.h" |
| + |
| +namespace content { |
| +namespace { |
| + |
| +// GpuMemoryBuffer manager taken from ChildThreadImpl::current() by |
| +// ChildThreadImplBrowserTest::SetUpOnChildThread(); null until that has run. |
| +ChildGpuMemoryBufferManager* child_gpu_memory_buffer_manager_ = nullptr; |
|
dshwang
2015/06/25 10:52:11
nullptr.
why don't BufferPerfTest have this as mem
|
| + |
| +// Number of Map()/Unmap() iterations performed by each perf test. The |
| +// enclosing anonymous namespace already gives this internal linkage. |
| +const int kNumRuns = 30; |
|
dshwang
2015/06/25 10:52:11
'static' is not needed because of anonymous namesp
|
| + |
| +// Test parameter: whether switches::kEnableNativeGpuMemoryBuffers is added to |
| +// the command line for the run. |
| +enum NativeBufferFlag { kDisableNativeBuffers, kEnableNativeBuffers }; |
| + |
| +// Returns the suffix used for |flag| when labelling perf results: "_native" |
| +// when native buffers are enabled, the empty string otherwise. |
| +std::string NativeBufferFlagName(NativeBufferFlag flag) { |
| +  switch (flag) { |
| +    case kEnableNativeBuffers: |
| +      return "_native"; |
| +    case kDisableNativeBuffers: |
| +      return std::string(); |
| +  } |
| + |
| +  // Only reachable if |flag| holds a value outside the enum. |
| +  NOTREACHED(); |
| +  return std::string(); |
| +} |
| + |
| +// Flag of the current parameterization. Written by |
| +// ChildThreadImplGpuMemoryBufferPerfTest::SetUpCommandLine() and read by |
| +// BufferPerfTest::Map() to label results. The enclosing anonymous namespace |
| +// already gives this internal linkage, so no 'static' is needed. |
| +NativeBufferFlag native_buffer_flag_ = kDisableNativeBuffers; |
|
dshwang
2015/06/25 10:52:11
why don't ChildThreadImplGpuMemoryBufferPerfTest h
|
| + |
| +// NOTE(review): nothing in this file references MemoryOperation or its |
| +// enumerators; it appears to be unused and is a candidate for removal. |
| +enum MemoryOperation { kMemoryOperationWrite, kMemoryOperationNoop }; |
|
dshwang
2015/06/25 10:52:11
unused
|
| + |
| +// Owns a single 4x4 BGRA_8888 GpuMemoryBuffer allocated through |
| +// |child_gpu_memory_buffer_manager_| and exposes the allocate / map / write / |
| +// unmap steps that the perf tests sequence. Allocate() must succeed before |
| +// Map(), Unmap() or Write() is used, since they dereference |
| +// |gpu_memory_buffer_|. |
| +class BufferPerfTest { |
| + public: |
| +  BufferPerfTest() |
| +      : gpu_memory_buffer_(nullptr), |
| +        num_planes_(0), |
| +        format_(gfx::GpuMemoryBuffer::BGRA_8888), |
| +        buffer_size_(4, 4) {} |
| + |
| +  // Allocates the buffer with MAP usage, checks that it reports |format_| and |
| +  // caches the number of planes for that format. |
| +  void Allocate() { |
| +    gpu_memory_buffer_ = |
| +        child_gpu_memory_buffer_manager_->AllocateGpuMemoryBuffer( |
| +            buffer_size_, format_, gfx::GpuMemoryBuffer::MAP); |
| +    ASSERT_TRUE(gpu_memory_buffer_); |
| + |
| +    EXPECT_EQ(format_, gpu_memory_buffer_->GetFormat()); |
| + |
| +    num_planes_ = |
| +        GpuMemoryBufferImpl::NumberOfPlanesForGpuMemoryBufferFormat(format_); |
| +  } |
| + |
| +  // Maps the buffer into |planes| and reports the duration of the Map() call |
| +  // alone, in microseconds, through perf_test::PrintResult(). |
| +  // |operation_name| only labels the result; anything done after mapping |
| +  // (e.g. Write()) is not part of the measurement. |
| +  void Map(const scoped_ptr<void*[]>& planes, |
| +           const std::string& operation_name) { |
| +    const std::string flag_name = NativeBufferFlagName(native_buffer_flag_); |
| + |
| +    const base::TimeTicks start = base::TimeTicks::Now(); |
| +    const bool rv = gpu_memory_buffer_->Map(planes.get()); |
| +    const base::TimeTicks end = base::TimeTicks::Now(); |
| +    ASSERT_TRUE(rv); |
| +    EXPECT_TRUE(gpu_memory_buffer_->IsMapped()); |
| + |
| +    // TODO(vignatti): get the mean time and print to stdout only once. At the |
| +    // moment it is useful to check individual runs though, because for example |
| +    // VGEM has far worse performance on its first runs (need to check why). |
| +    perf_test::PrintResult( |
|
dshwang
2015/06/25 10:52:11
This test measures only Map time even in "time_to_
|
| +        "time_to_execute_map", |
| +        flag_name, |
| +        operation_name, |
| +        static_cast<size_t>((end - start).InMicroseconds()), |
| +        "us", true); |
| +  } |
| + |
| +  // Unmaps the buffer and checks that it no longer reports being mapped. |
| +  void Unmap() { |
| +    gpu_memory_buffer_->Unmap(); |
| +    EXPECT_FALSE(gpu_memory_buffer_->IsMapped()); |
| +  } |
| + |
| +  // Fills every row of every plane behind |planes| with a per-plane byte |
| +  // value, using one memcpy() per row. |
| +  void Write(const scoped_ptr<void*[]>& planes) { |
| +    // Get stride. |
| +    scoped_ptr<int[]> strides(new int[num_planes_]); |
| +    gpu_memory_buffer_->GetStride(strides.get()); |
| + |
| +    for (size_t plane = 0; plane < num_planes_; ++plane) { |
| +      size_t row_size_in_bytes = 0; |
| +      EXPECT_TRUE(GpuMemoryBufferImpl::RowSizeInBytes( |
| +          buffer_size_.width(), format_, plane, &row_size_in_bytes)); |
| + |
| +      scoped_ptr<char[]> data(new char[row_size_in_bytes]); |
| +      memset(data.get(), 0x2a + plane, row_size_in_bytes); |
| + |
| +      const size_t height = |
| +          buffer_size_.height() / |
| +          GpuMemoryBufferImpl::SubsamplingFactor(format_, plane); |
| +      for (size_t y = 0; y < height; ++y) { |
| +        // Copy |data| to row |y| of |plane| and verify result. |
| +        char* const row = |
| +            static_cast<char*>(planes[plane]) + y * strides[plane]; |
| +        memcpy(row, data.get(), row_size_in_bytes); |
| +        // NOTE(review): the read-back check is compiled only when NDEBUG is |
| +        // defined; confirm that is intended rather than !defined(NDEBUG). |
| +#if defined(NDEBUG) |
| +        EXPECT_EQ(memcmp(row, data.get(), row_size_in_bytes), 0); |
| +#endif |
| +      } |
| +    } |
| +  } |
| + |
| +  // Number of planes cached by Allocate(); 0 before Allocate() has run. |
| +  size_t GetNumPlanes() const { return num_planes_; } |
| + |
| + private: |
| +  scoped_ptr<gfx::GpuMemoryBuffer> gpu_memory_buffer_; |
| +  size_t num_planes_; |
| +  gfx::GpuMemoryBuffer::Format format_; |
| +  gfx::Size buffer_size_; |
| +}; |
| + |
| +// Browser test fixture that runs with switches::kSingleProcess and, after |
| +// navigating to about:blank, stores the GpuMemoryBuffer manager of |
| +// ChildThreadImpl::current() in |child_gpu_memory_buffer_manager_|. |
| +class ChildThreadImplBrowserTest : public ContentBrowserTest { |
| + public: |
| +  ChildThreadImplBrowserTest() {} |
| + |
| +  // Overridden from BrowserTestBase: |
| +  void SetUpCommandLine(base::CommandLine* command_line) override { |
| +    command_line->AppendSwitch(switches::kSingleProcess); |
| +  } |
| + |
| +  void SetUpOnMainThread() override { |
| +    const GURL blank_page(url::kAboutBlankURL); |
| +    NavigateToURL(shell(), blank_page); |
| + |
| +    // SetUpOnChildThread() is handed to the in-process renderer. |
| +    PostTaskToInProcessRendererAndWait( |
| +        base::Bind(&ChildThreadImplBrowserTest::SetUpOnChildThread, this)); |
| +  } |
| + |
| + private: |
| +  // Caches the current ChildThreadImpl's GpuMemoryBuffer manager. |
| +  void SetUpOnChildThread() { |
| +    ChildThreadImpl* const child_thread = ChildThreadImpl::current(); |
| +    child_gpu_memory_buffer_manager_ = |
| +        child_thread->gpu_memory_buffer_manager(); |
| +  } |
| +}; |
| + |
| +// Parameterized fixture: appends switches::kEnableNativeGpuMemoryBuffers to |
| +// the command line when the test parameter is kEnableNativeBuffers, and owns |
| +// the BufferPerfTest instance the test bodies operate on. |
| +class ChildThreadImplGpuMemoryBufferPerfTest |
| +    : public ChildThreadImplBrowserTest, |
| +      public testing::WithParamInterface< |
| +          ::testing::tuple<NativeBufferFlag>> { |
| + public: |
| +  ChildThreadImplGpuMemoryBufferPerfTest() {} |
| + |
| +  // Overridden from BrowserTestBase: |
| +  void SetUpCommandLine(base::CommandLine* command_line) override { |
| +    ChildThreadImplBrowserTest::SetUpCommandLine(command_line); |
| + |
| +    // Stored globally so BufferPerfTest::Map() can label its results. |
| +    native_buffer_flag_ = ::testing::get<0>(GetParam()); |
| +    if (native_buffer_flag_ == kEnableNativeBuffers) |
| +      command_line->AppendSwitch(switches::kEnableNativeGpuMemoryBuffers); |
| +  } |
| + |
| + protected: |
| +  // Created afresh by each test body. |
| +  scoped_ptr<BufferPerfTest> buffer_; |
| + |
| + private: |
| +  DISALLOW_COPY_AND_ASSIGN(ChildThreadImplGpuMemoryBufferPerfTest); |
| +}; |
| + |
| +// Maps the buffer, fills every plane through the mapping and unmaps it again, |
| +// kNumRuns times. Each Map() call is reported with the label "Write". |
| +IN_PROC_BROWSER_TEST_P(ChildThreadImplGpuMemoryBufferPerfTest, Write) { |
| +  buffer_.reset(new BufferPerfTest()); |
| +  buffer_->Allocate(); |
| + |
| +  const size_t plane_count = buffer_->GetNumPlanes(); |
| +  scoped_ptr<void*[]> planes(new void*[plane_count]); |
| + |
| +  int runs_left = kNumRuns; |
| +  while (runs_left-- > 0) { |
| +    buffer_->Map(planes, "Write"); |
| +    buffer_->Write(planes); |
|
dshwang
2015/06/25 10:52:11
I think Map and Unmap should be out of 'for block'
|
| +    buffer_->Unmap(); |
| +  } |
| +} |
| + |
| +// Maps and immediately unmaps the buffer kNumRuns times without touching its |
| +// contents. Each Map() call is reported with the label "Map". |
| +IN_PROC_BROWSER_TEST_P(ChildThreadImplGpuMemoryBufferPerfTest, Map) { |
| +  buffer_.reset(new BufferPerfTest()); |
| +  buffer_->Allocate(); |
| + |
| +  const size_t plane_count = buffer_->GetNumPlanes(); |
| +  scoped_ptr<void*[]> planes(new void*[plane_count]); |
| + |
| +  int runs_left = kNumRuns; |
| +  while (runs_left-- > 0) { |
| +    buffer_->Map(planes, "Map"); |
| +    buffer_->Unmap(); |
| +  } |
| +} |
| + |
| +// Instantiates the fixture's tests once per NativeBufferFlag value, i.e. with |
| +// native GpuMemoryBuffers disabled and enabled. |
| +INSTANTIATE_TEST_CASE_P( |
| + ChildThreadImplGpuMemoryBufferPerfTests, |
| + ChildThreadImplGpuMemoryBufferPerfTest, |
| + ::testing::Values(kDisableNativeBuffers, kEnableNativeBuffers)); |
| +} // namespace |
| +} // namespace content |