Index: content/common/gpu/client/gpu_memory_buffer_impl_perftest.cc
diff --git a/content/common/gpu/client/gpu_memory_buffer_impl_perftest.cc b/content/common/gpu/client/gpu_memory_buffer_impl_perftest.cc
new file mode 100644
index 0000000000000000000000000000000000000000..34a652f93e05578c730b4c48f5ad3adba6787868
--- /dev/null
+++ b/content/common/gpu/client/gpu_memory_buffer_impl_perftest.cc
@@ -0,0 +1,243 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// This benchmark measures how the hardware performs, under different usages
+// of GpuMemoryBuffer, when a native buffer object is mapped into the CPU
+// address space. In particular it aims to capture the effects of data
+// coherency and could be extended further to answer the following:
+//
+// - measure the memory mapping performance of GpuMemoryBuffer using shared
+//   memory (the fallback case) as well as the native implementations.
+// - what if the Renderer process (client) just writes into the buffer object?
+// - what is the effect of reading from write-combining (WC) memory? Can we
+//   avoid read-backs?
+// - should the buffer be mapped uncached (UC) and/or write-combining (WC) to
+//   get faster access?
+// - what is the effect of clients doing sequential versus non-sequential
+//   writes? In the latter case, a WC mapping may end up being very slow.
+
+#include "content/common/gpu/client/gpu_memory_buffer_impl.h" |
+ |
+#include "base/bind.h" |
+#include "content/common/gpu/gpu_memory_buffer_factory.h" |
+#include "testing/gtest/include/gtest/gtest.h" |
+#include "testing/perf/perf_test.h" |
+ |
+namespace content { |
+namespace { |
+ |
+const int kNumRuns = 1000;
+const int kClientId = 1;
+
+// Do we want to move this into a public API?
+std::string GpuMemoryBufferTypeName(gfx::GpuMemoryBufferType type) {
+  switch (type) {
+    case gfx::SHARED_MEMORY_BUFFER:
+      return "shared_memory";
+    case gfx::IO_SURFACE_BUFFER:
+      return "io_surface";
+    case gfx::SURFACE_TEXTURE_BUFFER:
+      return "surface_texture";
+    case gfx::OZONE_NATIVE_BUFFER:
+      return "ozone_native";
+    default:
+      NOTREACHED();
+      return "";
+  }
+}
+
+class GpuMemoryBufferPerfTest
+    : public testing::TestWithParam<gfx::GpuMemoryBufferType> {
+ public:
+  GpuMemoryBufferPerfTest()
+      : total_time_(0),
+        buffer_count_(0),
+        buffer_size_(128, 128),
+        buffer_(nullptr),
+        factory_(nullptr) {}
+
+  // Overridden from testing::Test:
+  void SetUp() override {
+    factory_ = GpuMemoryBufferFactory::Create(GetParam());
+  }
+  void TearDown() override { factory_.reset(); }
+
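+  // Allocates a buffer of |size| through the factory on behalf of |kClientId|
+  // and keeps track of the number of outstanding buffers.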
+  gfx::GpuMemoryBufferHandle CreateGpuMemoryBuffer(
+      gfx::GpuMemoryBufferId id,
+      const gfx::Size& size,
+      gfx::GpuMemoryBuffer::Format format,
+      gfx::GpuMemoryBuffer::Usage usage) {
+    ++buffer_count_;
+    return factory_->CreateGpuMemoryBuffer(id, size, format, usage, kClientId,
+                                           gfx::kNullPluginWindow);
+  }
+
+  void DestroyGpuMemoryBuffer(gfx::GpuMemoryBufferId id, uint32 sync_point) {
+    factory_->DestroyGpuMemoryBuffer(id, kClientId);
+    DCHECK_GT(buffer_count_, 0);
+    --buffer_count_;
+  }
+
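+  // Creates a |buffer_size_| BGRA_8888 buffer with MAP usage, wraps it in a
+  // GpuMemoryBufferImpl and resets the accumulated time.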
+  void CreateTestGpuMemoryBuffer() {
+    const int kBufferId = 1;
+    const gfx::GpuMemoryBuffer::Format format =
+        gfx::GpuMemoryBuffer::BGRA_8888;
+    const gfx::GpuMemoryBuffer::Usage usage = gfx::GpuMemoryBuffer::MAP;
+    total_time_ = 0;
+
+    handle_ = CreateGpuMemoryBuffer(kBufferId, buffer_size_, format, usage);
+
+    buffer_ = GpuMemoryBufferImpl::CreateFromHandle(
+        handle_, buffer_size_, format, usage,
+        base::Bind(&GpuMemoryBufferPerfTest::DestroyGpuMemoryBuffer,
+                   base::Unretained(this), kBufferId));
+    ASSERT_TRUE(buffer_);
+    EXPECT_FALSE(buffer_->IsMapped());
+  }
+
+  void DestroyTestGpuMemoryBuffer() { buffer_.reset(); }
+
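+  // Maps the buffer into the CPU address space and checks that the mapping
+  // succeeded.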
+  void BufferMap(const scoped_ptr<void*[]>& buffers) {
+    bool rv = buffer_->Map(buffers.get());
+    ASSERT_TRUE(rv);
+    EXPECT_TRUE(buffer_->IsMapped());
+  }
+
+  void BufferUnmap() {
+    buffer_->Unmap();
+    EXPECT_FALSE(buffer_->IsMapped());
+  }
+
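+  // Reads back every 32-bit pixel of the mapped plane sequentially.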
+  void BufferRead(volatile uint32_t* ptr) {
+    // BGRA_8888 stores one 32-bit value per pixel, so the mapped plane holds
+    // width * height uint32_t values.
+    int count = buffer_size_.width() * buffer_size_.height();
+    uint32_t x = 0;
+
+    for (int j = 0; j < count; ++j)
+      x += ptr[j];
+
+    // Force an overly clever compiler to actually compute |x|.
+    ptr[0] = x;
+  }
+
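+  // Writes every 32-bit pixel of the mapped plane sequentially.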
+  void BufferWrite(volatile uint32_t* ptr) {
+    // As in BufferRead(), the plane holds width * height uint32_t values.
+    int count = buffer_size_.width() * buffer_size_.height();
+
+    for (int j = 0; j < count; ++j)
+      ptr[j] = j;
+  }
+
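+  // Reports the accumulated time as an average per iteration in microseconds.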
+  void PrintTestGpuMemoryBuffer(const std::string& name) {
+    // The first runs sometimes perform worse, so it might be useful to also
+    // print the standard deviation to capture this discrepancy.
+    perf_test::PrintResult("gpu_memory_buffer_time_",
+                           GpuMemoryBufferTypeName(handle_.type), name,
+                           total_time_ / static_cast<double>(kNumRuns),
+                           "us/task", true);
+  }
+
+  double total_time_;
+
+ private:
+  int buffer_count_;
+  gfx::Size buffer_size_;
+  gfx::GpuMemoryBufferHandle handle_;
+  scoped_ptr<GpuMemoryBufferImpl> buffer_;
+  scoped_ptr<GpuMemoryBufferFactory> factory_;
+};
+
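+// Measures the cost of mapping the buffer into the CPU address space and
+// unmapping it again, without touching its contents.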
+TEST_P(GpuMemoryBufferPerfTest, MapUnmap) {
+  // BGRA_8888 has a single plane, so Map() fills in exactly one pointer.
+  scoped_ptr<void*[]> mapped_buffers(new void*[1]);
+
+  CreateTestGpuMemoryBuffer();
+
+  base::TimeTicks start = base::TimeTicks::Now();
+  for (int i = 0; i < kNumRuns; ++i) {
+    BufferMap(mapped_buffers);
+    BufferUnmap();
+  }
+  total_time_ +=
+      static_cast<double>((base::TimeTicks::Now() - start).InMicroseconds());
+
+  DestroyTestGpuMemoryBuffer();
+  PrintTestGpuMemoryBuffer("map & unmap");
+}
+
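+// Measures the cost of mapping the buffer, reading back every pixel
+// sequentially and unmapping it again.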
+TEST_P(GpuMemoryBufferPerfTest, Read) {
+  scoped_ptr<void*[]> mapped_buffers(new void*[1]);
+
+  CreateTestGpuMemoryBuffer();
+
+  base::TimeTicks start = base::TimeTicks::Now();
+  for (int i = 0; i < kNumRuns; ++i) {
+    BufferMap(mapped_buffers);
+    BufferRead(static_cast<uint32_t*>(mapped_buffers[0]));
+    BufferUnmap();
+  }
+  total_time_ +=
+      static_cast<double>((base::TimeTicks::Now() - start).InMicroseconds());
+
+  DestroyTestGpuMemoryBuffer();
+  PrintTestGpuMemoryBuffer("read");
+}
+
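+// Measures the cost of mapping the buffer, writing every pixel sequentially
+// and unmapping it again.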
+TEST_P(GpuMemoryBufferPerfTest, Write) {
+  scoped_ptr<void*[]> mapped_buffers(new void*[1]);
+
+  CreateTestGpuMemoryBuffer();
+
+  base::TimeTicks start = base::TimeTicks::Now();
+  for (int i = 0; i < kNumRuns; ++i) {
+    BufferMap(mapped_buffers);
+    BufferWrite(static_cast<uint32_t*>(mapped_buffers[0]));
+    BufferUnmap();
+  }
+  total_time_ +=
+      static_cast<double>((base::TimeTicks::Now() - start).InMicroseconds());
+
+  DestroyTestGpuMemoryBuffer();
+  PrintTestGpuMemoryBuffer("write");
+}
+
+// Measures the cost of mapping the buffer, reading back its contents and then
+// writing every pixel before unmapping it again, i.e. a read-modify-write
+// access pattern.
+TEST_P(GpuMemoryBufferPerfTest, ReadWrite) {
+  scoped_ptr<void*[]> mapped_buffers(new void*[1]);
+
+  CreateTestGpuMemoryBuffer();
+
+  base::TimeTicks start = base::TimeTicks::Now();
+  for (int i = 0; i < kNumRuns; ++i) {
+    BufferMap(mapped_buffers);
+    BufferRead(static_cast<uint32_t*>(mapped_buffers[0]));
+    BufferWrite(static_cast<uint32_t*>(mapped_buffers[0]));
+    BufferUnmap();
+  }
+  total_time_ +=
+      static_cast<double>((base::TimeTicks::Now() - start).InMicroseconds());
+
+  DestroyTestGpuMemoryBuffer();
+  PrintTestGpuMemoryBuffer("read & write");
+}
+
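+// Returns the buffer types supported by the factory on this platform; the
+// tests are instantiated once for each supported type.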
+std::vector<gfx::GpuMemoryBufferType> GetSupportedGpuMemoryBufferTypes() {
+  std::vector<gfx::GpuMemoryBufferType> supported_types;
+  GpuMemoryBufferFactory::GetSupportedTypes(&supported_types);
+  return supported_types;
+}
+
+INSTANTIATE_TEST_CASE_P(
+    GpuMemoryBufferPerfTests,
+    GpuMemoryBufferPerfTest,
+    ::testing::ValuesIn(GetSupportedGpuMemoryBufferTypes()));
+
+}  // namespace
+}  // namespace content