| Index: content/common/gpu/client/gpu_memory_buffer_impl_perftest.cc | 
| diff --git a/content/common/gpu/client/gpu_memory_buffer_impl_perftest.cc b/content/common/gpu/client/gpu_memory_buffer_impl_perftest.cc | 
| new file mode 100644 | 
| index 0000000000000000000000000000000000000000..34a652f93e05578c730b4c48f5ad3adba6787868 | 
| --- /dev/null | 
| +++ b/content/common/gpu/client/gpu_memory_buffer_impl_perftest.cc | 
| @@ -0,0 +1,243 @@ | 
| +// Copyright 2015 The Chromium Authors. All rights reserved. | 
| +// Use of this source code is governed by a BSD-style license that can be | 
| +// found in the LICENSE file. | 
| + | 
| +// This benchmark measures how the hardware performs, for different usages of | 
| +// GpuMemoryBuffer, when a native buffer object is mapped into the CPU. In | 
| +// particular it aims to capture the effects of data coherency, and it could | 
| +// be extended further to answer the following: | 
| +// | 
| +// - How does the memory mapping performance of GpuMemoryBuffer compare between | 
| +//   shared memory (the fallback case) and the native implementations? | 
| +// - What if the Renderer process (client) only writes into the buffer object? | 
| +// - What is the effect of reading from write-combining (WC) memory? Can we | 
| +//   avoid read-backs? | 
| +// - Should the buffer be UC and/or WC mapped to get faster access? | 
| +// - What is the effect of clients doing sequential versus non-sequential | 
| +//   writes? If the latter, a WC mapping may end up being very slow. | 
| + | 
| +#include "content/common/gpu/client/gpu_memory_buffer_impl.h" | 
| + | 
| +#include "base/bind.h" | 
| +#include "content/common/gpu/gpu_memory_buffer_factory.h" | 
| +#include "testing/gtest/include/gtest/gtest.h" | 
| +#include "testing/perf/perf_test.h" | 
| + | 
| +namespace content { | 
| +namespace { | 
| + | 
| +// Number of loop iterations each test performs; also the divisor used when | 
| +// the accumulated time is averaged for reporting. | 
| +const int kNumRuns = 1000; | 
| +// Client id handed to the GpuMemoryBufferFactory when creating and destroying | 
| +// the test buffer. | 
| +const int kClientId = 1; | 
| + | 
| +// Maps |type| to the short lowercase label that is passed to | 
| +// perf_test::PrintResult() when results are reported. Hits NOTREACHED() and | 
| +// returns an empty string for any type not listed below. | 
| +// TODO: consider moving this into a public API. | 
| +std::string GpuMemoryBufferTypeName(gfx::GpuMemoryBufferType type) { | 
| +  if (type == gfx::SHARED_MEMORY_BUFFER) | 
| +    return "shared_memory"; | 
| +  if (type == gfx::IO_SURFACE_BUFFER) | 
| +    return "io_surface"; | 
| +  if (type == gfx::SURFACE_TEXTURE_BUFFER) | 
| +    return "surface_texture"; | 
| +  if (type == gfx::OZONE_NATIVE_BUFFER) | 
| +    return "ozone_native"; | 
| + | 
| +  // No label is defined for any other buffer type. | 
| +  NOTREACHED(); | 
| +  return ""; | 
| +} | 
| + | 
| +// Fixture that creates a single 128x128 BGRA_8888 GpuMemoryBuffer through the | 
| +// GpuMemoryBufferFactory selected by the test parameter, and offers helpers to | 
| +// map, unmap, read and write it. Tests accumulate elapsed microseconds into | 
| +// |total_time_| and report the per-run average via PrintTestGpuMemoryBuffer(). | 
| +class GpuMemoryBufferPerfTest | 
| +    : public testing::TestWithParam<gfx::GpuMemoryBufferType> { | 
| + public: | 
| +  GpuMemoryBufferPerfTest() | 
| +      : total_time_(0), | 
| +        buffer_count_(0), | 
| +        buffer_size_(128, 128), | 
| +        buffer_(nullptr), | 
| +        factory_(nullptr) {} | 
| + | 
| +  // Overridden from testing::Test: | 
| +  void SetUp() override { | 
| +    factory_ = GpuMemoryBufferFactory::Create(GetParam()); | 
| +  } | 
| +  void TearDown() override { | 
| +    // Release the buffer first: its destruction callback calls back into | 
| +    // |factory_|, so the factory has to outlive it. This is a no-op when the | 
| +    // test already called DestroyTestGpuMemoryBuffer(). | 
| +    buffer_.reset(); | 
| +    factory_.reset(); | 
| +  } | 
| + | 
| +  // Asks the factory for a new buffer with the given parameters on behalf of | 
| +  // kClientId and counts it as outstanding. Returns the handle the factory | 
| +  // produced. | 
| +  gfx::GpuMemoryBufferHandle CreateGpuMemoryBuffer( | 
| +      gfx::GpuMemoryBufferId id, | 
| +      const gfx::Size& size, | 
| +      gfx::GpuMemoryBuffer::Format format, | 
| +      gfx::GpuMemoryBuffer::Usage usage) { | 
| +    ++buffer_count_; | 
| +    return factory_->CreateGpuMemoryBuffer(id, size, format, usage, kClientId, | 
| +                                           gfx::kNullPluginWindow); | 
| +  } | 
| + | 
| +  // Destruction callback bound into the GpuMemoryBufferImpl created by | 
| +  // CreateTestGpuMemoryBuffer(). |sync_point| is part of the callback | 
| +  // signature but is not used here. | 
| +  void DestroyGpuMemoryBuffer(gfx::GpuMemoryBufferId id, uint32 sync_point) { | 
| +    factory_->DestroyGpuMemoryBuffer(id, kClientId); | 
| +    DCHECK_GT(buffer_count_, 0); | 
| +    --buffer_count_; | 
| +  } | 
| + | 
| +  // Resets |total_time_| and creates the BGRA_8888 / MAP buffer under test, | 
| +  // wrapping its handle in a GpuMemoryBufferImpl stored in |buffer_|. | 
| +  void CreateTestGpuMemoryBuffer(void) { | 
| +    const int kBufferId = 1; | 
| +    const gfx::GpuMemoryBuffer::Format format = | 
| +        gfx::GpuMemoryBuffer::BGRA_8888; | 
| +    const gfx::GpuMemoryBuffer::Usage usage = gfx::GpuMemoryBuffer::MAP; | 
| +    total_time_ = 0; | 
| + | 
| +    handle_ = CreateGpuMemoryBuffer(kBufferId, buffer_size_, format, usage); | 
| + | 
| +    // base::Unretained(this) relies on the fixture outliving |buffer_|. | 
| +    buffer_ = GpuMemoryBufferImpl::CreateFromHandle( | 
| +        handle_, buffer_size_, format, usage, | 
| +        base::Bind(&GpuMemoryBufferPerfTest::DestroyGpuMemoryBuffer, | 
| +                   base::Unretained(this), kBufferId)); | 
| +    ASSERT_TRUE(buffer_); | 
| +    EXPECT_FALSE(buffer_->IsMapped()); | 
| +  } | 
| + | 
| +  // Drops |buffer_|, which runs the destruction callback bound above. | 
| +  void DestroyTestGpuMemoryBuffer(void) { | 
| +    buffer_.reset(); | 
| +  } | 
| + | 
| +  // Maps the test buffer, handing |buffers| to Map() as its output array. | 
| +  // The tests read the mapped address back from slot 0, so |buffers| must | 
| +  // hold at least one element. | 
| +  // Timing is deliberately not accumulated here: every test already times its | 
| +  // whole loop, so adding the Map() duration as well would count it twice. | 
| +  void BufferMap(scoped_ptr<void* []> const &buffers) { | 
| +    ASSERT_TRUE(buffer_->Map(buffers.get())); | 
| +    EXPECT_TRUE(buffer_->IsMapped()); | 
| +  } | 
| + | 
| +  // Unmaps the test buffer and checks that it reports itself as unmapped. | 
| +  void BufferUnmap(void) { | 
| +    buffer_->Unmap(); | 
| +    EXPECT_FALSE(buffer_->IsMapped()); | 
| +  } | 
| + | 
| +  // Reads every 32-bit pixel of the mapped buffer at |ptr| once. | 
| +  void BufferRead(volatile uint32_t *ptr) { | 
| +    const size_t pixel_count = PixelCount(); | 
| +    uint32_t sum = 0; | 
| + | 
| +    for (size_t i = 0; i < pixel_count; ++i) | 
| +      sum += ptr[i]; | 
| + | 
| +    // Store the sum back so an overly clever compiler has to compute it. | 
| +    ptr[0] = sum; | 
| +  } | 
| + | 
| +  // Writes every 32-bit pixel of the mapped buffer at |ptr| once, | 
| +  // sequentially, storing each pixel's index as its value. | 
| +  void BufferWrite(volatile uint32_t *ptr) { | 
| +    const size_t pixel_count = PixelCount(); | 
| + | 
| +    for (size_t i = 0; i < pixel_count; ++i) | 
| +      ptr[i] = static_cast<uint32_t>(i); | 
| +  } | 
| + | 
| +  // Reports |total_time_| averaged over kNumRuns, using the buffer type name | 
| +  // as the modifier and |name| as the trace label. | 
| +  void PrintTestGpuMemoryBuffer(const std::string& name) { | 
| +    // Sometimes the first runs have worse performance so it might be useful to | 
| +    // print out the standard deviation as well to capture this discrepancy. | 
| +    perf_test::PrintResult( | 
| +        "gpu_memory_buffer_time_", | 
| +        GpuMemoryBufferTypeName(handle_.type), | 
| +        name, | 
| +        total_time_ / static_cast<double>(kNumRuns), | 
| +        "us/task", | 
| +        true); | 
| +  } | 
| + | 
| +  // Accumulated elapsed time in microseconds; the test bodies add to it. | 
| +  double total_time_; | 
| + | 
| + private: | 
| +  // Number of 32-bit pixels in the test buffer. The buffer is created as | 
| +  // BGRA_8888 (4 bytes per pixel), so one uint32_t covers exactly one pixel. | 
| +  // NOTE(review): assumes rows are tightly packed (no stride padding) - | 
| +  // confirm for native buffer types. | 
| +  size_t PixelCount() const { | 
| +    return static_cast<size_t>(buffer_size_.width()) * | 
| +           static_cast<size_t>(buffer_size_.height()); | 
| +  } | 
| + | 
| +  // Buffers created through CreateGpuMemoryBuffer() and not yet destroyed. | 
| +  int buffer_count_; | 
| +  // Dimensions of the buffer under test, in pixels. | 
| +  gfx::Size buffer_size_; | 
| +  // Handle returned by the factory for the buffer under test. | 
| +  gfx::GpuMemoryBufferHandle handle_; | 
| +  // Client-side wrapper around |handle_| that the tests map and unmap. | 
| +  scoped_ptr<GpuMemoryBufferImpl> buffer_; | 
| +  // Factory for the buffer type selected by the test parameter. | 
| +  scoped_ptr<GpuMemoryBufferFactory> factory_; | 
| +}; | 
| + | 
| +// Measures the cost of mapping and immediately unmapping the buffer, without | 
| +// touching its contents. | 
| +TEST_P(GpuMemoryBufferPerfTest, MapUnmap) { | 
| +  // BufferMap() passes this array to GpuMemoryBufferImpl::Map() as its output, | 
| +  // so it needs one real slot; a zero-length array would leave Map() no valid | 
| +  // element to write to. | 
| +  scoped_ptr<void*[]> mapped_buffers(new void*[1]); | 
| + | 
| +  CreateTestGpuMemoryBuffer(); | 
| + | 
| +  const base::TimeTicks start = base::TimeTicks::Now(); | 
| +  for (int i = 0; i < kNumRuns; ++i) { | 
| +    BufferMap(mapped_buffers); | 
| +    BufferUnmap(); | 
| +  } | 
| +  const base::TimeDelta elapsed = base::TimeTicks::Now() - start; | 
| +  total_time_ += static_cast<double>(elapsed.InMicroseconds()); | 
| + | 
| +  DestroyTestGpuMemoryBuffer(); | 
| +  PrintTestGpuMemoryBuffer("map & unmap "); | 
| +} | 
| + | 
| +// Measures mapping the buffer, reading it through the CPU mapping, and | 
| +// unmapping it again. | 
| +TEST_P(GpuMemoryBufferPerfTest, Read) { | 
| +  // BufferMap() passes this array to GpuMemoryBufferImpl::Map() as its output | 
| +  // and slot 0 is read back below, so it needs one real slot; indexing a | 
| +  // zero-length array would be out of bounds. | 
| +  scoped_ptr<void*[]> mapped_buffers(new void*[1]); | 
| + | 
| +  CreateTestGpuMemoryBuffer(); | 
| + | 
| +  const base::TimeTicks start = base::TimeTicks::Now(); | 
| +  for (int i = 0; i < kNumRuns; ++i) { | 
| +    BufferMap(mapped_buffers); | 
| +    BufferRead(static_cast<uint32_t*>(mapped_buffers[0])); | 
| +    BufferUnmap(); | 
| +  } | 
| +  const base::TimeDelta elapsed = base::TimeTicks::Now() - start; | 
| +  total_time_ += static_cast<double>(elapsed.InMicroseconds()); | 
| + | 
| +  DestroyTestGpuMemoryBuffer(); | 
| +  PrintTestGpuMemoryBuffer("read "); | 
| +} | 
| + | 
| +// Measures mapping the buffer, writing it sequentially through the CPU | 
| +// mapping, and unmapping it again. | 
| +TEST_P(GpuMemoryBufferPerfTest, Write) { | 
| +  // BufferMap() passes this array to GpuMemoryBufferImpl::Map() as its output | 
| +  // and slot 0 is read back below, so it needs one real slot; indexing a | 
| +  // zero-length array would be out of bounds. | 
| +  scoped_ptr<void*[]> mapped_buffers(new void*[1]); | 
| + | 
| +  CreateTestGpuMemoryBuffer(); | 
| + | 
| +  const base::TimeTicks start = base::TimeTicks::Now(); | 
| +  for (int i = 0; i < kNumRuns; ++i) { | 
| +    BufferMap(mapped_buffers); | 
| +    BufferWrite(static_cast<uint32_t*>(mapped_buffers[0])); | 
| +    BufferUnmap(); | 
| +  } | 
| +  const base::TimeDelta elapsed = base::TimeTicks::Now() - start; | 
| +  total_time_ += static_cast<double>(elapsed.InMicroseconds()); | 
| + | 
| +  DestroyTestGpuMemoryBuffer(); | 
| +  PrintTestGpuMemoryBuffer("write "); | 
| +} | 
| + | 
| +// Measures mapping the buffer, reading it and then overwriting it through the | 
| +// same CPU mapping, and unmapping it again. | 
| +TEST_P(GpuMemoryBufferPerfTest, ReadWrite) { | 
| +  // BufferMap() passes this array to GpuMemoryBufferImpl::Map() as its output | 
| +  // and slot 0 is read back below, so it needs one real slot; indexing a | 
| +  // zero-length array would be out of bounds. | 
| +  scoped_ptr<void*[]> mapped_buffers(new void*[1]); | 
| + | 
| +  CreateTestGpuMemoryBuffer(); | 
| + | 
| +  const base::TimeTicks start = base::TimeTicks::Now(); | 
| +  for (int i = 0; i < kNumRuns; ++i) { | 
| +    BufferMap(mapped_buffers); | 
| +    BufferRead(static_cast<uint32_t*>(mapped_buffers[0])); | 
| +    BufferWrite(static_cast<uint32_t*>(mapped_buffers[0])); | 
| +    BufferUnmap(); | 
| +  } | 
| +  const base::TimeDelta elapsed = base::TimeTicks::Now() - start; | 
| +  total_time_ += static_cast<double>(elapsed.InMicroseconds()); | 
| + | 
| +  DestroyTestGpuMemoryBuffer(); | 
| +  PrintTestGpuMemoryBuffer("read & write "); | 
| +} | 
| + | 
| +// Returns the buffer types that GpuMemoryBufferFactory::GetSupportedTypes() | 
| +// reports, as a value suitable for feeding ::testing::ValuesIn(). | 
| +std::vector<gfx::GpuMemoryBufferType> GetSupportedGpuMemoryBufferTypes() { | 
| +  std::vector<gfx::GpuMemoryBufferType> types; | 
| +  GpuMemoryBufferFactory::GetSupportedTypes(&types); | 
| +  return types; | 
| +} | 
| + | 
| +// Instantiates every GpuMemoryBufferPerfTest case once per buffer type | 
| +// returned by GetSupportedGpuMemoryBufferTypes(). | 
| +INSTANTIATE_TEST_CASE_P( | 
| +    GpuMemoryBufferPerfTests, | 
| +    GpuMemoryBufferPerfTest, | 
| +    ::testing::ValuesIn(GetSupportedGpuMemoryBufferTypes())); | 
| +}  // namespace | 
| +}  // namespace content | 
|  |