Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 // Copyright 2015 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 // The idea is to benchmark how the hardware performs, under different usages | |
| 6 // of GpuMemoryBuffer, when the native buffer object (bo) is mapped into the | |
| 7 // CPU address space. In particular this test aims to capture the effects of | |
| 8 // data coherency and answer the following: | |
| 9 // | |
| 10 // - measure memory mapping performance of GpuMemoryBuffer using shared memory | |
| 11 // (fallback case) and also native implementation of it. | |
| 12 // - what if the Renderer process (client) just writes into the buffer object? | |
| 13 // - what's the effect of reading from a write-combining (WC) memory? can we | |
| 14 // avoid read backs? | |
| 15 // - should it be UC and/or WC mapped, to get a faster access? | |
| 16 // - what's the effect of clients doing sequential versus non-sequential | |
| 17 // writes? If the latter, a WC mapping may end up being very slow. | |
| 18 | |
| 19 #include "base/bind.h" | |
| 20 #include "base/command_line.h" | |
| 21 #include "base/memory/scoped_vector.h" | |
| 22 #include "base/time/time.h" | |
| 23 #include "content/child/child_gpu_memory_buffer_manager.h" | |
| 24 #include "content/child/child_thread_impl.h" | |
| 25 #include "content/common/gpu/client/gpu_memory_buffer_impl.h" | |
| 26 #include "content/public/common/content_switches.h" | |
| 27 #include "content/public/test/content_browser_test.h" | |
| 28 #include "content/public/test/content_browser_test_utils.h" | |
| 29 #include "content/shell/browser/shell.h" | |
| 30 #include "testing/perf/perf_test.h" | |
| 31 #include "url/gurl.h" | |
| 32 | |
| 33 namespace content { | |
| 34 namespace { | |
| 35 // Assigned by ChildThreadImplBrowserTest::SetUpOnChildThread(). | |
| 36 ChildGpuMemoryBufferManager* child_gpu_memory_buffer_manager_ = nullptr; | |
|
dshwang
2015/06/25 10:52:11
nullptr.
why don't BufferPerfTest have this as mem
| |
| 37 // Number of Map()/Unmap() iterations each perf test executes. | |
| 38 const int kNumRuns = 30; | |
|
dshwang
2015/06/25 10:52:11
'static' is not needed because of anonymous namesp
| |
| 39 // Selects whether a test run enables native GpuMemoryBuffers. | |
| 40 enum NativeBufferFlag { kDisableNativeBuffers, kEnableNativeBuffers }; | |
| 41 | |
| 42 // Returns the label used for the given flag when reporting perf results. | |
| 43 std::string NativeBufferFlagName(NativeBufferFlag flag) { | |
| 44 switch (flag) { | |
| 45 case kEnableNativeBuffers: | |
| 46 return "_native"; | |
| 47 case kDisableNativeBuffers: | |
| 48 return ""; | |
| 49 } | |
| 50 NOTREACHED(); | |
| 51 return ""; | |
| 52 } | |
| 53 // Buffer mode of the running test; set in SetUpCommandLine(). | |
| 54 NativeBufferFlag native_buffer_flag_; | |
|
dshwang
2015/06/25 10:52:11
why don't ChildThreadImplGpuMemoryBufferPerfTest h
| |
| 55 // NOTE(review): no code in this file references MemoryOperation. | |
| 56 enum MemoryOperation { kMemoryOperationWrite, kMemoryOperationNoop }; | |
|
dshwang
2015/06/25 10:52:11
unused
| |
| 57 // Allocates a GpuMemoryBuffer and times the Map() calls issued on it. | |
| 58 class BufferPerfTest { | |
| 59 public: | |
| 60 BufferPerfTest() | |
| 61 : gpu_memory_buffer_(nullptr), | |
| 62 num_planes_(0), | |
| 63 format_(gfx::GpuMemoryBuffer::BGRA_8888), | |
| 64 buffer_size_(4, 4) {} | |
| 65 | |
| 66 // Allocates the buffer under test and caches its number of planes. | |
| 67 void Allocate() { | |
| 68 gpu_memory_buffer_ = | |
| 69 child_gpu_memory_buffer_manager_->AllocateGpuMemoryBuffer( | |
| 70 buffer_size_, format_, gfx::GpuMemoryBuffer::MAP); | |
| 71 ASSERT_TRUE(gpu_memory_buffer_); | |
| 72 EXPECT_EQ(format_, gpu_memory_buffer_->GetFormat()); | |
| 73 | |
| 74 num_planes_ = | |
| 75 GpuMemoryBufferImpl::NumberOfPlanesForGpuMemoryBufferFormat(format_); | |
| 76 } | |
| 77 | |
| 78 // Maps the buffer, storing the plane pointers in the planes array, and reports | |
| 79 // the time spent in Map() alone, in microseconds, labelled operation_name. | |
| 80 void Map(const scoped_ptr<void* []>& planes, | |
| 81 const std::string& operation_name) { | |
| 82 base::TimeTicks start = base::TimeTicks::Now(); | |
| 83 bool rv = gpu_memory_buffer_->Map(planes.get()); | |
| 84 base::TimeTicks end = base::TimeTicks::Now(); | |
| 85 ASSERT_TRUE(rv); | |
| 86 EXPECT_TRUE(gpu_memory_buffer_->IsMapped()); | |
| 87 // TODO(vignatti): report only the mean time instead of every run. Printing | |
| 88 // each run is useful for now because VGEM, for example, performs much worse | |
| 89 // on its first runs (need to check why). | |
| 90 perf_test::PrintResult( | |
|
dshwang
2015/06/25 10:52:11
This test measures only Map time even in "time_to_
| |
| 91 "time_to_execute_map", | |
| 92 NativeBufferFlagName(native_buffer_flag_), | |
| 93 operation_name, | |
| 94 static_cast<size_t>((end - start).InMicroseconds()), | |
| 95 "us", true); | |
| 96 } | |
| 97 | |
| 98 // Unmaps the buffer and checks that it is no longer reported as mapped. | |
| 99 void Unmap() { | |
| 100 gpu_memory_buffer_->Unmap(); | |
| 101 EXPECT_FALSE(gpu_memory_buffer_->IsMapped()); | |
| 102 } | |
| 103 | |
| 104 // Fills every row of every plane with a per-plane byte pattern. | |
| 105 void Write(const scoped_ptr<void* []>& planes) { | |
| 106 scoped_ptr<int[]> strides(new int[num_planes_]); | |
| 107 gpu_memory_buffer_->GetStride(strides.get()); | |
| 108 | |
| 109 for (size_t plane = 0; plane < num_planes_; ++plane) { | |
| 110 size_t row_size_in_bytes = 0; | |
| 111 EXPECT_TRUE(GpuMemoryBufferImpl::RowSizeInBytes(buffer_size_.width(), | |
| 112 format_, plane, &row_size_in_bytes)); | |
| 113 | |
| 114 scoped_ptr<char[]> data(new char[row_size_in_bytes]); | |
| 115 memset(data.get(), 0x2a + plane, row_size_in_bytes); | |
| 116 | |
| 117 size_t height = buffer_size_.height() / | |
| 118 GpuMemoryBufferImpl::SubsamplingFactor(format_, plane); | |
| 119 for (size_t y = 0; y < height; ++y) { | |
| 120 // Copy |data| to row |y| of |plane|. | |
| 121 char* row = static_cast<char*>(planes[plane]) + y * strides[plane]; | |
| 122 memcpy(row, data.get(), row_size_in_bytes); | |
| 123 // NOTE(review): this read-back only builds when NDEBUG is set; confirm. | |
| 124 #if defined(NDEBUG) | |
| 125 EXPECT_EQ(memcmp(row, data.get(), row_size_in_bytes), 0); | |
| 126 #endif | |
| 127 } | |
| 128 } | |
| 129 } | |
| 130 | |
| 131 // Plane count cached by Allocate(); 0 until Allocate() has succeeded. | |
| 132 size_t GetNumPlanes() const { return num_planes_; } | |
| 133 | |
| 134 private: | |
| 135 scoped_ptr<gfx::GpuMemoryBuffer> gpu_memory_buffer_; | |
| 136 size_t num_planes_; | |
| 137 gfx::GpuMemoryBuffer::Format format_; | |
| 138 gfx::Size buffer_size_; | |
| 139 }; | |
| 140 // Single-process fixture; caches the child thread's GpuMemoryBuffer manager. | |
| 141 class ChildThreadImplBrowserTest : public ContentBrowserTest { | |
| 142 public: | |
| 143 ChildThreadImplBrowserTest() {} | |
| 144 | |
| 145 // Overridden from BrowserTestBase: | |
| 146 void SetUpCommandLine(base::CommandLine* command_line) override { | |
| 147 command_line->AppendSwitch(switches::kSingleProcess); | |
| 148 } | |
| 149 void SetUpOnMainThread() override { | |
| 150 NavigateToURL(shell(), GURL(url::kAboutBlankURL)); | |
| 151 PostTaskToInProcessRendererAndWait( | |
| 152 base::Bind(&ChildThreadImplBrowserTest::SetUpOnChildThread, this)); | |
| 153 } | |
| 154 | |
| 155 private: | |
| 156 void SetUpOnChildThread() { | |
| 157 child_gpu_memory_buffer_manager_ = | |
| 158 ChildThreadImpl::current()->gpu_memory_buffer_manager(); | |
| 159 } | |
| 160 DISALLOW_COPY_AND_ASSIGN(ChildThreadImplBrowserTest); | |
| 161 }; | |
| 162 // Fixture parameterized on NativeBufferFlag, which controls whether the | |
| 163 // native GpuMemoryBuffer switch is appended to the command line. | |
| 164 class ChildThreadImplGpuMemoryBufferPerfTest | |
| 165 : public ChildThreadImplBrowserTest, | |
| 166 public testing::WithParamInterface< | |
| 167 ::testing::tuple<NativeBufferFlag>> { | |
| 168 public: | |
| 169 ChildThreadImplGpuMemoryBufferPerfTest() {} | |
| 170 | |
| 171 // Overridden from BrowserTestBase: | |
| 172 void SetUpCommandLine(base::CommandLine* command_line) override { | |
| 173 ChildThreadImplBrowserTest::SetUpCommandLine(command_line); | |
| 174 | |
| 175 // Publish the parameter so BufferPerfTest::Map() can label its results. | |
| 176 native_buffer_flag_ = ::testing::get<0>(GetParam()); | |
| 177 if (native_buffer_flag_ == kEnableNativeBuffers) { | |
| 178 command_line->AppendSwitch(switches::kEnableNativeGpuMemoryBuffers); | |
| 179 } | |
| 180 } | |
| 181 | |
| 182 protected: | |
| 183 // Buffer under test; created by each test body. | |
| 184 scoped_ptr<BufferPerfTest> buffer_; | |
| 185 | |
| 186 private: | |
| 187 DISALLOW_COPY_AND_ASSIGN(ChildThreadImplGpuMemoryBufferPerfTest); | |
| 188 }; | |
| 189 // Maps, fills and unmaps the buffer kNumRuns times; Map() reports its timing. | |
| 190 IN_PROC_BROWSER_TEST_P(ChildThreadImplGpuMemoryBufferPerfTest, Write) { | |
| 191 buffer_.reset(new BufferPerfTest()); | |
| 192 // Allocate() asserts internally; stop here rather than use a null buffer. | |
| 193 ASSERT_NO_FATAL_FAILURE(buffer_->Allocate()); | |
| 194 | |
| 195 scoped_ptr<void* []> planes(new void* [buffer_->GetNumPlanes()]); | |
| 196 | |
| 197 for (int i = 0; i < kNumRuns; ++i) { | |
| 198 ASSERT_NO_FATAL_FAILURE(buffer_->Map(planes, "Write")); | |
| 199 buffer_->Write(planes); | |
|
dshwang
2015/06/25 10:52:11
I think Map and Unmap should be out of 'for block'
| |
| 200 buffer_->Unmap(); | |
| 201 } | |
| 202 } | |
| 203 // Maps and immediately unmaps the buffer kNumRuns times to time Map() alone. | |
| 204 IN_PROC_BROWSER_TEST_P(ChildThreadImplGpuMemoryBufferPerfTest, Map) { | |
| 205 buffer_.reset(new BufferPerfTest()); | |
| 206 // Allocate() asserts internally; stop here rather than use a null buffer. | |
| 207 ASSERT_NO_FATAL_FAILURE(buffer_->Allocate()); | |
| 208 | |
| 209 scoped_ptr<void* []> planes(new void* [buffer_->GetNumPlanes()]); | |
| 210 | |
| 211 for (int i = 0; i < kNumRuns; ++i) { | |
| 212 ASSERT_NO_FATAL_FAILURE(buffer_->Map(planes, "Map")); | |
| 213 buffer_->Unmap(); | |
| 214 } | |
| 215 } | |
| 216 // Runs every test above once per NativeBufferFlag value. | |
| 217 INSTANTIATE_TEST_CASE_P( | |
| 218 ChildThreadImplGpuMemoryBufferPerfTests, | |
| 219 ChildThreadImplGpuMemoryBufferPerfTest, | |
| 220 ::testing::Values(kDisableNativeBuffers, kEnableNativeBuffers)); | |
| 221 } // namespace | |
| 222 } // namespace content | |
| OLD | NEW |