| OLD | NEW |
| 1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include <memory> | 5 #include <memory> |
| 6 | 6 |
| 7 #include "base/bind.h" |
| 8 #include "base/cpu.h" |
| 7 #include "base/macros.h" | 9 #include "base/macros.h" |
| 8 #include "base/memory/aligned_memory.h" | 10 #include "base/memory/aligned_memory.h" |
| 9 #include "base/time/time.h" | 11 #include "base/time/time.h" |
| 10 #include "build/build_config.h" | 12 #include "build/build_config.h" |
| 13 #include "media/base/sinc_resampler.h" |
| 11 #include "media/base/vector_math.h" | 14 #include "media/base/vector_math.h" |
| 12 #include "media/base/vector_math_testing.h" | 15 #include "media/base/vector_math_testing.h" |
| 13 #include "testing/gtest/include/gtest/gtest.h" | 16 #include "testing/gtest/include/gtest/gtest.h" |
| 14 #include "testing/perf/perf_test.h" | 17 #include "testing/perf/perf_test.h" |
| 15 | 18 |
| 16 using base::TimeTicks; | 19 using base::TimeTicks; |
| 17 using std::fill; | 20 using std::fill; |
| 18 | 21 |
| 19 namespace media { | 22 namespace media { |
| 20 | 23 |
| 21 static const int kBenchmarkIterations = 200000; | 24 static const int kBenchmarkIterations = 200000; |
| 22 static const int kEWMABenchmarkIterations = 50000; | 25 static const int kEWMABenchmarkIterations = 50000; |
| 23 static const float kScale = 0.5; | 26 static const float kScale = 0.5; |
| 24 static const int kVectorSize = 8192; | 27 static const int kVectorSize = 8192; |
| 25 | 28 |
| 29 static const int kSincResamplerBenchmarkIterations = 50000000; |
| 30 static const double kSampleRateRatio = 192000.0 / 44100.0; |
| 31 static const double kKernelInterpolationFactor = 0.5; |
| 32 |
| 33 // Helper function to provide no input to SincResampler's Convolve benchmark. |
| 34 static void DoNothing(int frames, float* destination) {} |
| 35 |
| 26 class VectorMathPerfTest : public testing::Test { | 36 class VectorMathPerfTest : public testing::Test { |
| 27 public: | 37 public: |
| 28 VectorMathPerfTest() { | 38 VectorMathPerfTest() { |
| 29 // Initialize input and output vectors. | 39 // Initialize input and output vectors. |
| 30 input_vector_.reset(static_cast<float*>(base::AlignedAlloc( | 40 input_vector_.reset(static_cast<float*>(base::AlignedAlloc( |
| 31 sizeof(float) * kVectorSize, vector_math::kRequiredAlignment))); | 41 sizeof(float) * kVectorSize, vector_math::kRequiredAlignment))); |
| 32 output_vector_.reset(static_cast<float*>(base::AlignedAlloc( | 42 output_vector_.reset(static_cast<float*>(base::AlignedAlloc( |
| 33 sizeof(float) * kVectorSize, vector_math::kRequiredAlignment))); | 43 sizeof(float) * kVectorSize, vector_math::kRequiredAlignment))); |
| 34 fill(input_vector_.get(), input_vector_.get() + kVectorSize, 1.0f); | 44 fill(input_vector_.get(), input_vector_.get() + kVectorSize, 1.0f); |
| 35 fill(output_vector_.get(), output_vector_.get() + kVectorSize, 0.0f); | 45 fill(output_vector_.get(), output_vector_.get() + kVectorSize, 0.0f); |
| 36 } | 46 } |
| 37 | 47 |
| 48 void RunBenchmark(float (*fn)(const float[], const float[], int), |
| 49 bool aligned, |
| 50 const std::string& test_name, |
| 51 const std::string& trace_name) { |
| 52 TimeTicks start = TimeTicks::Now(); |
| 53 for (int i = 0; i < kBenchmarkIterations; ++i) { |
| 54 fn(input_vector_.get(), output_vector_.get(), |
| 55 kVectorSize - (aligned ? 0 : 1)); |
| 56 } |
| 57 double total_time_milliseconds = |
| 58 (TimeTicks::Now() - start).InMillisecondsF(); |
| 59 perf_test::PrintResult(test_name, "", trace_name, |
| 60 kBenchmarkIterations / total_time_milliseconds, |
| 61 "runs/ms", true); |
| 62 } |
| 63 |
| 38 void RunBenchmark(void (*fn)(const float[], float, int, float[]), | 64 void RunBenchmark(void (*fn)(const float[], float, int, float[]), |
| 39 bool aligned, | 65 bool aligned, |
| 40 const std::string& test_name, | 66 const std::string& test_name, |
| 41 const std::string& trace_name) { | 67 const std::string& trace_name) { |
| 42 TimeTicks start = TimeTicks::Now(); | 68 TimeTicks start = TimeTicks::Now(); |
| 43 for (int i = 0; i < kBenchmarkIterations; ++i) { | 69 for (int i = 0; i < kBenchmarkIterations; ++i) { |
| 44 fn(input_vector_.get(), | 70 fn(input_vector_.get(), kScale, kVectorSize - (aligned ? 0 : 1), |
| 45 kScale, | |
| 46 kVectorSize - (aligned ? 0 : 1), | |
| 47 output_vector_.get()); | 71 output_vector_.get()); |
| 48 } | 72 } |
| 49 double total_time_milliseconds = | 73 double total_time_milliseconds = |
| 50 (TimeTicks::Now() - start).InMillisecondsF(); | 74 (TimeTicks::Now() - start).InMillisecondsF(); |
| 51 perf_test::PrintResult(test_name, | 75 perf_test::PrintResult(test_name, "", trace_name, |
| 52 "", | |
| 53 trace_name, | |
| 54 kBenchmarkIterations / total_time_milliseconds, | 76 kBenchmarkIterations / total_time_milliseconds, |
| 55 "runs/ms", | 77 "runs/ms", true); |
| 56 true); | |
| 57 } | 78 } |
| 58 | 79 |
| 59 void RunBenchmark( | 80 void RunBenchmark( |
| 60 std::pair<float, float> (*fn)(float, const float[], int, float), | 81 std::pair<float, float> (*fn)(float, const float[], int, float), |
| 61 int len, | 82 int len, |
| 62 const std::string& test_name, | 83 const std::string& test_name, |
| 63 const std::string& trace_name) { | 84 const std::string& trace_name) { |
| 64 TimeTicks start = TimeTicks::Now(); | 85 TimeTicks start = TimeTicks::Now(); |
| 65 for (int i = 0; i < kEWMABenchmarkIterations; ++i) { | 86 for (int i = 0; i < kEWMABenchmarkIterations; ++i) { |
| 66 fn(0.5f, input_vector_.get(), len, 0.1f); | 87 fn(0.5f, input_vector_.get(), len, 0.1f); |
| 67 } | 88 } |
| 68 double total_time_milliseconds = | 89 double total_time_milliseconds = |
| 69 (TimeTicks::Now() - start).InMillisecondsF(); | 90 (TimeTicks::Now() - start).InMillisecondsF(); |
| 70 perf_test::PrintResult(test_name, | 91 perf_test::PrintResult(test_name, "", trace_name, |
| 71 "", | |
| 72 trace_name, | |
| 73 kEWMABenchmarkIterations / total_time_milliseconds, | 92 kEWMABenchmarkIterations / total_time_milliseconds, |
| 74 "runs/ms", | 93 "runs/ms", true); |
| 75 true); | 94 } |
| 95 |
| 96 void RunBenchmark( |
| 97 SincResampler* resampler, |
| 98 float (*convolve_fn)(const float*, const float*, const float*, double), |
| 99 bool aligned, |
| 100 const std::string& trace_name) { |
| 101 base::TimeTicks start = base::TimeTicks::Now(); |
| 102 for (int i = 0; i < kSincResamplerBenchmarkIterations; ++i) { |
| 103 convolve_fn(resampler->get_kernel_for_testing() + (aligned ? 0 : 1), |
| 104 resampler->get_kernel_for_testing(), |
| 105 resampler->get_kernel_for_testing(), |
| 106 kKernelInterpolationFactor); |
| 107 } |
| 108 double total_time_milliseconds = |
| 109 (base::TimeTicks::Now() - start).InMillisecondsF(); |
| 110 perf_test::PrintResult( |
| 111 "sinc_resampler_convolve", "", trace_name, |
| 112 kSincResamplerBenchmarkIterations / total_time_milliseconds, "runs/ms", |
| 113 true); |
| 76 } | 114 } |
| 77 | 115 |
| 78 protected: | 116 protected: |
| 79 std::unique_ptr<float, base::AlignedFreeDeleter> input_vector_; | 117 std::unique_ptr<float, base::AlignedFreeDeleter> input_vector_; |
| 80 std::unique_ptr<float, base::AlignedFreeDeleter> output_vector_; | 118 std::unique_ptr<float, base::AlignedFreeDeleter> output_vector_; |
| 81 | 119 |
| 82 DISALLOW_COPY_AND_ASSIGN(VectorMathPerfTest); | 120 DISALLOW_COPY_AND_ASSIGN(VectorMathPerfTest); |
| 83 }; | 121 }; |
| 84 | 122 |
| 85 // Define platform dependent function names for SIMD optimized methods. | 123 // Define platform dependent function names for SIMD optimized methods. |
| 86 #if defined(ARCH_CPU_X86_FAMILY) | 124 #if defined(ARCH_CPU_X86_FAMILY) |
| 125 #define CONVOLVE_FUNC Convolve_SSE |
| 126 #define DOTPRODUCT_FUNC DotProduct_SSE |
| 127 #define DOTPRODUCT_FUNC2 DotProduct_AVX |
| 87 #define FMAC_FUNC FMAC_SSE | 128 #define FMAC_FUNC FMAC_SSE |
| 88 #define FMUL_FUNC FMUL_SSE | 129 #define FMUL_FUNC FMUL_SSE |
| 130 #define FMAC_FUNC2 FMAC_AVX |
| 89 #define EWMAAndMaxPower_FUNC EWMAAndMaxPower_SSE | 131 #define EWMAAndMaxPower_FUNC EWMAAndMaxPower_SSE |
| 90 #elif defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON) | 132 #elif defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON) |
| 133 #define CONVOLVE_FUNC Convolve_NEON |
| 134 #define DOTPRODUCT_FUNC DotProduct_NEON |
| 91 #define FMAC_FUNC FMAC_NEON | 135 #define FMAC_FUNC FMAC_NEON |
| 92 #define FMUL_FUNC FMUL_NEON | 136 #define FMUL_FUNC FMUL_NEON |
| 93 #define EWMAAndMaxPower_FUNC EWMAAndMaxPower_NEON | 137 #define EWMAAndMaxPower_FUNC EWMAAndMaxPower_NEON |
| 94 #endif | 138 #endif |
| 95 | 139 |
| 140 // Benchmark for the various Convolve() methods. Make sure to build with |
| 141 // branding=Chrome so that DCHECKs are compiled out when benchmarking. |
| 142 TEST_F(VectorMathPerfTest, Convolve) { |
| 143 SincResampler resampler(kSampleRateRatio, SincResampler::kDefaultRequestSize, |
| 144 base::Bind(&DoNothing)); |
| 145 |
| 146 RunBenchmark(&resampler, vector_math::Convolve_C, true, |
| 147 "unoptimized_aligned"); |
| 148 |
| 149 #if defined(CONVOLVE_FUNC) |
| 150 RunBenchmark(&resampler, vector_math::CONVOLVE_FUNC, true, |
| 151 "optimized_aligned"); |
| 152 RunBenchmark(&resampler, vector_math::CONVOLVE_FUNC, false, |
| 153 "optimized_unaligned"); |
| 154 #endif |
| 155 } |
| 156 |
| 157 // Benchmark for each optimized vector_math::DotProduct() method. |
| 158 TEST_F(VectorMathPerfTest, DotProduct) { |
| 159 // Benchmark DotProduct_C(). |
| 160 RunBenchmark(vector_math::DotProduct_C, true, "vector_math_dotproduct", |
| 161 "unoptimized"); |
| 162 #if defined(DOTPRODUCT_FUNC) |
| 163 // Benchmark DOTPRODUCT_FUNC() with unaligned size. |
| 164 ASSERT_NE( |
| 165 (kVectorSize - 1) % (vector_math::kRequiredAlignment / sizeof(float)), |
| 166 0U); |
| 167 RunBenchmark(vector_math::DOTPRODUCT_FUNC, false, "vector_math_dotproduct", |
| 168 "optimized_unaligned"); |
| 169 // Benchmark DOTPRODUCT_FUNC() with aligned size. |
| 170 ASSERT_EQ(kVectorSize % (vector_math::kRequiredAlignment / sizeof(float)), |
| 171 0U); |
| 172 RunBenchmark(vector_math::DOTPRODUCT_FUNC, true, "vector_math_dotproduct", |
| 173 "optimized_aligned"); |
| 174 #if defined(FMAC_FUNC2) |
| 183 // Benchmark DOTPRODUCT_FUNC2() with aligned size. |
| 176 if (!base::CPU().has_avx()) |
| 177 return; |
| 178 ASSERT_NE( |
| 179 (kVectorSize - 1) % (vector_math::kRequiredAlignment / sizeof(float)), |
| 180 0U); |
| 181 RunBenchmark(vector_math::DOTPRODUCT_FUNC2, false, "vector_math_dotproduct", |
| 182 "optimized2_unaligned"); |
| 183 // Benchmark FMAC_FUNC() with aligned size. |
| 184 ASSERT_EQ(kVectorSize % (vector_math::kRequiredAlignment / sizeof(float)), |
| 185 0U); |
| 186 RunBenchmark(vector_math::DOTPRODUCT_FUNC2, true, "vector_math_dotproduct", |
| 187 "optimized2_aligned"); |
| 188 #endif |
| 189 #endif |
| 190 } |
| 191 |
| 96 // Benchmark for each optimized vector_math::FMAC() method. | 192 // Benchmark for each optimized vector_math::FMAC() method. |
| 97 TEST_F(VectorMathPerfTest, FMAC) { | 193 TEST_F(VectorMathPerfTest, FMAC) { |
| 98 // Benchmark FMAC_C(). | 194 // Benchmark FMAC_C(). |
| 99 RunBenchmark( | 195 RunBenchmark(vector_math::FMAC_C, true, "vector_math_fmac", "unoptimized"); |
| 100 vector_math::FMAC_C, true, "vector_math_fmac", "unoptimized"); | |
| 101 #if defined(FMAC_FUNC) | 196 #if defined(FMAC_FUNC) |
| 102 // Benchmark FMAC_FUNC() with unaligned size. | 197 // Benchmark FMAC_FUNC() with unaligned size. |
| 103 ASSERT_NE((kVectorSize - 1) % (vector_math::kRequiredAlignment / | 198 ASSERT_NE( |
| 104 sizeof(float)), 0U); | 199 (kVectorSize - 1) % (vector_math::kRequiredAlignment / sizeof(float)), |
| 105 RunBenchmark( | 200 0U); |
| 106 vector_math::FMAC_FUNC, false, "vector_math_fmac", "optimized_unaligned"); | 201 RunBenchmark(vector_math::FMAC_FUNC, false, "vector_math_fmac", |
| 202 "optimized_unaligned"); |
| 107 // Benchmark FMAC_FUNC() with aligned size. | 203 // Benchmark FMAC_FUNC() with aligned size. |
| 108 ASSERT_EQ(kVectorSize % (vector_math::kRequiredAlignment / sizeof(float)), | 204 ASSERT_EQ(kVectorSize % (vector_math::kRequiredAlignment / sizeof(float)), |
| 109 0U); | 205 0U); |
| 110 RunBenchmark( | 206 RunBenchmark(vector_math::FMAC_FUNC, true, "vector_math_fmac", |
| 111 vector_math::FMAC_FUNC, true, "vector_math_fmac", "optimized_aligned"); | 207 "optimized_aligned"); |
| 208 #if defined(FMAC_FUNC2) |
| 209 // Benchmark FMAC_FUNC2() with unaligned size. |
| 210 if (!base::CPU().has_avx()) |
| 211 return; |
| 212 ASSERT_NE( |
| 213 (kVectorSize - 1) % (vector_math::kRequiredAlignment / sizeof(float)), |
| 214 0U); |
| 215 RunBenchmark(vector_math::FMAC_FUNC2, false, "vector_math_fmac", |
| 216 "optimized2_unaligned"); |
| 217 // Benchmark FMAC_FUNC2() with aligned size. |
| 218 ASSERT_EQ(kVectorSize % (vector_math::kRequiredAlignment / sizeof(float)), |
| 219 0U); |
| 220 RunBenchmark(vector_math::FMAC_FUNC2, true, "vector_math_fmac", |
| 221 "optimized2_aligned"); |
| 222 #endif |
| 112 #endif | 223 #endif |
| 113 } | 224 } |
| 114 | 225 |
| 115 // Benchmark for each optimized vector_math::FMUL() method. | 226 // Benchmark for each optimized vector_math::FMUL() method. |
| 116 TEST_F(VectorMathPerfTest, FMUL) { | 227 TEST_F(VectorMathPerfTest, FMUL) { |
| 117 // Benchmark FMUL_C(). | 228 // Benchmark FMUL_C(). |
| 118 RunBenchmark( | 229 RunBenchmark(vector_math::FMUL_C, true, "vector_math_fmul", "unoptimized"); |
| 119 vector_math::FMUL_C, true, "vector_math_fmul", "unoptimized"); | |
| 120 #if defined(FMUL_FUNC) | 230 #if defined(FMUL_FUNC) |
| 121 // Benchmark FMUL_FUNC() with unaligned size. | 231 // Benchmark FMUL_FUNC() with unaligned size. |
| 122 ASSERT_NE((kVectorSize - 1) % (vector_math::kRequiredAlignment / | 232 ASSERT_NE( |
| 123 sizeof(float)), 0U); | 233 (kVectorSize - 1) % (vector_math::kRequiredAlignment / sizeof(float)), |
| 124 RunBenchmark( | 234 0U); |
| 125 vector_math::FMUL_FUNC, false, "vector_math_fmul", "optimized_unaligned"); | 235 RunBenchmark(vector_math::FMUL_FUNC, false, "vector_math_fmul", |
| 236 "optimized_unaligned"); |
| 126 // Benchmark FMUL_FUNC() with aligned size. | 237 // Benchmark FMUL_FUNC() with aligned size. |
| 127 ASSERT_EQ(kVectorSize % (vector_math::kRequiredAlignment / sizeof(float)), | 238 ASSERT_EQ(kVectorSize % (vector_math::kRequiredAlignment / sizeof(float)), |
| 128 0U); | 239 0U); |
| 129 RunBenchmark( | 240 RunBenchmark(vector_math::FMUL_FUNC, true, "vector_math_fmul", |
| 130 vector_math::FMUL_FUNC, true, "vector_math_fmul", "optimized_aligned"); | 241 "optimized_aligned"); |
| 131 #endif | 242 #endif |
| 132 } | 243 } |
| 133 | 244 |
| 134 // Benchmark for each optimized vector_math::EWMAAndMaxPower() method. | 245 // Benchmark for each optimized vector_math::EWMAAndMaxPower() method. |
| 135 TEST_F(VectorMathPerfTest, EWMAAndMaxPower) { | 246 TEST_F(VectorMathPerfTest, EWMAAndMaxPower) { |
| 136 // Benchmark EWMAAndMaxPower_C(). | 247 // Benchmark EWMAAndMaxPower_C(). |
| 137 RunBenchmark(vector_math::EWMAAndMaxPower_C, | 248 RunBenchmark(vector_math::EWMAAndMaxPower_C, kVectorSize, |
| 138 kVectorSize, | 249 "vector_math_ewma_and_max_power", "unoptimized"); |
| 139 "vector_math_ewma_and_max_power", | |
| 140 "unoptimized"); | |
| 141 #if defined(EWMAAndMaxPower_FUNC) | 250 #if defined(EWMAAndMaxPower_FUNC) |
| 142 // Benchmark EWMAAndMaxPower_FUNC() with unaligned size. | 251 // Benchmark EWMAAndMaxPower_FUNC() with unaligned size. |
| 143 ASSERT_NE((kVectorSize - 1) % (vector_math::kRequiredAlignment / | 252 ASSERT_NE( |
| 144 sizeof(float)), 0U); | 253 (kVectorSize - 1) % (vector_math::kRequiredAlignment / sizeof(float)), |
| 145 RunBenchmark(vector_math::EWMAAndMaxPower_FUNC, | 254 0U); |
| 146 kVectorSize - 1, | 255 RunBenchmark(vector_math::EWMAAndMaxPower_FUNC, kVectorSize - 1, |
| 147 "vector_math_ewma_and_max_power", | 256 "vector_math_ewma_and_max_power", "optimized_unaligned"); |
| 148 "optimized_unaligned"); | |
| 149 // Benchmark EWMAAndMaxPower_FUNC() with aligned size. | 257 // Benchmark EWMAAndMaxPower_FUNC() with aligned size. |
| 150 ASSERT_EQ(kVectorSize % (vector_math::kRequiredAlignment / sizeof(float)), | 258 ASSERT_EQ(kVectorSize % (vector_math::kRequiredAlignment / sizeof(float)), |
| 151 0U); | 259 0U); |
| 152 RunBenchmark(vector_math::EWMAAndMaxPower_FUNC, | 260 RunBenchmark(vector_math::EWMAAndMaxPower_FUNC, kVectorSize, |
| 153 kVectorSize, | 261 "vector_math_ewma_and_max_power", "optimized_aligned"); |
| 154 "vector_math_ewma_and_max_power", | |
| 155 "optimized_aligned"); | |
| 156 #endif | 262 #endif |
| 157 } | 263 } |
| 158 | 264 |
| 159 } // namespace media | 265 } // namespace media |
| OLD | NEW |