Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(215)

Side by Side Diff: media/base/vector_math_perftest.cc

Issue 2556993002: Experiment with AVX optimizations for FMAC, FMUL operations.
Patch Set: Created 4 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « media/base/vector_math_avx.cc ('k') | media/base/vector_math_testing.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2013 The Chromium Authors. All rights reserved. 1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include <memory> 5 #include <memory>
6 6
7 #include "base/bind.h"
8 #include "base/cpu.h"
7 #include "base/macros.h" 9 #include "base/macros.h"
8 #include "base/memory/aligned_memory.h" 10 #include "base/memory/aligned_memory.h"
9 #include "base/time/time.h" 11 #include "base/time/time.h"
10 #include "build/build_config.h" 12 #include "build/build_config.h"
13 #include "media/base/sinc_resampler.h"
11 #include "media/base/vector_math.h" 14 #include "media/base/vector_math.h"
12 #include "media/base/vector_math_testing.h" 15 #include "media/base/vector_math_testing.h"
13 #include "testing/gtest/include/gtest/gtest.h" 16 #include "testing/gtest/include/gtest/gtest.h"
14 #include "testing/perf/perf_test.h" 17 #include "testing/perf/perf_test.h"
15 18
16 using base::TimeTicks; 19 using base::TimeTicks;
17 using std::fill; 20 using std::fill;
18 21
19 namespace media { 22 namespace media {
20 23
21 static const int kBenchmarkIterations = 200000; 24 static const int kBenchmarkIterations = 200000;
22 static const int kEWMABenchmarkIterations = 50000; 25 static const int kEWMABenchmarkIterations = 50000;
23 static const float kScale = 0.5; 26 static const float kScale = 0.5;
24 static const int kVectorSize = 8192; 27 static const int kVectorSize = 8192;
25 28
29 static const int kSincResamplerBenchmarkIterations = 50000000;
30 static const double kSampleRateRatio = 192000.0 / 44100.0;
31 static const double kKernelInterpolationFactor = 0.5;
32
33 // Helper function to provide no input to SincResampler's Convolve benchmark.
34 static void DoNothing(int frames, float* destination) {}
35
26 class VectorMathPerfTest : public testing::Test { 36 class VectorMathPerfTest : public testing::Test {
27 public: 37 public:
28 VectorMathPerfTest() { 38 VectorMathPerfTest() {
29 // Initialize input and output vectors. 39 // Initialize input and output vectors.
30 input_vector_.reset(static_cast<float*>(base::AlignedAlloc( 40 input_vector_.reset(static_cast<float*>(base::AlignedAlloc(
31 sizeof(float) * kVectorSize, vector_math::kRequiredAlignment))); 41 sizeof(float) * kVectorSize, vector_math::kRequiredAlignment)));
32 output_vector_.reset(static_cast<float*>(base::AlignedAlloc( 42 output_vector_.reset(static_cast<float*>(base::AlignedAlloc(
33 sizeof(float) * kVectorSize, vector_math::kRequiredAlignment))); 43 sizeof(float) * kVectorSize, vector_math::kRequiredAlignment)));
34 fill(input_vector_.get(), input_vector_.get() + kVectorSize, 1.0f); 44 fill(input_vector_.get(), input_vector_.get() + kVectorSize, 1.0f);
35 fill(output_vector_.get(), output_vector_.get() + kVectorSize, 0.0f); 45 fill(output_vector_.get(), output_vector_.get() + kVectorSize, 0.0f);
36 } 46 }
37 47
48 void RunBenchmark(float (*fn)(const float[], const float[], int),
49 bool aligned,
50 const std::string& test_name,
51 const std::string& trace_name) {
52 TimeTicks start = TimeTicks::Now();
53 for (int i = 0; i < kBenchmarkIterations; ++i) {
54 fn(input_vector_.get(), output_vector_.get(),
55 kVectorSize - (aligned ? 0 : 1));
56 }
57 double total_time_milliseconds =
58 (TimeTicks::Now() - start).InMillisecondsF();
59 perf_test::PrintResult(test_name, "", trace_name,
60 kBenchmarkIterations / total_time_milliseconds,
61 "runs/ms", true);
62 }
63
38 void RunBenchmark(void (*fn)(const float[], float, int, float[]), 64 void RunBenchmark(void (*fn)(const float[], float, int, float[]),
39 bool aligned, 65 bool aligned,
40 const std::string& test_name, 66 const std::string& test_name,
41 const std::string& trace_name) { 67 const std::string& trace_name) {
42 TimeTicks start = TimeTicks::Now(); 68 TimeTicks start = TimeTicks::Now();
43 for (int i = 0; i < kBenchmarkIterations; ++i) { 69 for (int i = 0; i < kBenchmarkIterations; ++i) {
44 fn(input_vector_.get(), 70 fn(input_vector_.get(), kScale, kVectorSize - (aligned ? 0 : 1),
45 kScale,
46 kVectorSize - (aligned ? 0 : 1),
47 output_vector_.get()); 71 output_vector_.get());
48 } 72 }
49 double total_time_milliseconds = 73 double total_time_milliseconds =
50 (TimeTicks::Now() - start).InMillisecondsF(); 74 (TimeTicks::Now() - start).InMillisecondsF();
51 perf_test::PrintResult(test_name, 75 perf_test::PrintResult(test_name, "", trace_name,
52 "",
53 trace_name,
54 kBenchmarkIterations / total_time_milliseconds, 76 kBenchmarkIterations / total_time_milliseconds,
55 "runs/ms", 77 "runs/ms", true);
56 true);
57 } 78 }
58 79
59 void RunBenchmark( 80 void RunBenchmark(
60 std::pair<float, float> (*fn)(float, const float[], int, float), 81 std::pair<float, float> (*fn)(float, const float[], int, float),
61 int len, 82 int len,
62 const std::string& test_name, 83 const std::string& test_name,
63 const std::string& trace_name) { 84 const std::string& trace_name) {
64 TimeTicks start = TimeTicks::Now(); 85 TimeTicks start = TimeTicks::Now();
65 for (int i = 0; i < kEWMABenchmarkIterations; ++i) { 86 for (int i = 0; i < kEWMABenchmarkIterations; ++i) {
66 fn(0.5f, input_vector_.get(), len, 0.1f); 87 fn(0.5f, input_vector_.get(), len, 0.1f);
67 } 88 }
68 double total_time_milliseconds = 89 double total_time_milliseconds =
69 (TimeTicks::Now() - start).InMillisecondsF(); 90 (TimeTicks::Now() - start).InMillisecondsF();
70 perf_test::PrintResult(test_name, 91 perf_test::PrintResult(test_name, "", trace_name,
71 "",
72 trace_name,
73 kEWMABenchmarkIterations / total_time_milliseconds, 92 kEWMABenchmarkIterations / total_time_milliseconds,
74 "runs/ms", 93 "runs/ms", true);
75 true); 94 }
95
96 void RunBenchmark(
97 SincResampler* resampler,
98 float (*convolve_fn)(const float*, const float*, const float*, double),
99 bool aligned,
100 const std::string& trace_name) {
101 base::TimeTicks start = base::TimeTicks::Now();
102 for (int i = 0; i < kSincResamplerBenchmarkIterations; ++i) {
103 convolve_fn(resampler->get_kernel_for_testing() + (aligned ? 0 : 1),
104 resampler->get_kernel_for_testing(),
105 resampler->get_kernel_for_testing(),
106 kKernelInterpolationFactor);
107 }
108 double total_time_milliseconds =
109 (base::TimeTicks::Now() - start).InMillisecondsF();
110 perf_test::PrintResult(
111 "sinc_resampler_convolve", "", trace_name,
112 kSincResamplerBenchmarkIterations / total_time_milliseconds, "runs/ms",
113 true);
76 } 114 }
77 115
78 protected: 116 protected:
79 std::unique_ptr<float, base::AlignedFreeDeleter> input_vector_; 117 std::unique_ptr<float, base::AlignedFreeDeleter> input_vector_;
80 std::unique_ptr<float, base::AlignedFreeDeleter> output_vector_; 118 std::unique_ptr<float, base::AlignedFreeDeleter> output_vector_;
81 119
82 DISALLOW_COPY_AND_ASSIGN(VectorMathPerfTest); 120 DISALLOW_COPY_AND_ASSIGN(VectorMathPerfTest);
83 }; 121 };
84 122
85 // Define platform dependent function names for SIMD optimized methods. 123 // Define platform dependent function names for SIMD optimized methods.
86 #if defined(ARCH_CPU_X86_FAMILY) 124 #if defined(ARCH_CPU_X86_FAMILY)
125 #define CONVOLVE_FUNC Convolve_SSE
126 #define DOTPRODUCT_FUNC DotProduct_SSE
127 #define DOTPRODUCT_FUNC2 DotProduct_AVX
87 #define FMAC_FUNC FMAC_SSE 128 #define FMAC_FUNC FMAC_SSE
88 #define FMUL_FUNC FMUL_SSE 129 #define FMUL_FUNC FMUL_SSE
130 #define FMAC_FUNC2 FMAC_AVX
89 #define EWMAAndMaxPower_FUNC EWMAAndMaxPower_SSE 131 #define EWMAAndMaxPower_FUNC EWMAAndMaxPower_SSE
90 #elif defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON) 132 #elif defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON)
133 #define CONVOLVE_FUNC Convolve_NEON
134 #define DOTPRODUCT_FUNC DotProduct_NEON
91 #define FMAC_FUNC FMAC_NEON 135 #define FMAC_FUNC FMAC_NEON
92 #define FMUL_FUNC FMUL_NEON 136 #define FMUL_FUNC FMUL_NEON
93 #define EWMAAndMaxPower_FUNC EWMAAndMaxPower_NEON 137 #define EWMAAndMaxPower_FUNC EWMAAndMaxPower_NEON
94 #endif 138 #endif
95 139
140 // Benchmark for the various Convolve() methods. Make sure to build with
141 // branding=Chrome so that DCHECKs are compiled out when benchmarking.
142 TEST_F(VectorMathPerfTest, Convolve) {
143 SincResampler resampler(kSampleRateRatio, SincResampler::kDefaultRequestSize,
144 base::Bind(&DoNothing));
145
146 RunBenchmark(&resampler, vector_math::Convolve_C, true,
147 "unoptimized_aligned");
148
149 #if defined(CONVOLVE_FUNC)
150 RunBenchmark(&resampler, vector_math::CONVOLVE_FUNC, true,
151 "optimized_aligned");
152 RunBenchmark(&resampler, vector_math::CONVOLVE_FUNC, false,
153 "optimized_unaligned");
154 #endif
155 }
156
157 // Benchmark for each optimized vector_math::DotProduct() method.
158 TEST_F(VectorMathPerfTest, DotProduct) {
159 // Benchmark DotProduct_C().
160 RunBenchmark(vector_math::DotProduct_C, true, "vector_math_dotproduct",
161 "unoptimized");
162 #if defined(DOTPRODUCT_FUNC)
163 // Benchmark DOTPRODUCT_FUNC() with unaligned size.
164 ASSERT_NE(
165 (kVectorSize - 1) % (vector_math::kRequiredAlignment / sizeof(float)),
166 0U);
167 RunBenchmark(vector_math::DOTPRODUCT_FUNC, false, "vector_math_dotproduct",
168 "optimized_unaligned");
169 // Benchmark DOTPRODUCT_FUNC() with aligned size.
170 ASSERT_EQ(kVectorSize % (vector_math::kRequiredAlignment / sizeof(float)),
171 0U);
172 RunBenchmark(vector_math::DOTPRODUCT_FUNC, true, "vector_math_dotproduct",
173 "optimized_aligned");
174 #if defined(FMAC_FUNC2)
175 // Benchmark DOTPRODUCT_FUNC2() with unaligned size.
176 if (!base::CPU().has_avx())
177 return;
178 ASSERT_NE(
179 (kVectorSize - 1) % (vector_math::kRequiredAlignment / sizeof(float)),
180 0U);
181 RunBenchmark(vector_math::DOTPRODUCT_FUNC2, false, "vector_math_dotproduct",
182 "optimized2_unaligned");
183 // Benchmark DOTPRODUCT_FUNC2() with aligned size.
184 ASSERT_EQ(kVectorSize % (vector_math::kRequiredAlignment / sizeof(float)),
185 0U);
186 RunBenchmark(vector_math::DOTPRODUCT_FUNC2, true, "vector_math_dotproduct",
187 "optimized2_aligned");
188 #endif
189 #endif
190 }
191
96 // Benchmark for each optimized vector_math::FMAC() method. 192 // Benchmark for each optimized vector_math::FMAC() method.
97 TEST_F(VectorMathPerfTest, FMAC) { 193 TEST_F(VectorMathPerfTest, FMAC) {
98 // Benchmark FMAC_C(). 194 // Benchmark FMAC_C().
99 RunBenchmark( 195 RunBenchmark(vector_math::FMAC_C, true, "vector_math_fmac", "unoptimized");
100 vector_math::FMAC_C, true, "vector_math_fmac", "unoptimized");
101 #if defined(FMAC_FUNC) 196 #if defined(FMAC_FUNC)
102 // Benchmark FMAC_FUNC() with unaligned size. 197 // Benchmark FMAC_FUNC() with unaligned size.
103 ASSERT_NE((kVectorSize - 1) % (vector_math::kRequiredAlignment / 198 ASSERT_NE(
104 sizeof(float)), 0U); 199 (kVectorSize - 1) % (vector_math::kRequiredAlignment / sizeof(float)),
105 RunBenchmark( 200 0U);
106 vector_math::FMAC_FUNC, false, "vector_math_fmac", "optimized_unaligned"); 201 RunBenchmark(vector_math::FMAC_FUNC, false, "vector_math_fmac",
202 "optimized_unaligned");
107 // Benchmark FMAC_FUNC() with aligned size. 203 // Benchmark FMAC_FUNC() with aligned size.
108 ASSERT_EQ(kVectorSize % (vector_math::kRequiredAlignment / sizeof(float)), 204 ASSERT_EQ(kVectorSize % (vector_math::kRequiredAlignment / sizeof(float)),
109 0U); 205 0U);
110 RunBenchmark( 206 RunBenchmark(vector_math::FMAC_FUNC, true, "vector_math_fmac",
111 vector_math::FMAC_FUNC, true, "vector_math_fmac", "optimized_aligned"); 207 "optimized_aligned");
208 #if defined(FMAC_FUNC2)
209 // Benchmark FMAC_FUNC2() with unaligned size.
210 if (!base::CPU().has_avx())
211 return;
212 ASSERT_NE(
213 (kVectorSize - 1) % (vector_math::kRequiredAlignment / sizeof(float)),
214 0U);
215 RunBenchmark(vector_math::FMAC_FUNC2, false, "vector_math_fmac",
216 "optimized2_unaligned");
217 // Benchmark FMAC_FUNC2() with aligned size.
218 ASSERT_EQ(kVectorSize % (vector_math::kRequiredAlignment / sizeof(float)),
219 0U);
220 RunBenchmark(vector_math::FMAC_FUNC2, true, "vector_math_fmac",
221 "optimized2_aligned");
222 #endif
112 #endif 223 #endif
113 } 224 }
114 225
115 // Benchmark for each optimized vector_math::FMUL() method. 226 // Benchmark for each optimized vector_math::FMUL() method.
116 TEST_F(VectorMathPerfTest, FMUL) { 227 TEST_F(VectorMathPerfTest, FMUL) {
117 // Benchmark FMUL_C(). 228 // Benchmark FMUL_C().
118 RunBenchmark( 229 RunBenchmark(vector_math::FMUL_C, true, "vector_math_fmul", "unoptimized");
119 vector_math::FMUL_C, true, "vector_math_fmul", "unoptimized");
120 #if defined(FMUL_FUNC) 230 #if defined(FMUL_FUNC)
121 // Benchmark FMUL_FUNC() with unaligned size. 231 // Benchmark FMUL_FUNC() with unaligned size.
122 ASSERT_NE((kVectorSize - 1) % (vector_math::kRequiredAlignment / 232 ASSERT_NE(
123 sizeof(float)), 0U); 233 (kVectorSize - 1) % (vector_math::kRequiredAlignment / sizeof(float)),
124 RunBenchmark( 234 0U);
125 vector_math::FMUL_FUNC, false, "vector_math_fmul", "optimized_unaligned"); 235 RunBenchmark(vector_math::FMUL_FUNC, false, "vector_math_fmul",
236 "optimized_unaligned");
126 // Benchmark FMUL_FUNC() with aligned size. 237 // Benchmark FMUL_FUNC() with aligned size.
127 ASSERT_EQ(kVectorSize % (vector_math::kRequiredAlignment / sizeof(float)), 238 ASSERT_EQ(kVectorSize % (vector_math::kRequiredAlignment / sizeof(float)),
128 0U); 239 0U);
129 RunBenchmark( 240 RunBenchmark(vector_math::FMUL_FUNC, true, "vector_math_fmul",
130 vector_math::FMUL_FUNC, true, "vector_math_fmul", "optimized_aligned"); 241 "optimized_aligned");
131 #endif 242 #endif
132 } 243 }
133 244
134 // Benchmark for each optimized vector_math::EWMAAndMaxPower() method. 245 // Benchmark for each optimized vector_math::EWMAAndMaxPower() method.
135 TEST_F(VectorMathPerfTest, EWMAAndMaxPower) { 246 TEST_F(VectorMathPerfTest, EWMAAndMaxPower) {
136 // Benchmark EWMAAndMaxPower_C(). 247 // Benchmark EWMAAndMaxPower_C().
137 RunBenchmark(vector_math::EWMAAndMaxPower_C, 248 RunBenchmark(vector_math::EWMAAndMaxPower_C, kVectorSize,
138 kVectorSize, 249 "vector_math_ewma_and_max_power", "unoptimized");
139 "vector_math_ewma_and_max_power",
140 "unoptimized");
141 #if defined(EWMAAndMaxPower_FUNC) 250 #if defined(EWMAAndMaxPower_FUNC)
142 // Benchmark EWMAAndMaxPower_FUNC() with unaligned size. 251 // Benchmark EWMAAndMaxPower_FUNC() with unaligned size.
143 ASSERT_NE((kVectorSize - 1) % (vector_math::kRequiredAlignment / 252 ASSERT_NE(
144 sizeof(float)), 0U); 253 (kVectorSize - 1) % (vector_math::kRequiredAlignment / sizeof(float)),
145 RunBenchmark(vector_math::EWMAAndMaxPower_FUNC, 254 0U);
146 kVectorSize - 1, 255 RunBenchmark(vector_math::EWMAAndMaxPower_FUNC, kVectorSize - 1,
147 "vector_math_ewma_and_max_power", 256 "vector_math_ewma_and_max_power", "optimized_unaligned");
148 "optimized_unaligned");
149 // Benchmark EWMAAndMaxPower_FUNC() with aligned size. 257 // Benchmark EWMAAndMaxPower_FUNC() with aligned size.
150 ASSERT_EQ(kVectorSize % (vector_math::kRequiredAlignment / sizeof(float)), 258 ASSERT_EQ(kVectorSize % (vector_math::kRequiredAlignment / sizeof(float)),
151 0U); 259 0U);
152 RunBenchmark(vector_math::EWMAAndMaxPower_FUNC, 260 RunBenchmark(vector_math::EWMAAndMaxPower_FUNC, kVectorSize,
153 kVectorSize, 261 "vector_math_ewma_and_max_power", "optimized_aligned");
154 "vector_math_ewma_and_max_power",
155 "optimized_aligned");
156 #endif 262 #endif
157 } 263 }
158 264
159 } // namespace media 265 } // namespace media
OLDNEW
« no previous file with comments | « media/base/vector_math_avx.cc ('k') | media/base/vector_math_testing.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698