OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // MSVC++ requires this to be set before any other includes to get M_PI. | 5 // MSVC++ requires this to be set before any other includes to get M_PI. |
6 #define _USE_MATH_DEFINES | 6 #define _USE_MATH_DEFINES |
7 #include <cmath> | 7 #include <cmath> |
8 | 8 |
9 #include "base/command_line.h" | |
10 #include "base/cpu.h" | 9 #include "base/cpu.h" |
11 #include "base/memory/aligned_memory.h" | 10 #include "base/memory/aligned_memory.h" |
12 #include "base/memory/scoped_ptr.h" | 11 #include "base/memory/scoped_ptr.h" |
13 #include "base/strings/string_number_conversions.h" | 12 #include "base/strings/string_number_conversions.h" |
14 #include "base/strings/stringize_macros.h" | 13 #include "base/strings/stringize_macros.h" |
15 #include "base/time/time.h" | |
16 #include "media/base/vector_math.h" | 14 #include "media/base/vector_math.h" |
17 #include "media/base/vector_math_testing.h" | 15 #include "media/base/vector_math_testing.h" |
18 #include "testing/gtest/include/gtest/gtest.h" | 16 #include "testing/gtest/include/gtest/gtest.h" |
19 | 17 |
20 using base::TimeTicks; | |
21 using std::fill; | 18 using std::fill; |
22 | 19 |
23 // Command line switch for runtime adjustment of benchmark iterations. | 20 namespace media { |
24 static const char kBenchmarkIterations[] = "vector-math-iterations"; | |
25 static const int kDefaultIterations = 10; | |
26 | 21 |
27 // Default test values. | 22 // Default test values. |
28 static const float kScale = 0.5; | 23 static const float kScale = 0.5; |
29 static const float kInputFillValue = 1.0; | 24 static const float kInputFillValue = 1.0; |
30 static const float kOutputFillValue = 3.0; | 25 static const float kOutputFillValue = 3.0; |
31 | 26 static const int kVectorSize = 8192; |
32 namespace media { | |
33 | 27 |
34 class VectorMathTest : public testing::Test { | 28 class VectorMathTest : public testing::Test { |
35 public: | 29 public: |
36 static const int kVectorSize = 8192; | |
37 | 30 |
38 VectorMathTest() { | 31 VectorMathTest() { |
39 // Initialize input and output vectors. | 32 // Initialize input and output vectors. |
40 input_vector.reset(static_cast<float*>(base::AlignedAlloc( | 33 input_vector_.reset(static_cast<float*>(base::AlignedAlloc( |
41 sizeof(float) * kVectorSize, vector_math::kRequiredAlignment))); | 34 sizeof(float) * kVectorSize, vector_math::kRequiredAlignment))); |
42 output_vector.reset(static_cast<float*>(base::AlignedAlloc( | 35 output_vector_.reset(static_cast<float*>(base::AlignedAlloc( |
43 sizeof(float) * kVectorSize, vector_math::kRequiredAlignment))); | 36 sizeof(float) * kVectorSize, vector_math::kRequiredAlignment))); |
44 } | 37 } |
45 | 38 |
46 void FillTestVectors(float input, float output) { | 39 void FillTestVectors(float input, float output) { |
47 // Setup input and output vectors. | 40 // Setup input and output vectors. |
48 fill(input_vector.get(), input_vector.get() + kVectorSize, input); | 41 fill(input_vector_.get(), input_vector_.get() + kVectorSize, input); |
49 fill(output_vector.get(), output_vector.get() + kVectorSize, output); | 42 fill(output_vector_.get(), output_vector_.get() + kVectorSize, output); |
50 } | 43 } |
51 | 44 |
52 void VerifyOutput(float value) { | 45 void VerifyOutput(float value) { |
53 for (int i = 0; i < kVectorSize; ++i) | 46 for (int i = 0; i < kVectorSize; ++i) |
54 ASSERT_FLOAT_EQ(output_vector.get()[i], value); | 47 ASSERT_FLOAT_EQ(output_vector_.get()[i], value); |
55 } | |
56 | |
57 int BenchmarkIterations() { | |
58 int vector_math_iterations = kDefaultIterations; | |
59 std::string iterations( | |
60 CommandLine::ForCurrentProcess()->GetSwitchValueASCII( | |
61 kBenchmarkIterations)); | |
62 if (!iterations.empty()) | |
63 base::StringToInt(iterations, &vector_math_iterations); | |
64 return vector_math_iterations; | |
65 } | 48 } |
66 | 49 |
67 protected: | 50 protected: |
68 int benchmark_iterations; | 51 scoped_ptr_malloc<float, base::ScopedPtrAlignedFree> input_vector_; |
69 scoped_ptr_malloc<float, base::ScopedPtrAlignedFree> input_vector; | 52 scoped_ptr_malloc<float, base::ScopedPtrAlignedFree> output_vector_; |
70 scoped_ptr_malloc<float, base::ScopedPtrAlignedFree> output_vector; | |
71 | 53 |
72 DISALLOW_COPY_AND_ASSIGN(VectorMathTest); | 54 DISALLOW_COPY_AND_ASSIGN(VectorMathTest); |
73 }; | 55 }; |
74 | 56 |
75 // Ensure each optimized vector_math::FMAC() method returns the same value. | 57 // Ensure each optimized vector_math::FMAC() method returns the same value. |
76 TEST_F(VectorMathTest, FMAC) { | 58 TEST_F(VectorMathTest, FMAC) { |
77 static const float kResult = kInputFillValue * kScale + kOutputFillValue; | 59 static const float kResult = kInputFillValue * kScale + kOutputFillValue; |
78 | 60 |
79 { | 61 { |
80 SCOPED_TRACE("FMAC"); | 62 SCOPED_TRACE("FMAC"); |
81 FillTestVectors(kInputFillValue, kOutputFillValue); | 63 FillTestVectors(kInputFillValue, kOutputFillValue); |
82 vector_math::FMAC( | 64 vector_math::FMAC( |
83 input_vector.get(), kScale, kVectorSize, output_vector.get()); | 65 input_vector_.get(), kScale, kVectorSize, output_vector_.get()); |
84 VerifyOutput(kResult); | 66 VerifyOutput(kResult); |
85 } | 67 } |
86 | 68 |
87 { | 69 { |
88 SCOPED_TRACE("FMAC_C"); | 70 SCOPED_TRACE("FMAC_C"); |
89 FillTestVectors(kInputFillValue, kOutputFillValue); | 71 FillTestVectors(kInputFillValue, kOutputFillValue); |
90 vector_math::FMAC_C( | 72 vector_math::FMAC_C( |
91 input_vector.get(), kScale, kVectorSize, output_vector.get()); | 73 input_vector_.get(), kScale, kVectorSize, output_vector_.get()); |
92 VerifyOutput(kResult); | 74 VerifyOutput(kResult); |
93 } | 75 } |
94 | 76 |
95 #if defined(ARCH_CPU_X86_FAMILY) | 77 #if defined(ARCH_CPU_X86_FAMILY) |
96 { | 78 { |
97 ASSERT_TRUE(base::CPU().has_sse()); | 79 ASSERT_TRUE(base::CPU().has_sse()); |
98 SCOPED_TRACE("FMAC_SSE"); | 80 SCOPED_TRACE("FMAC_SSE"); |
99 FillTestVectors(kInputFillValue, kOutputFillValue); | 81 FillTestVectors(kInputFillValue, kOutputFillValue); |
100 vector_math::FMAC_SSE( | 82 vector_math::FMAC_SSE( |
101 input_vector.get(), kScale, kVectorSize, output_vector.get()); | 83 input_vector_.get(), kScale, kVectorSize, output_vector_.get()); |
102 VerifyOutput(kResult); | 84 VerifyOutput(kResult); |
103 } | 85 } |
104 #endif | 86 #endif |
105 | 87 |
106 #if defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON) | 88 #if defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON) |
107 { | 89 { |
108 SCOPED_TRACE("FMAC_NEON"); | 90 SCOPED_TRACE("FMAC_NEON"); |
109 FillTestVectors(kInputFillValue, kOutputFillValue); | 91 FillTestVectors(kInputFillValue, kOutputFillValue); |
110 vector_math::FMAC_NEON( | 92 vector_math::FMAC_NEON( |
111 input_vector.get(), kScale, kVectorSize, output_vector.get()); | 93 input_vector_.get(), kScale, kVectorSize, output_vector_.get()); |
112 VerifyOutput(kResult); | 94 VerifyOutput(kResult); |
113 } | 95 } |
114 #endif | 96 #endif |
115 } | 97 } |
116 | 98 |
117 // Ensure each optimized vector_math::FMUL() method returns the same value. | 99 // Ensure each optimized vector_math::FMUL() method returns the same value. |
118 TEST_F(VectorMathTest, FMUL) { | 100 TEST_F(VectorMathTest, FMUL) { |
119 static const float kResult = kInputFillValue * kScale; | 101 static const float kResult = kInputFillValue * kScale; |
120 | 102 |
121 { | 103 { |
122 SCOPED_TRACE("FMUL"); | 104 SCOPED_TRACE("FMUL"); |
123 FillTestVectors(kInputFillValue, kOutputFillValue); | 105 FillTestVectors(kInputFillValue, kOutputFillValue); |
124 vector_math::FMUL( | 106 vector_math::FMUL( |
125 input_vector.get(), kScale, kVectorSize, output_vector.get()); | 107 input_vector_.get(), kScale, kVectorSize, output_vector_.get()); |
126 VerifyOutput(kResult); | 108 VerifyOutput(kResult); |
127 } | 109 } |
128 | 110 |
129 { | 111 { |
130 SCOPED_TRACE("FMUL_C"); | 112 SCOPED_TRACE("FMUL_C"); |
131 FillTestVectors(kInputFillValue, kOutputFillValue); | 113 FillTestVectors(kInputFillValue, kOutputFillValue); |
132 vector_math::FMUL_C( | 114 vector_math::FMUL_C( |
133 input_vector.get(), kScale, kVectorSize, output_vector.get()); | 115 input_vector_.get(), kScale, kVectorSize, output_vector_.get()); |
134 VerifyOutput(kResult); | 116 VerifyOutput(kResult); |
135 } | 117 } |
136 | 118 |
137 #if defined(ARCH_CPU_X86_FAMILY) | 119 #if defined(ARCH_CPU_X86_FAMILY) |
138 { | 120 { |
139 ASSERT_TRUE(base::CPU().has_sse()); | 121 ASSERT_TRUE(base::CPU().has_sse()); |
140 SCOPED_TRACE("FMUL_SSE"); | 122 SCOPED_TRACE("FMUL_SSE"); |
141 FillTestVectors(kInputFillValue, kOutputFillValue); | 123 FillTestVectors(kInputFillValue, kOutputFillValue); |
142 vector_math::FMUL_SSE( | 124 vector_math::FMUL_SSE( |
143 input_vector.get(), kScale, kVectorSize, output_vector.get()); | 125 input_vector_.get(), kScale, kVectorSize, output_vector_.get()); |
144 VerifyOutput(kResult); | 126 VerifyOutput(kResult); |
145 } | 127 } |
146 #endif | 128 #endif |
147 | 129 |
148 #if defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON) | 130 #if defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON) |
149 { | 131 { |
150 SCOPED_TRACE("FMUL_NEON"); | 132 SCOPED_TRACE("FMUL_NEON"); |
151 FillTestVectors(kInputFillValue, kOutputFillValue); | 133 FillTestVectors(kInputFillValue, kOutputFillValue); |
152 vector_math::FMUL_NEON( | 134 vector_math::FMUL_NEON( |
153 input_vector.get(), kScale, kVectorSize, output_vector.get()); | 135 input_vector_.get(), kScale, kVectorSize, output_vector_.get()); |
154 VerifyOutput(kResult); | 136 VerifyOutput(kResult); |
155 } | 137 } |
156 #endif | 138 #endif |
157 } | 139 } |
158 | 140 |
159 // Define platform independent function name for FMACBenchmark* tests. | |
160 #if defined(ARCH_CPU_X86_FAMILY) | |
161 #define FMAC_FUNC FMAC_SSE | |
162 #elif defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON) | |
163 #define FMAC_FUNC FMAC_NEON | |
164 #endif | |
165 | |
166 // Benchmark for each optimized vector_math::FMAC() method. Original benchmarks | |
167 // were run with --vector-math-iterations=200000. | |
168 TEST_F(VectorMathTest, FMACBenchmark) { | |
169 static const int kBenchmarkIterations = BenchmarkIterations(); | |
170 | |
171 printf("Benchmarking %d iterations:\n", kBenchmarkIterations); | |
172 | |
173 // Benchmark FMAC_C(). | |
174 FillTestVectors(kInputFillValue, kOutputFillValue); | |
175 TimeTicks start = TimeTicks::HighResNow(); | |
176 for (int i = 0; i < kBenchmarkIterations; ++i) { | |
177 vector_math::FMAC_C( | |
178 input_vector.get(), kScale, kVectorSize, output_vector.get()); | |
179 } | |
180 double total_time_c_ms = (TimeTicks::HighResNow() - start).InMillisecondsF(); | |
181 printf("FMAC_C took %.2fms.\n", total_time_c_ms); | |
182 | |
183 #if defined(FMAC_FUNC) | |
184 #if defined(ARCH_CPU_X86_FAMILY) | |
185 ASSERT_TRUE(base::CPU().has_sse()); | |
186 #endif | |
187 | |
188 // Benchmark FMAC_FUNC() with unaligned size. | |
189 ASSERT_NE((kVectorSize - 1) % (vector_math::kRequiredAlignment / | |
190 sizeof(float)), 0U); | |
191 FillTestVectors(kInputFillValue, kOutputFillValue); | |
192 start = TimeTicks::HighResNow(); | |
193 for (int j = 0; j < kBenchmarkIterations; ++j) { | |
194 vector_math::FMAC_FUNC( | |
195 input_vector.get(), kScale, kVectorSize - 1, output_vector.get()); | |
196 } | |
197 double total_time_optimized_unaligned_ms = | |
198 (TimeTicks::HighResNow() - start).InMillisecondsF(); | |
199 printf(STRINGIZE(FMAC_FUNC) " (unaligned size) took %.2fms; which is %.2fx " | |
200 "faster than FMAC_C.\n", total_time_optimized_unaligned_ms, | |
201 total_time_c_ms / total_time_optimized_unaligned_ms); | |
202 | |
203 // Benchmark FMAC_FUNC() with aligned size. | |
204 ASSERT_EQ(kVectorSize % (vector_math::kRequiredAlignment / sizeof(float)), | |
205 0U); | |
206 FillTestVectors(kInputFillValue, kOutputFillValue); | |
207 start = TimeTicks::HighResNow(); | |
208 for (int j = 0; j < kBenchmarkIterations; ++j) { | |
209 vector_math::FMAC_FUNC( | |
210 input_vector.get(), kScale, kVectorSize, output_vector.get()); | |
211 } | |
212 double total_time_optimized_aligned_ms = | |
213 (TimeTicks::HighResNow() - start).InMillisecondsF(); | |
214 printf(STRINGIZE(FMAC_FUNC) " (aligned) took %.2fms; which is %.2fx " | |
215 "faster than FMAC_C and %.2fx faster than " | |
216 STRINGIZE(FMAC_FUNC) " (unaligned).\n", | |
217 total_time_optimized_aligned_ms, | |
218 total_time_c_ms / total_time_optimized_aligned_ms, | |
219 total_time_optimized_unaligned_ms / total_time_optimized_aligned_ms); | |
220 #endif | |
221 } | |
222 | |
223 #undef FMAC_FUNC | |
224 | |
225 // Define platform independent function name for FMULBenchmark* tests. | |
226 #if defined(ARCH_CPU_X86_FAMILY) | |
227 #define FMUL_FUNC FMUL_SSE | |
228 #elif defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON) | |
229 #define FMUL_FUNC FMUL_NEON | |
230 #endif | |
231 | |
232 // Benchmark for each optimized vector_math::FMUL() method. Original benchmarks | |
233 // were run with --vector-math-iterations=200000. | |
234 TEST_F(VectorMathTest, FMULBenchmark) { | |
235 static const int kBenchmarkIterations = BenchmarkIterations(); | |
236 | |
237 printf("Benchmarking %d iterations:\n", kBenchmarkIterations); | |
238 | |
239 // Benchmark FMUL_C(). | |
240 FillTestVectors(kInputFillValue, kOutputFillValue); | |
241 TimeTicks start = TimeTicks::HighResNow(); | |
242 for (int i = 0; i < kBenchmarkIterations; ++i) { | |
243 vector_math::FMUL_C( | |
244 input_vector.get(), kScale, kVectorSize, output_vector.get()); | |
245 } | |
246 double total_time_c_ms = (TimeTicks::HighResNow() - start).InMillisecondsF(); | |
247 printf("FMUL_C took %.2fms.\n", total_time_c_ms); | |
248 | |
249 #if defined(FMUL_FUNC) | |
250 #if defined(ARCH_CPU_X86_FAMILY) | |
251 ASSERT_TRUE(base::CPU().has_sse()); | |
252 #endif | |
253 | |
254 // Benchmark FMUL_FUNC() with unaligned size. | |
255 ASSERT_NE((kVectorSize - 1) % (vector_math::kRequiredAlignment / | |
256 sizeof(float)), 0U); | |
257 FillTestVectors(kInputFillValue, kOutputFillValue); | |
258 start = TimeTicks::HighResNow(); | |
259 for (int j = 0; j < kBenchmarkIterations; ++j) { | |
260 vector_math::FMUL_FUNC( | |
261 input_vector.get(), kScale, kVectorSize - 1, output_vector.get()); | |
262 } | |
263 double total_time_optimized_unaligned_ms = | |
264 (TimeTicks::HighResNow() - start).InMillisecondsF(); | |
265 printf(STRINGIZE(FMUL_FUNC) " (unaligned size) took %.2fms; which is %.2fx " | |
266 "faster than FMUL_C.\n", total_time_optimized_unaligned_ms, | |
267 total_time_c_ms / total_time_optimized_unaligned_ms); | |
268 | |
269 // Benchmark FMUL_FUNC() with aligned size. | |
270 ASSERT_EQ(kVectorSize % (vector_math::kRequiredAlignment / sizeof(float)), | |
271 0U); | |
272 FillTestVectors(kInputFillValue, kOutputFillValue); | |
273 start = TimeTicks::HighResNow(); | |
274 for (int j = 0; j < kBenchmarkIterations; ++j) { | |
275 vector_math::FMUL_FUNC( | |
276 input_vector.get(), kScale, kVectorSize, output_vector.get()); | |
277 } | |
278 double total_time_optimized_aligned_ms = | |
279 (TimeTicks::HighResNow() - start).InMillisecondsF(); | |
280 printf(STRINGIZE(FMUL_FUNC) " (aligned) took %.2fms; which is %.2fx " | |
281 "faster than FMUL_C and %.2fx faster than " | |
282 STRINGIZE(FMUL_FUNC) " (unaligned).\n", | |
283 total_time_optimized_aligned_ms, | |
284 total_time_c_ms / total_time_optimized_aligned_ms, | |
285 total_time_optimized_unaligned_ms / total_time_optimized_aligned_ms); | |
286 #endif | |
287 } | |
288 | |
289 #undef FMUL_FUNC | |
290 | |
291 } // namespace media | 141 } // namespace media |
OLD | NEW |