OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // MSVC++ requires this to be set before any other includes to get M_PI. | 5 // MSVC++ requires this to be set before any other includes to get M_PI. |
6 #define _USE_MATH_DEFINES | 6 #define _USE_MATH_DEFINES |
7 #include <cmath> | 7 #include <cmath> |
8 | 8 |
9 #include "base/command_line.h" | |
10 #include "base/cpu.h" | 9 #include "base/cpu.h" |
11 #include "base/memory/aligned_memory.h" | 10 #include "base/memory/aligned_memory.h" |
12 #include "base/memory/scoped_ptr.h" | 11 #include "base/memory/scoped_ptr.h" |
13 #include "base/strings/string_number_conversions.h" | 12 #include "base/strings/string_number_conversions.h" |
14 #include "base/strings/stringize_macros.h" | 13 #include "base/strings/stringize_macros.h" |
15 #include "base/time/time.h" | |
16 #include "media/base/vector_math.h" | 14 #include "media/base/vector_math.h" |
17 #include "media/base/vector_math_testing.h" | 15 #include "media/base/vector_math_testing.h" |
18 #include "testing/gtest/include/gtest/gtest.h" | 16 #include "testing/gtest/include/gtest/gtest.h" |
19 | 17 |
20 using base::TimeTicks; | |
21 using std::fill; | 18 using std::fill; |
22 | 19 |
23 // Command line switch for runtime adjustment of benchmark iterations. | |
24 static const char kBenchmarkIterations[] = "vector-math-iterations"; | |
25 static const int kDefaultIterations = 10; | |
26 | |
27 // Default test values. | 20 // Default test values. |
28 static const float kScale = 0.5; | 21 static const float kScale = 0.5; |
29 static const float kInputFillValue = 1.0; | 22 static const float kInputFillValue = 1.0; |
30 static const float kOutputFillValue = 3.0; | 23 static const float kOutputFillValue = 3.0; |
31 | 24 |
32 namespace media { | 25 namespace media { |
33 | 26 |
34 class VectorMathTest : public testing::Test { | 27 class VectorMathTest : public testing::Test { |
35 public: | 28 public: |
36 static const int kVectorSize = 8192; | 29 static const int kVectorSize = 8192; |
(...skipping 10 matching lines...) Expand all Loading... |
47 // Setup input and output vectors. | 40 // Setup input and output vectors. |
48 fill(input_vector.get(), input_vector.get() + kVectorSize, input); | 41 fill(input_vector.get(), input_vector.get() + kVectorSize, input); |
49 fill(output_vector.get(), output_vector.get() + kVectorSize, output); | 42 fill(output_vector.get(), output_vector.get() + kVectorSize, output); |
50 } | 43 } |
51 | 44 |
52 void VerifyOutput(float value) { | 45 void VerifyOutput(float value) { |
53 for (int i = 0; i < kVectorSize; ++i) | 46 for (int i = 0; i < kVectorSize; ++i) |
54 ASSERT_FLOAT_EQ(output_vector.get()[i], value); | 47 ASSERT_FLOAT_EQ(output_vector.get()[i], value); |
55 } | 48 } |
56 | 49 |
57 int BenchmarkIterations() { | |
58 int vector_math_iterations = kDefaultIterations; | |
59 std::string iterations( | |
60 CommandLine::ForCurrentProcess()->GetSwitchValueASCII( | |
61 kBenchmarkIterations)); | |
62 if (!iterations.empty()) | |
63 base::StringToInt(iterations, &vector_math_iterations); | |
64 return vector_math_iterations; | |
65 } | |
66 | |
67 protected: | 50 protected: |
68 int benchmark_iterations; | |
69 scoped_ptr_malloc<float, base::ScopedPtrAlignedFree> input_vector; | 51 scoped_ptr_malloc<float, base::ScopedPtrAlignedFree> input_vector; |
70 scoped_ptr_malloc<float, base::ScopedPtrAlignedFree> output_vector; | 52 scoped_ptr_malloc<float, base::ScopedPtrAlignedFree> output_vector; |
71 | 53 |
72 DISALLOW_COPY_AND_ASSIGN(VectorMathTest); | 54 DISALLOW_COPY_AND_ASSIGN(VectorMathTest); |
73 }; | 55 }; |
74 | 56 |
75 // Ensure each optimized vector_math::FMAC() method returns the same value. | 57 // Ensure each optimized vector_math::FMAC() method returns the same value. |
76 TEST_F(VectorMathTest, FMAC) { | 58 TEST_F(VectorMathTest, FMAC) { |
77 static const float kResult = kInputFillValue * kScale + kOutputFillValue; | 59 static const float kResult = kInputFillValue * kScale + kOutputFillValue; |
78 | 60 |
(...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
149 { | 131 { |
150 SCOPED_TRACE("FMUL_NEON"); | 132 SCOPED_TRACE("FMUL_NEON"); |
151 FillTestVectors(kInputFillValue, kOutputFillValue); | 133 FillTestVectors(kInputFillValue, kOutputFillValue); |
152 vector_math::FMUL_NEON( | 134 vector_math::FMUL_NEON( |
153 input_vector.get(), kScale, kVectorSize, output_vector.get()); | 135 input_vector.get(), kScale, kVectorSize, output_vector.get()); |
154 VerifyOutput(kResult); | 136 VerifyOutput(kResult); |
155 } | 137 } |
156 #endif | 138 #endif |
157 } | 139 } |
158 | 140 |
159 // Define platform independent function name for FMACBenchmark* tests. | |
160 #if defined(ARCH_CPU_X86_FAMILY) | |
161 #define FMAC_FUNC FMAC_SSE | |
162 #elif defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON) | |
163 #define FMAC_FUNC FMAC_NEON | |
164 #endif | |
165 | |
166 // Benchmark for each optimized vector_math::FMAC() method. Original benchmarks | |
167 // were run with --vector-math-iterations=200000. | |
168 TEST_F(VectorMathTest, FMACBenchmark) { | |
169 static const int kBenchmarkIterations = BenchmarkIterations(); | |
170 | |
171 printf("Benchmarking %d iterations:\n", kBenchmarkIterations); | |
172 | |
173 // Benchmark FMAC_C(). | |
174 FillTestVectors(kInputFillValue, kOutputFillValue); | |
175 TimeTicks start = TimeTicks::HighResNow(); | |
176 for (int i = 0; i < kBenchmarkIterations; ++i) { | |
177 vector_math::FMAC_C( | |
178 input_vector.get(), kScale, kVectorSize, output_vector.get()); | |
179 } | |
180 double total_time_c_ms = (TimeTicks::HighResNow() - start).InMillisecondsF(); | |
181 printf("FMAC_C took %.2fms.\n", total_time_c_ms); | |
182 | |
183 #if defined(FMAC_FUNC) | |
184 #if defined(ARCH_CPU_X86_FAMILY) | |
185 ASSERT_TRUE(base::CPU().has_sse()); | |
186 #endif | |
187 | |
188 // Benchmark FMAC_FUNC() with unaligned size. | |
189 ASSERT_NE((kVectorSize - 1) % (vector_math::kRequiredAlignment / | |
190 sizeof(float)), 0U); | |
191 FillTestVectors(kInputFillValue, kOutputFillValue); | |
192 start = TimeTicks::HighResNow(); | |
193 for (int j = 0; j < kBenchmarkIterations; ++j) { | |
194 vector_math::FMAC_FUNC( | |
195 input_vector.get(), kScale, kVectorSize - 1, output_vector.get()); | |
196 } | |
197 double total_time_optimized_unaligned_ms = | |
198 (TimeTicks::HighResNow() - start).InMillisecondsF(); | |
199 printf(STRINGIZE(FMAC_FUNC) " (unaligned size) took %.2fms; which is %.2fx " | |
200 "faster than FMAC_C.\n", total_time_optimized_unaligned_ms, | |
201 total_time_c_ms / total_time_optimized_unaligned_ms); | |
202 | |
203 // Benchmark FMAC_FUNC() with aligned size. | |
204 ASSERT_EQ(kVectorSize % (vector_math::kRequiredAlignment / sizeof(float)), | |
205 0U); | |
206 FillTestVectors(kInputFillValue, kOutputFillValue); | |
207 start = TimeTicks::HighResNow(); | |
208 for (int j = 0; j < kBenchmarkIterations; ++j) { | |
209 vector_math::FMAC_FUNC( | |
210 input_vector.get(), kScale, kVectorSize, output_vector.get()); | |
211 } | |
212 double total_time_optimized_aligned_ms = | |
213 (TimeTicks::HighResNow() - start).InMillisecondsF(); | |
214 printf(STRINGIZE(FMAC_FUNC) " (aligned) took %.2fms; which is %.2fx " | |
215 "faster than FMAC_C and %.2fx faster than " | |
216 STRINGIZE(FMAC_FUNC) " (unaligned).\n", | |
217 total_time_optimized_aligned_ms, | |
218 total_time_c_ms / total_time_optimized_aligned_ms, | |
219 total_time_optimized_unaligned_ms / total_time_optimized_aligned_ms); | |
220 #endif | |
221 } | |
222 | |
223 #undef FMAC_FUNC | |
224 | |
225 // Define platform independent function name for FMULBenchmark* tests. | |
226 #if defined(ARCH_CPU_X86_FAMILY) | |
227 #define FMUL_FUNC FMUL_SSE | |
228 #elif defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON) | |
229 #define FMUL_FUNC FMUL_NEON | |
230 #endif | |
231 | |
232 // Benchmark for each optimized vector_math::FMUL() method. Original benchmarks | |
233 // were run with --vector-math-iterations=200000. | |
234 TEST_F(VectorMathTest, FMULBenchmark) { | |
235 static const int kBenchmarkIterations = BenchmarkIterations(); | |
236 | |
237 printf("Benchmarking %d iterations:\n", kBenchmarkIterations); | |
238 | |
239 // Benchmark FMUL_C(). | |
240 FillTestVectors(kInputFillValue, kOutputFillValue); | |
241 TimeTicks start = TimeTicks::HighResNow(); | |
242 for (int i = 0; i < kBenchmarkIterations; ++i) { | |
243 vector_math::FMUL_C( | |
244 input_vector.get(), kScale, kVectorSize, output_vector.get()); | |
245 } | |
246 double total_time_c_ms = (TimeTicks::HighResNow() - start).InMillisecondsF(); | |
247 printf("FMUL_C took %.2fms.\n", total_time_c_ms); | |
248 | |
249 #if defined(FMUL_FUNC) | |
250 #if defined(ARCH_CPU_X86_FAMILY) | |
251 ASSERT_TRUE(base::CPU().has_sse()); | |
252 #endif | |
253 | |
254 // Benchmark FMUL_FUNC() with unaligned size. | |
255 ASSERT_NE((kVectorSize - 1) % (vector_math::kRequiredAlignment / | |
256 sizeof(float)), 0U); | |
257 FillTestVectors(kInputFillValue, kOutputFillValue); | |
258 start = TimeTicks::HighResNow(); | |
259 for (int j = 0; j < kBenchmarkIterations; ++j) { | |
260 vector_math::FMUL_FUNC( | |
261 input_vector.get(), kScale, kVectorSize - 1, output_vector.get()); | |
262 } | |
263 double total_time_optimized_unaligned_ms = | |
264 (TimeTicks::HighResNow() - start).InMillisecondsF(); | |
265 printf(STRINGIZE(FMUL_FUNC) " (unaligned size) took %.2fms; which is %.2fx " | |
266 "faster than FMUL_C.\n", total_time_optimized_unaligned_ms, | |
267 total_time_c_ms / total_time_optimized_unaligned_ms); | |
268 | |
269 // Benchmark FMUL_FUNC() with aligned size. | |
270 ASSERT_EQ(kVectorSize % (vector_math::kRequiredAlignment / sizeof(float)), | |
271 0U); | |
272 FillTestVectors(kInputFillValue, kOutputFillValue); | |
273 start = TimeTicks::HighResNow(); | |
274 for (int j = 0; j < kBenchmarkIterations; ++j) { | |
275 vector_math::FMUL_FUNC( | |
276 input_vector.get(), kScale, kVectorSize, output_vector.get()); | |
277 } | |
278 double total_time_optimized_aligned_ms = | |
279 (TimeTicks::HighResNow() - start).InMillisecondsF(); | |
280 printf(STRINGIZE(FMUL_FUNC) " (aligned) took %.2fms; which is %.2fx " | |
281 "faster than FMUL_C and %.2fx faster than " | |
282 STRINGIZE(FMUL_FUNC) " (unaligned).\n", | |
283 total_time_optimized_aligned_ms, | |
284 total_time_c_ms / total_time_optimized_aligned_ms, | |
285 total_time_optimized_unaligned_ms / total_time_optimized_aligned_ms); | |
286 #endif | |
287 } | |
288 | |
289 #undef FMUL_FUNC | |
290 | |
291 } // namespace media | 141 } // namespace media |
OLD | NEW |