OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // MSVC++ requires this to be set before any other includes to get M_PI. | 5 // MSVC++ requires this to be set before any other includes to get M_PI. |
6 #define _USE_MATH_DEFINES | 6 #define _USE_MATH_DEFINES |
7 #include <cmath> | 7 #include <cmath> |
8 | 8 |
9 #include "base/command_line.h" | |
10 #include "base/cpu.h" | 9 #include "base/cpu.h" |
11 #include "base/memory/aligned_memory.h" | 10 #include "base/memory/aligned_memory.h" |
12 #include "base/memory/scoped_ptr.h" | 11 #include "base/memory/scoped_ptr.h" |
13 #include "base/strings/string_number_conversions.h" | 12 #include "base/strings/string_number_conversions.h" |
14 #include "base/strings/stringize_macros.h" | 13 #include "base/strings/stringize_macros.h" |
15 #include "base/time/time.h" | |
16 #include "media/base/vector_math.h" | 14 #include "media/base/vector_math.h" |
17 #include "media/base/vector_math_testing.h" | 15 #include "media/base/vector_math_testing.h" |
18 #include "testing/gtest/include/gtest/gtest.h" | 16 #include "testing/gtest/include/gtest/gtest.h" |
19 | 17 |
20 using base::TimeTicks; | |
21 using std::fill; | 18 using std::fill; |
22 | 19 |
scherkus (not reviewing)
2013/10/29 18:56:20
remove extra blank line
| |
23 // Command line switch for runtime adjustment of benchmark iterations. | 20 |
24 static const char kBenchmarkIterations[] = "vector-math-iterations"; | 21 namespace media { |
25 static const int kDefaultIterations = 10; | |
26 | 22 |
27 // Default test values. | 23 // Default test values. |
28 static const float kScale = 0.5; | 24 static const float kScale = 0.5; |
29 static const float kInputFillValue = 1.0; | 25 static const float kInputFillValue = 1.0; |
30 static const float kOutputFillValue = 3.0; | 26 static const float kOutputFillValue = 3.0; |
31 | 27 static const int kVectorSize = 8192; |
32 namespace media { | |
33 | 28 |
34 class VectorMathTest : public testing::Test { | 29 class VectorMathTest : public testing::Test { |
35 public: | 30 public: |
36 static const int kVectorSize = 8192; | |
37 | 31 |
38 VectorMathTest() { | 32 VectorMathTest() { |
39 // Initialize input and output vectors. | 33 // Initialize input and output vectors. |
40 input_vector.reset(static_cast<float*>(base::AlignedAlloc( | 34 input_vector_.reset(static_cast<float*>(base::AlignedAlloc( |
41 sizeof(float) * kVectorSize, vector_math::kRequiredAlignment))); | 35 sizeof(float) * kVectorSize, vector_math::kRequiredAlignment))); |
42 output_vector.reset(static_cast<float*>(base::AlignedAlloc( | 36 output_vector_.reset(static_cast<float*>(base::AlignedAlloc( |
43 sizeof(float) * kVectorSize, vector_math::kRequiredAlignment))); | 37 sizeof(float) * kVectorSize, vector_math::kRequiredAlignment))); |
44 } | 38 } |
45 | 39 |
46 void FillTestVectors(float input, float output) { | 40 void FillTestVectors(float input, float output) { |
47 // Setup input and output vectors. | 41 // Setup input and output vectors. |
48 fill(input_vector.get(), input_vector.get() + kVectorSize, input); | 42 fill(input_vector_.get(), input_vector_.get() + kVectorSize, input); |
49 fill(output_vector.get(), output_vector.get() + kVectorSize, output); | 43 fill(output_vector_.get(), output_vector_.get() + kVectorSize, output); |
50 } | 44 } |
51 | 45 |
52 void VerifyOutput(float value) { | 46 void VerifyOutput(float value) { |
53 for (int i = 0; i < kVectorSize; ++i) | 47 for (int i = 0; i < kVectorSize; ++i) |
54 ASSERT_FLOAT_EQ(output_vector.get()[i], value); | 48 ASSERT_FLOAT_EQ(output_vector_.get()[i], value); |
55 } | |
56 | |
57 int BenchmarkIterations() { | |
58 int vector_math_iterations = kDefaultIterations; | |
59 std::string iterations( | |
60 CommandLine::ForCurrentProcess()->GetSwitchValueASCII( | |
61 kBenchmarkIterations)); | |
62 if (!iterations.empty()) | |
63 base::StringToInt(iterations, &vector_math_iterations); | |
64 return vector_math_iterations; | |
65 } | 49 } |
66 | 50 |
67 protected: | 51 protected: |
68 int benchmark_iterations; | 52 scoped_ptr_malloc<float, base::ScopedPtrAlignedFree> input_vector_; |
69 scoped_ptr_malloc<float, base::ScopedPtrAlignedFree> input_vector; | 53 scoped_ptr_malloc<float, base::ScopedPtrAlignedFree> output_vector_; |
70 scoped_ptr_malloc<float, base::ScopedPtrAlignedFree> output_vector; | |
71 | 54 |
72 DISALLOW_COPY_AND_ASSIGN(VectorMathTest); | 55 DISALLOW_COPY_AND_ASSIGN(VectorMathTest); |
73 }; | 56 }; |
74 | 57 |
75 // Ensure each optimized vector_math::FMAC() method returns the same value. | 58 // Ensure each optimized vector_math::FMAC() method returns the same value. |
76 TEST_F(VectorMathTest, FMAC) { | 59 TEST_F(VectorMathTest, FMAC) { |
77 static const float kResult = kInputFillValue * kScale + kOutputFillValue; | 60 static const float kResult = kInputFillValue * kScale + kOutputFillValue; |
78 | 61 |
79 { | 62 { |
80 SCOPED_TRACE("FMAC"); | 63 SCOPED_TRACE("FMAC"); |
81 FillTestVectors(kInputFillValue, kOutputFillValue); | 64 FillTestVectors(kInputFillValue, kOutputFillValue); |
82 vector_math::FMAC( | 65 vector_math::FMAC( |
83 input_vector.get(), kScale, kVectorSize, output_vector.get()); | 66 input_vector_.get(), kScale, kVectorSize, output_vector_.get()); |
84 VerifyOutput(kResult); | 67 VerifyOutput(kResult); |
85 } | 68 } |
86 | 69 |
87 { | 70 { |
88 SCOPED_TRACE("FMAC_C"); | 71 SCOPED_TRACE("FMAC_C"); |
89 FillTestVectors(kInputFillValue, kOutputFillValue); | 72 FillTestVectors(kInputFillValue, kOutputFillValue); |
90 vector_math::FMAC_C( | 73 vector_math::FMAC_C( |
91 input_vector.get(), kScale, kVectorSize, output_vector.get()); | 74 input_vector_.get(), kScale, kVectorSize, output_vector_.get()); |
92 VerifyOutput(kResult); | 75 VerifyOutput(kResult); |
93 } | 76 } |
94 | 77 |
95 #if defined(ARCH_CPU_X86_FAMILY) | 78 #if defined(ARCH_CPU_X86_FAMILY) |
96 { | 79 { |
97 ASSERT_TRUE(base::CPU().has_sse()); | 80 ASSERT_TRUE(base::CPU().has_sse()); |
98 SCOPED_TRACE("FMAC_SSE"); | 81 SCOPED_TRACE("FMAC_SSE"); |
99 FillTestVectors(kInputFillValue, kOutputFillValue); | 82 FillTestVectors(kInputFillValue, kOutputFillValue); |
100 vector_math::FMAC_SSE( | 83 vector_math::FMAC_SSE( |
101 input_vector.get(), kScale, kVectorSize, output_vector.get()); | 84 input_vector_.get(), kScale, kVectorSize, output_vector_.get()); |
102 VerifyOutput(kResult); | 85 VerifyOutput(kResult); |
103 } | 86 } |
104 #endif | 87 #endif |
105 | 88 |
106 #if defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON) | 89 #if defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON) |
107 { | 90 { |
108 SCOPED_TRACE("FMAC_NEON"); | 91 SCOPED_TRACE("FMAC_NEON"); |
109 FillTestVectors(kInputFillValue, kOutputFillValue); | 92 FillTestVectors(kInputFillValue, kOutputFillValue); |
110 vector_math::FMAC_NEON( | 93 vector_math::FMAC_NEON( |
111 input_vector.get(), kScale, kVectorSize, output_vector.get()); | 94 input_vector_.get(), kScale, kVectorSize, output_vector_.get()); |
112 VerifyOutput(kResult); | 95 VerifyOutput(kResult); |
113 } | 96 } |
114 #endif | 97 #endif |
115 } | 98 } |
116 | 99 |
117 // Ensure each optimized vector_math::FMUL() method returns the same value. | 100 // Ensure each optimized vector_math::FMUL() method returns the same value. |
118 TEST_F(VectorMathTest, FMUL) { | 101 TEST_F(VectorMathTest, FMUL) { |
119 static const float kResult = kInputFillValue * kScale; | 102 static const float kResult = kInputFillValue * kScale; |
120 | 103 |
121 { | 104 { |
122 SCOPED_TRACE("FMUL"); | 105 SCOPED_TRACE("FMUL"); |
123 FillTestVectors(kInputFillValue, kOutputFillValue); | 106 FillTestVectors(kInputFillValue, kOutputFillValue); |
124 vector_math::FMUL( | 107 vector_math::FMUL( |
125 input_vector.get(), kScale, kVectorSize, output_vector.get()); | 108 input_vector_.get(), kScale, kVectorSize, output_vector_.get()); |
126 VerifyOutput(kResult); | 109 VerifyOutput(kResult); |
127 } | 110 } |
128 | 111 |
129 { | 112 { |
130 SCOPED_TRACE("FMUL_C"); | 113 SCOPED_TRACE("FMUL_C"); |
131 FillTestVectors(kInputFillValue, kOutputFillValue); | 114 FillTestVectors(kInputFillValue, kOutputFillValue); |
132 vector_math::FMUL_C( | 115 vector_math::FMUL_C( |
133 input_vector.get(), kScale, kVectorSize, output_vector.get()); | 116 input_vector_.get(), kScale, kVectorSize, output_vector_.get()); |
134 VerifyOutput(kResult); | 117 VerifyOutput(kResult); |
135 } | 118 } |
136 | 119 |
137 #if defined(ARCH_CPU_X86_FAMILY) | 120 #if defined(ARCH_CPU_X86_FAMILY) |
138 { | 121 { |
139 ASSERT_TRUE(base::CPU().has_sse()); | 122 ASSERT_TRUE(base::CPU().has_sse()); |
140 SCOPED_TRACE("FMUL_SSE"); | 123 SCOPED_TRACE("FMUL_SSE"); |
141 FillTestVectors(kInputFillValue, kOutputFillValue); | 124 FillTestVectors(kInputFillValue, kOutputFillValue); |
142 vector_math::FMUL_SSE( | 125 vector_math::FMUL_SSE( |
143 input_vector.get(), kScale, kVectorSize, output_vector.get()); | 126 input_vector_.get(), kScale, kVectorSize, output_vector_.get()); |
144 VerifyOutput(kResult); | 127 VerifyOutput(kResult); |
145 } | 128 } |
146 #endif | 129 #endif |
147 | 130 |
148 #if defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON) | 131 #if defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON) |
149 { | 132 { |
150 SCOPED_TRACE("FMUL_NEON"); | 133 SCOPED_TRACE("FMUL_NEON"); |
151 FillTestVectors(kInputFillValue, kOutputFillValue); | 134 FillTestVectors(kInputFillValue, kOutputFillValue); |
152 vector_math::FMUL_NEON( | 135 vector_math::FMUL_NEON( |
153 input_vector.get(), kScale, kVectorSize, output_vector.get()); | 136 input_vector_.get(), kScale, kVectorSize, output_vector_.get()); |
154 VerifyOutput(kResult); | 137 VerifyOutput(kResult); |
155 } | 138 } |
156 #endif | 139 #endif |
157 } | 140 } |
158 | 141 |
159 // Define platform independent function name for FMACBenchmark* tests. | |
160 #if defined(ARCH_CPU_X86_FAMILY) | |
161 #define FMAC_FUNC FMAC_SSE | |
162 #elif defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON) | |
163 #define FMAC_FUNC FMAC_NEON | |
164 #endif | |
165 | |
166 // Benchmark for each optimized vector_math::FMAC() method. Original benchmarks | |
167 // were run with --vector-fmac-iterations=200000. | |
168 TEST_F(VectorMathTest, FMACBenchmark) { | |
169 static const int kBenchmarkIterations = BenchmarkIterations(); | |
170 | |
171 printf("Benchmarking %d iterations:\n", kBenchmarkIterations); | |
172 | |
173 // Benchmark FMAC_C(). | |
174 FillTestVectors(kInputFillValue, kOutputFillValue); | |
175 TimeTicks start = TimeTicks::HighResNow(); | |
176 for (int i = 0; i < kBenchmarkIterations; ++i) { | |
177 vector_math::FMAC_C( | |
178 input_vector.get(), kScale, kVectorSize, output_vector.get()); | |
179 } | |
180 double total_time_c_ms = (TimeTicks::HighResNow() - start).InMillisecondsF(); | |
181 printf("FMAC_C took %.2fms.\n", total_time_c_ms); | |
182 | |
183 #if defined(FMAC_FUNC) | |
184 #if defined(ARCH_CPU_X86_FAMILY) | |
185 ASSERT_TRUE(base::CPU().has_sse()); | |
186 #endif | |
187 | |
188 // Benchmark FMAC_FUNC() with unaligned size. | |
189 ASSERT_NE((kVectorSize - 1) % (vector_math::kRequiredAlignment / | |
190 sizeof(float)), 0U); | |
191 FillTestVectors(kInputFillValue, kOutputFillValue); | |
192 start = TimeTicks::HighResNow(); | |
193 for (int j = 0; j < kBenchmarkIterations; ++j) { | |
194 vector_math::FMAC_FUNC( | |
195 input_vector.get(), kScale, kVectorSize - 1, output_vector.get()); | |
196 } | |
197 double total_time_optimized_unaligned_ms = | |
198 (TimeTicks::HighResNow() - start).InMillisecondsF(); | |
199 printf(STRINGIZE(FMAC_FUNC) " (unaligned size) took %.2fms; which is %.2fx " | |
200 "faster than FMAC_C.\n", total_time_optimized_unaligned_ms, | |
201 total_time_c_ms / total_time_optimized_unaligned_ms); | |
202 | |
203 // Benchmark FMAC_FUNC() with aligned size. | |
204 ASSERT_EQ(kVectorSize % (vector_math::kRequiredAlignment / sizeof(float)), | |
205 0U); | |
206 FillTestVectors(kInputFillValue, kOutputFillValue); | |
207 start = TimeTicks::HighResNow(); | |
208 for (int j = 0; j < kBenchmarkIterations; ++j) { | |
209 vector_math::FMAC_FUNC( | |
210 input_vector.get(), kScale, kVectorSize, output_vector.get()); | |
211 } | |
212 double total_time_optimized_aligned_ms = | |
213 (TimeTicks::HighResNow() - start).InMillisecondsF(); | |
214 printf(STRINGIZE(FMAC_FUNC) " (aligned) took %.2fms; which is %.2fx " | |
215 "faster than FMAC_C and %.2fx faster than " | |
216 STRINGIZE(FMAC_FUNC) " (unaligned).\n", | |
217 total_time_optimized_aligned_ms, | |
218 total_time_c_ms / total_time_optimized_aligned_ms, | |
219 total_time_optimized_unaligned_ms / total_time_optimized_aligned_ms); | |
220 #endif | |
221 } | |
222 | |
223 #undef FMAC_FUNC | |
224 | |
225 // Define platform independent function name for FMULBenchmark* tests. | |
226 #if defined(ARCH_CPU_X86_FAMILY) | |
227 #define FMUL_FUNC FMUL_SSE | |
228 #elif defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON) | |
229 #define FMUL_FUNC FMUL_NEON | |
230 #endif | |
231 | |
232 // Benchmark for each optimized vector_math::FMUL() method. Original benchmarks | |
233 // were run with --vector-math-iterations=200000. | |
234 TEST_F(VectorMathTest, FMULBenchmark) { | |
235 static const int kBenchmarkIterations = BenchmarkIterations(); | |
236 | |
237 printf("Benchmarking %d iterations:\n", kBenchmarkIterations); | |
238 | |
239 // Benchmark FMUL_C(). | |
240 FillTestVectors(kInputFillValue, kOutputFillValue); | |
241 TimeTicks start = TimeTicks::HighResNow(); | |
242 for (int i = 0; i < kBenchmarkIterations; ++i) { | |
243 vector_math::FMUL_C( | |
244 input_vector.get(), kScale, kVectorSize, output_vector.get()); | |
245 } | |
246 double total_time_c_ms = (TimeTicks::HighResNow() - start).InMillisecondsF(); | |
247 printf("FMUL_C took %.2fms.\n", total_time_c_ms); | |
248 | |
249 #if defined(FMUL_FUNC) | |
250 #if defined(ARCH_CPU_X86_FAMILY) | |
251 ASSERT_TRUE(base::CPU().has_sse()); | |
252 #endif | |
253 | |
254 // Benchmark FMUL_SSE() with unaligned size. | |
255 ASSERT_NE((kVectorSize - 1) % (vector_math::kRequiredAlignment / | |
256 sizeof(float)), 0U); | |
257 FillTestVectors(kInputFillValue, kOutputFillValue); | |
258 start = TimeTicks::HighResNow(); | |
259 for (int j = 0; j < kBenchmarkIterations; ++j) { | |
260 vector_math::FMUL_FUNC( | |
261 input_vector.get(), kScale, kVectorSize - 1, output_vector.get()); | |
262 } | |
263 double total_time_optimized_unaligned_ms = | |
264 (TimeTicks::HighResNow() - start).InMillisecondsF(); | |
265 printf(STRINGIZE(FMUL_FUNC) " (unaligned size) took %.2fms; which is %.2fx " | |
266 "faster than FMUL_C.\n", total_time_optimized_unaligned_ms, | |
267 total_time_c_ms / total_time_optimized_unaligned_ms); | |
268 | |
269 // Benchmark FMUL_SSE() with aligned size. | |
270 ASSERT_EQ(kVectorSize % (vector_math::kRequiredAlignment / sizeof(float)), | |
271 0U); | |
272 FillTestVectors(kInputFillValue, kOutputFillValue); | |
273 start = TimeTicks::HighResNow(); | |
274 for (int j = 0; j < kBenchmarkIterations; ++j) { | |
275 vector_math::FMUL_FUNC( | |
276 input_vector.get(), kScale, kVectorSize, output_vector.get()); | |
277 } | |
278 double total_time_optimized_aligned_ms = | |
279 (TimeTicks::HighResNow() - start).InMillisecondsF(); | |
280 printf(STRINGIZE(FMUL_FUNC) " (aligned) took %.2fms; which is %.2fx " | |
281 "faster than FMUL_C and %.2fx faster than " | |
282 STRINGIZE(FMUL_FUNC) " (unaligned).\n", | |
283 total_time_optimized_aligned_ms, | |
284 total_time_c_ms / total_time_optimized_aligned_ms, | |
285 total_time_optimized_unaligned_ms / total_time_optimized_aligned_ms); | |
286 #endif | |
287 } | |
288 | |
289 #undef FMUL_FUNC | |
290 | |
291 } // namespace media | 142 } // namespace media |
OLD | NEW |