| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // MSVC++ requires this to be set before any other includes to get M_PI. | 5 // MSVC++ requires this to be set before any other includes to get M_PI. |
| 6 #define _USE_MATH_DEFINES | 6 #define _USE_MATH_DEFINES |
| 7 #include <cmath> | 7 #include <cmath> |
| 8 | 8 |
| 9 #include "base/command_line.h" | |
| 10 #include "base/cpu.h" | 9 #include "base/cpu.h" |
| 11 #include "base/memory/aligned_memory.h" | 10 #include "base/memory/aligned_memory.h" |
| 12 #include "base/memory/scoped_ptr.h" | 11 #include "base/memory/scoped_ptr.h" |
| 13 #include "base/strings/string_number_conversions.h" | 12 #include "base/strings/string_number_conversions.h" |
| 14 #include "base/strings/stringize_macros.h" | 13 #include "base/strings/stringize_macros.h" |
| 15 #include "base/time/time.h" | |
| 16 #include "media/base/vector_math.h" | 14 #include "media/base/vector_math.h" |
| 17 #include "media/base/vector_math_testing.h" | 15 #include "media/base/vector_math_testing.h" |
| 18 #include "testing/gtest/include/gtest/gtest.h" | 16 #include "testing/gtest/include/gtest/gtest.h" |
| 19 | 17 |
| 20 using base::TimeTicks; | |
| 21 using std::fill; | 18 using std::fill; |
| 22 | 19 |
| 23 // Command line switch for runtime adjustment of benchmark iterations. | |
| 24 static const char kBenchmarkIterations[] = "vector-math-iterations"; | |
| 25 static const int kDefaultIterations = 10; | |
| 26 | |
| 27 // Default test values. | 20 // Default test values. |
| 28 static const float kScale = 0.5; | 21 static const float kScale = 0.5; |
| 29 static const float kInputFillValue = 1.0; | 22 static const float kInputFillValue = 1.0; |
| 30 static const float kOutputFillValue = 3.0; | 23 static const float kOutputFillValue = 3.0; |
| 31 | 24 |
| 32 namespace media { | 25 namespace media { |
| 33 | 26 |
| 34 class VectorMathTest : public testing::Test { | 27 class VectorMathTest : public testing::Test { |
| 35 public: | 28 public: |
| 36 static const int kVectorSize = 8192; | 29 static const int kVectorSize = 8192; |
| (...skipping 10 matching lines...) Expand all Loading... |
| 47 // Set up input and output vectors. | 40 // Set up input and output vectors. |
| 48 fill(input_vector.get(), input_vector.get() + kVectorSize, input); | 41 fill(input_vector.get(), input_vector.get() + kVectorSize, input); |
| 49 fill(output_vector.get(), output_vector.get() + kVectorSize, output); | 42 fill(output_vector.get(), output_vector.get() + kVectorSize, output); |
| 50 } | 43 } |
| 51 | 44 |
| 52 void VerifyOutput(float value) { | 45 void VerifyOutput(float value) { |
| 53 for (int i = 0; i < kVectorSize; ++i) | 46 for (int i = 0; i < kVectorSize; ++i) |
| 54 ASSERT_FLOAT_EQ(output_vector.get()[i], value); | 47 ASSERT_FLOAT_EQ(output_vector.get()[i], value); |
| 55 } | 48 } |
| 56 | 49 |
| 57 int BenchmarkIterations() { | |
| 58 int vector_math_iterations = kDefaultIterations; | |
| 59 std::string iterations( | |
| 60 CommandLine::ForCurrentProcess()->GetSwitchValueASCII( | |
| 61 kBenchmarkIterations)); | |
| 62 if (!iterations.empty()) | |
| 63 base::StringToInt(iterations, &vector_math_iterations); | |
| 64 return vector_math_iterations; | |
| 65 } | |
| 66 | |
| 67 protected: | 50 protected: |
| 68 int benchmark_iterations; | |
| 69 scoped_ptr_malloc<float, base::ScopedPtrAlignedFree> input_vector; | 51 scoped_ptr_malloc<float, base::ScopedPtrAlignedFree> input_vector; |
| 70 scoped_ptr_malloc<float, base::ScopedPtrAlignedFree> output_vector; | 52 scoped_ptr_malloc<float, base::ScopedPtrAlignedFree> output_vector; |
| 71 | 53 |
| 72 DISALLOW_COPY_AND_ASSIGN(VectorMathTest); | 54 DISALLOW_COPY_AND_ASSIGN(VectorMathTest); |
| 73 }; | 55 }; |
| 74 | 56 |
| 75 // Ensure each optimized vector_math::FMAC() method returns the same value. | 57 // Ensure each optimized vector_math::FMAC() method returns the same value. |
| 76 TEST_F(VectorMathTest, FMAC) { | 58 TEST_F(VectorMathTest, FMAC) { |
| 77 static const float kResult = kInputFillValue * kScale + kOutputFillValue; | 59 static const float kResult = kInputFillValue * kScale + kOutputFillValue; |
| 78 | 60 |
| (...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 149 { | 131 { |
| 150 SCOPED_TRACE("FMUL_NEON"); | 132 SCOPED_TRACE("FMUL_NEON"); |
| 151 FillTestVectors(kInputFillValue, kOutputFillValue); | 133 FillTestVectors(kInputFillValue, kOutputFillValue); |
| 152 vector_math::FMUL_NEON( | 134 vector_math::FMUL_NEON( |
| 153 input_vector.get(), kScale, kVectorSize, output_vector.get()); | 135 input_vector.get(), kScale, kVectorSize, output_vector.get()); |
| 154 VerifyOutput(kResult); | 136 VerifyOutput(kResult); |
| 155 } | 137 } |
| 156 #endif | 138 #endif |
| 157 } | 139 } |
| 158 | 140 |
| 159 // Define platform independent function name for FMACBenchmark* tests. | |
| 160 #if defined(ARCH_CPU_X86_FAMILY) | |
| 161 #define FMAC_FUNC FMAC_SSE | |
| 162 #elif defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON) | |
| 163 #define FMAC_FUNC FMAC_NEON | |
| 164 #endif | |
| 165 | |
| 166 // Benchmark for each optimized vector_math::FMAC() method. Original benchmarks | |
| 167 // were run with --vector-math-iterations=200000. | |
| 168 TEST_F(VectorMathTest, FMACBenchmark) { | |
| 169 static const int kBenchmarkIterations = BenchmarkIterations(); | |
| 170 | |
| 171 printf("Benchmarking %d iterations:\n", kBenchmarkIterations); | |
| 172 | |
| 173 // Benchmark FMAC_C(). | |
| 174 FillTestVectors(kInputFillValue, kOutputFillValue); | |
| 175 TimeTicks start = TimeTicks::HighResNow(); | |
| 176 for (int i = 0; i < kBenchmarkIterations; ++i) { | |
| 177 vector_math::FMAC_C( | |
| 178 input_vector.get(), kScale, kVectorSize, output_vector.get()); | |
| 179 } | |
| 180 double total_time_c_ms = (TimeTicks::HighResNow() - start).InMillisecondsF(); | |
| 181 printf("FMAC_C took %.2fms.\n", total_time_c_ms); | |
| 182 | |
| 183 #if defined(FMAC_FUNC) | |
| 184 #if defined(ARCH_CPU_X86_FAMILY) | |
| 185 ASSERT_TRUE(base::CPU().has_sse()); | |
| 186 #endif | |
| 187 | |
| 188 // Benchmark FMAC_FUNC() with unaligned size. | |
| 189 ASSERT_NE((kVectorSize - 1) % (vector_math::kRequiredAlignment / | |
| 190 sizeof(float)), 0U); | |
| 191 FillTestVectors(kInputFillValue, kOutputFillValue); | |
| 192 start = TimeTicks::HighResNow(); | |
| 193 for (int j = 0; j < kBenchmarkIterations; ++j) { | |
| 194 vector_math::FMAC_FUNC( | |
| 195 input_vector.get(), kScale, kVectorSize - 1, output_vector.get()); | |
| 196 } | |
| 197 double total_time_optimized_unaligned_ms = | |
| 198 (TimeTicks::HighResNow() - start).InMillisecondsF(); | |
| 199 printf(STRINGIZE(FMAC_FUNC) " (unaligned size) took %.2fms; which is %.2fx " | |
| 200 "faster than FMAC_C.\n", total_time_optimized_unaligned_ms, | |
| 201 total_time_c_ms / total_time_optimized_unaligned_ms); | |
| 202 | |
| 203 // Benchmark FMAC_FUNC() with aligned size. | |
| 204 ASSERT_EQ(kVectorSize % (vector_math::kRequiredAlignment / sizeof(float)), | |
| 205 0U); | |
| 206 FillTestVectors(kInputFillValue, kOutputFillValue); | |
| 207 start = TimeTicks::HighResNow(); | |
| 208 for (int j = 0; j < kBenchmarkIterations; ++j) { | |
| 209 vector_math::FMAC_FUNC( | |
| 210 input_vector.get(), kScale, kVectorSize, output_vector.get()); | |
| 211 } | |
| 212 double total_time_optimized_aligned_ms = | |
| 213 (TimeTicks::HighResNow() - start).InMillisecondsF(); | |
| 214 printf(STRINGIZE(FMAC_FUNC) " (aligned) took %.2fms; which is %.2fx " | |
| 215 "faster than FMAC_C and %.2fx faster than " | |
| 216 STRINGIZE(FMAC_FUNC) " (unaligned).\n", | |
| 217 total_time_optimized_aligned_ms, | |
| 218 total_time_c_ms / total_time_optimized_aligned_ms, | |
| 219 total_time_optimized_unaligned_ms / total_time_optimized_aligned_ms); | |
| 220 #endif | |
| 221 } | |
| 222 | |
| 223 #undef FMAC_FUNC | |
| 224 | |
| 225 // Define platform independent function name for FMULBenchmark* tests. | |
| 226 #if defined(ARCH_CPU_X86_FAMILY) | |
| 227 #define FMUL_FUNC FMUL_SSE | |
| 228 #elif defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON) | |
| 229 #define FMUL_FUNC FMUL_NEON | |
| 230 #endif | |
| 231 | |
| 232 // Benchmark for each optimized vector_math::FMUL() method. Original benchmarks | |
| 233 // were run with --vector-math-iterations=200000. | |
| 234 TEST_F(VectorMathTest, FMULBenchmark) { | |
| 235 static const int kBenchmarkIterations = BenchmarkIterations(); | |
| 236 | |
| 237 printf("Benchmarking %d iterations:\n", kBenchmarkIterations); | |
| 238 | |
| 239 // Benchmark FMUL_C(). | |
| 240 FillTestVectors(kInputFillValue, kOutputFillValue); | |
| 241 TimeTicks start = TimeTicks::HighResNow(); | |
| 242 for (int i = 0; i < kBenchmarkIterations; ++i) { | |
| 243 vector_math::FMUL_C( | |
| 244 input_vector.get(), kScale, kVectorSize, output_vector.get()); | |
| 245 } | |
| 246 double total_time_c_ms = (TimeTicks::HighResNow() - start).InMillisecondsF(); | |
| 247 printf("FMUL_C took %.2fms.\n", total_time_c_ms); | |
| 248 | |
| 249 #if defined(FMUL_FUNC) | |
| 250 #if defined(ARCH_CPU_X86_FAMILY) | |
| 251 ASSERT_TRUE(base::CPU().has_sse()); | |
| 252 #endif | |
| 253 | |
| 254 // Benchmark FMUL_FUNC() with unaligned size. | |
| 255 ASSERT_NE((kVectorSize - 1) % (vector_math::kRequiredAlignment / | |
| 256 sizeof(float)), 0U); | |
| 257 FillTestVectors(kInputFillValue, kOutputFillValue); | |
| 258 start = TimeTicks::HighResNow(); | |
| 259 for (int j = 0; j < kBenchmarkIterations; ++j) { | |
| 260 vector_math::FMUL_FUNC( | |
| 261 input_vector.get(), kScale, kVectorSize - 1, output_vector.get()); | |
| 262 } | |
| 263 double total_time_optimized_unaligned_ms = | |
| 264 (TimeTicks::HighResNow() - start).InMillisecondsF(); | |
| 265 printf(STRINGIZE(FMUL_FUNC) " (unaligned size) took %.2fms; which is %.2fx " | |
| 266 "faster than FMUL_C.\n", total_time_optimized_unaligned_ms, | |
| 267 total_time_c_ms / total_time_optimized_unaligned_ms); | |
| 268 | |
| 269 // Benchmark FMUL_FUNC() with aligned size. | |
| 270 ASSERT_EQ(kVectorSize % (vector_math::kRequiredAlignment / sizeof(float)), | |
| 271 0U); | |
| 272 FillTestVectors(kInputFillValue, kOutputFillValue); | |
| 273 start = TimeTicks::HighResNow(); | |
| 274 for (int j = 0; j < kBenchmarkIterations; ++j) { | |
| 275 vector_math::FMUL_FUNC( | |
| 276 input_vector.get(), kScale, kVectorSize, output_vector.get()); | |
| 277 } | |
| 278 double total_time_optimized_aligned_ms = | |
| 279 (TimeTicks::HighResNow() - start).InMillisecondsF(); | |
| 280 printf(STRINGIZE(FMUL_FUNC) " (aligned) took %.2fms; which is %.2fx " | |
| 281 "faster than FMUL_C and %.2fx faster than " | |
| 282 STRINGIZE(FMUL_FUNC) " (unaligned).\n", | |
| 283 total_time_optimized_aligned_ms, | |
| 284 total_time_c_ms / total_time_optimized_aligned_ms, | |
| 285 total_time_optimized_unaligned_ms / total_time_optimized_aligned_ms); | |
| 286 #endif | |
| 287 } | |
| 288 | |
| 289 #undef FMUL_FUNC | |
| 290 | |
| 291 } // namespace media | 141 } // namespace media |
| OLD | NEW |