| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // MSVC++ requires this to be set before any other includes to get M_PI. | 5 // MSVC++ requires this to be set before any other includes to get M_PI. |
| 6 #define _USE_MATH_DEFINES | 6 #define _USE_MATH_DEFINES |
| 7 #include <cmath> | 7 #include <cmath> |
| 8 | 8 |
| 9 #include "base/command_line.h" | |
| 10 #include "base/cpu.h" | 9 #include "base/cpu.h" |
| 11 #include "base/memory/aligned_memory.h" | 10 #include "base/memory/aligned_memory.h" |
| 12 #include "base/memory/scoped_ptr.h" | 11 #include "base/memory/scoped_ptr.h" |
| 13 #include "base/strings/string_number_conversions.h" | 12 #include "base/strings/string_number_conversions.h" |
| 14 #include "base/strings/stringize_macros.h" | 13 #include "base/strings/stringize_macros.h" |
| 15 #include "base/time/time.h" | |
| 16 #include "media/base/vector_math.h" | 14 #include "media/base/vector_math.h" |
| 17 #include "media/base/vector_math_testing.h" | 15 #include "media/base/vector_math_testing.h" |
| 18 #include "testing/gtest/include/gtest/gtest.h" | 16 #include "testing/gtest/include/gtest/gtest.h" |
| 19 | 17 |
| 20 using base::TimeTicks; | |
| 21 using std::fill; | 18 using std::fill; |
| 22 | 19 |
| 23 // Command line switch for runtime adjustment of benchmark iterations. | 20 namespace media { |
| 24 static const char kBenchmarkIterations[] = "vector-math-iterations"; | |
| 25 static const int kDefaultIterations = 10; | |
| 26 | 21 |
| 27 // Default test values. | 22 // Default test values. |
| 28 static const float kScale = 0.5; | 23 static const float kScale = 0.5; |
| 29 static const float kInputFillValue = 1.0; | 24 static const float kInputFillValue = 1.0; |
| 30 static const float kOutputFillValue = 3.0; | 25 static const float kOutputFillValue = 3.0; |
| 31 | 26 static const int kVectorSize = 8192; |
| 32 namespace media { | |
| 33 | 27 |
| 34 class VectorMathTest : public testing::Test { | 28 class VectorMathTest : public testing::Test { |
| 35 public: | 29 public: |
| 36 static const int kVectorSize = 8192; | |
| 37 | 30 |
| 38 VectorMathTest() { | 31 VectorMathTest() { |
| 39 // Initialize input and output vectors. | 32 // Initialize input and output vectors. |
| 40 input_vector.reset(static_cast<float*>(base::AlignedAlloc( | 33 input_vector_.reset(static_cast<float*>(base::AlignedAlloc( |
| 41 sizeof(float) * kVectorSize, vector_math::kRequiredAlignment))); | 34 sizeof(float) * kVectorSize, vector_math::kRequiredAlignment))); |
| 42 output_vector.reset(static_cast<float*>(base::AlignedAlloc( | 35 output_vector_.reset(static_cast<float*>(base::AlignedAlloc( |
| 43 sizeof(float) * kVectorSize, vector_math::kRequiredAlignment))); | 36 sizeof(float) * kVectorSize, vector_math::kRequiredAlignment))); |
| 44 } | 37 } |
| 45 | 38 |
| 46 void FillTestVectors(float input, float output) { | 39 void FillTestVectors(float input, float output) { |
| 47 // Set up input and output vectors. | 40 // Set up input and output vectors. |
| 48 fill(input_vector.get(), input_vector.get() + kVectorSize, input); | 41 fill(input_vector_.get(), input_vector_.get() + kVectorSize, input); |
| 49 fill(output_vector.get(), output_vector.get() + kVectorSize, output); | 42 fill(output_vector_.get(), output_vector_.get() + kVectorSize, output); |
| 50 } | 43 } |
| 51 | 44 |
| 52 void VerifyOutput(float value) { | 45 void VerifyOutput(float value) { |
| 53 for (int i = 0; i < kVectorSize; ++i) | 46 for (int i = 0; i < kVectorSize; ++i) |
| 54 ASSERT_FLOAT_EQ(output_vector.get()[i], value); | 47 ASSERT_FLOAT_EQ(output_vector_.get()[i], value); |
| 55 } | |
| 56 | |
| 57 int BenchmarkIterations() { | |
| 58 int vector_math_iterations = kDefaultIterations; | |
| 59 std::string iterations( | |
| 60 CommandLine::ForCurrentProcess()->GetSwitchValueASCII( | |
| 61 kBenchmarkIterations)); | |
| 62 if (!iterations.empty()) | |
| 63 base::StringToInt(iterations, &vector_math_iterations); | |
| 64 return vector_math_iterations; | |
| 65 } | 48 } |
| 66 | 49 |
| 67 protected: | 50 protected: |
| 68 int benchmark_iterations; | 51 scoped_ptr_malloc<float, base::ScopedPtrAlignedFree> input_vector_; |
| 69 scoped_ptr_malloc<float, base::ScopedPtrAlignedFree> input_vector; | 52 scoped_ptr_malloc<float, base::ScopedPtrAlignedFree> output_vector_; |
| 70 scoped_ptr_malloc<float, base::ScopedPtrAlignedFree> output_vector; | |
| 71 | 53 |
| 72 DISALLOW_COPY_AND_ASSIGN(VectorMathTest); | 54 DISALLOW_COPY_AND_ASSIGN(VectorMathTest); |
| 73 }; | 55 }; |
| 74 | 56 |
| 75 // Ensure each optimized vector_math::FMAC() method returns the same value. | 57 // Ensure each optimized vector_math::FMAC() method returns the same value. |
| 76 TEST_F(VectorMathTest, FMAC) { | 58 TEST_F(VectorMathTest, FMAC) { |
| 77 static const float kResult = kInputFillValue * kScale + kOutputFillValue; | 59 static const float kResult = kInputFillValue * kScale + kOutputFillValue; |
| 78 | 60 |
| 79 { | 61 { |
| 80 SCOPED_TRACE("FMAC"); | 62 SCOPED_TRACE("FMAC"); |
| 81 FillTestVectors(kInputFillValue, kOutputFillValue); | 63 FillTestVectors(kInputFillValue, kOutputFillValue); |
| 82 vector_math::FMAC( | 64 vector_math::FMAC( |
| 83 input_vector.get(), kScale, kVectorSize, output_vector.get()); | 65 input_vector_.get(), kScale, kVectorSize, output_vector_.get()); |
| 84 VerifyOutput(kResult); | 66 VerifyOutput(kResult); |
| 85 } | 67 } |
| 86 | 68 |
| 87 { | 69 { |
| 88 SCOPED_TRACE("FMAC_C"); | 70 SCOPED_TRACE("FMAC_C"); |
| 89 FillTestVectors(kInputFillValue, kOutputFillValue); | 71 FillTestVectors(kInputFillValue, kOutputFillValue); |
| 90 vector_math::FMAC_C( | 72 vector_math::FMAC_C( |
| 91 input_vector.get(), kScale, kVectorSize, output_vector.get()); | 73 input_vector_.get(), kScale, kVectorSize, output_vector_.get()); |
| 92 VerifyOutput(kResult); | 74 VerifyOutput(kResult); |
| 93 } | 75 } |
| 94 | 76 |
| 95 #if defined(ARCH_CPU_X86_FAMILY) | 77 #if defined(ARCH_CPU_X86_FAMILY) |
| 96 { | 78 { |
| 97 ASSERT_TRUE(base::CPU().has_sse()); | 79 ASSERT_TRUE(base::CPU().has_sse()); |
| 98 SCOPED_TRACE("FMAC_SSE"); | 80 SCOPED_TRACE("FMAC_SSE"); |
| 99 FillTestVectors(kInputFillValue, kOutputFillValue); | 81 FillTestVectors(kInputFillValue, kOutputFillValue); |
| 100 vector_math::FMAC_SSE( | 82 vector_math::FMAC_SSE( |
| 101 input_vector.get(), kScale, kVectorSize, output_vector.get()); | 83 input_vector_.get(), kScale, kVectorSize, output_vector_.get()); |
| 102 VerifyOutput(kResult); | 84 VerifyOutput(kResult); |
| 103 } | 85 } |
| 104 #endif | 86 #endif |
| 105 | 87 |
| 106 #if defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON) | 88 #if defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON) |
| 107 { | 89 { |
| 108 SCOPED_TRACE("FMAC_NEON"); | 90 SCOPED_TRACE("FMAC_NEON"); |
| 109 FillTestVectors(kInputFillValue, kOutputFillValue); | 91 FillTestVectors(kInputFillValue, kOutputFillValue); |
| 110 vector_math::FMAC_NEON( | 92 vector_math::FMAC_NEON( |
| 111 input_vector.get(), kScale, kVectorSize, output_vector.get()); | 93 input_vector_.get(), kScale, kVectorSize, output_vector_.get()); |
| 112 VerifyOutput(kResult); | 94 VerifyOutput(kResult); |
| 113 } | 95 } |
| 114 #endif | 96 #endif |
| 115 } | 97 } |
| 116 | 98 |
| 117 // Ensure each optimized vector_math::FMUL() method returns the same value. | 99 // Ensure each optimized vector_math::FMUL() method returns the same value. |
| 118 TEST_F(VectorMathTest, FMUL) { | 100 TEST_F(VectorMathTest, FMUL) { |
| 119 static const float kResult = kInputFillValue * kScale; | 101 static const float kResult = kInputFillValue * kScale; |
| 120 | 102 |
| 121 { | 103 { |
| 122 SCOPED_TRACE("FMUL"); | 104 SCOPED_TRACE("FMUL"); |
| 123 FillTestVectors(kInputFillValue, kOutputFillValue); | 105 FillTestVectors(kInputFillValue, kOutputFillValue); |
| 124 vector_math::FMUL( | 106 vector_math::FMUL( |
| 125 input_vector.get(), kScale, kVectorSize, output_vector.get()); | 107 input_vector_.get(), kScale, kVectorSize, output_vector_.get()); |
| 126 VerifyOutput(kResult); | 108 VerifyOutput(kResult); |
| 127 } | 109 } |
| 128 | 110 |
| 129 { | 111 { |
| 130 SCOPED_TRACE("FMUL_C"); | 112 SCOPED_TRACE("FMUL_C"); |
| 131 FillTestVectors(kInputFillValue, kOutputFillValue); | 113 FillTestVectors(kInputFillValue, kOutputFillValue); |
| 132 vector_math::FMUL_C( | 114 vector_math::FMUL_C( |
| 133 input_vector.get(), kScale, kVectorSize, output_vector.get()); | 115 input_vector_.get(), kScale, kVectorSize, output_vector_.get()); |
| 134 VerifyOutput(kResult); | 116 VerifyOutput(kResult); |
| 135 } | 117 } |
| 136 | 118 |
| 137 #if defined(ARCH_CPU_X86_FAMILY) | 119 #if defined(ARCH_CPU_X86_FAMILY) |
| 138 { | 120 { |
| 139 ASSERT_TRUE(base::CPU().has_sse()); | 121 ASSERT_TRUE(base::CPU().has_sse()); |
| 140 SCOPED_TRACE("FMUL_SSE"); | 122 SCOPED_TRACE("FMUL_SSE"); |
| 141 FillTestVectors(kInputFillValue, kOutputFillValue); | 123 FillTestVectors(kInputFillValue, kOutputFillValue); |
| 142 vector_math::FMUL_SSE( | 124 vector_math::FMUL_SSE( |
| 143 input_vector.get(), kScale, kVectorSize, output_vector.get()); | 125 input_vector_.get(), kScale, kVectorSize, output_vector_.get()); |
| 144 VerifyOutput(kResult); | 126 VerifyOutput(kResult); |
| 145 } | 127 } |
| 146 #endif | 128 #endif |
| 147 | 129 |
| 148 #if defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON) | 130 #if defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON) |
| 149 { | 131 { |
| 150 SCOPED_TRACE("FMUL_NEON"); | 132 SCOPED_TRACE("FMUL_NEON"); |
| 151 FillTestVectors(kInputFillValue, kOutputFillValue); | 133 FillTestVectors(kInputFillValue, kOutputFillValue); |
| 152 vector_math::FMUL_NEON( | 134 vector_math::FMUL_NEON( |
| 153 input_vector.get(), kScale, kVectorSize, output_vector.get()); | 135 input_vector_.get(), kScale, kVectorSize, output_vector_.get()); |
| 154 VerifyOutput(kResult); | 136 VerifyOutput(kResult); |
| 155 } | 137 } |
| 156 #endif | 138 #endif |
| 157 } | 139 } |
| 158 | 140 |
| 159 // Define platform-independent function name for FMACBenchmark* tests. | |
| 160 #if defined(ARCH_CPU_X86_FAMILY) | |
| 161 #define FMAC_FUNC FMAC_SSE | |
| 162 #elif defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON) | |
| 163 #define FMAC_FUNC FMAC_NEON | |
| 164 #endif | |
| 165 | |
| 166 // Benchmark for each optimized vector_math::FMAC() method. Original benchmarks | |
| 167 // were run with --vector-math-iterations=200000. | |
| 168 TEST_F(VectorMathTest, FMACBenchmark) { | |
| 169 static const int kBenchmarkIterations = BenchmarkIterations(); | |
| 170 | |
| 171 printf("Benchmarking %d iterations:\n", kBenchmarkIterations); | |
| 172 | |
| 173 // Benchmark FMAC_C(). | |
| 174 FillTestVectors(kInputFillValue, kOutputFillValue); | |
| 175 TimeTicks start = TimeTicks::HighResNow(); | |
| 176 for (int i = 0; i < kBenchmarkIterations; ++i) { | |
| 177 vector_math::FMAC_C( | |
| 178 input_vector.get(), kScale, kVectorSize, output_vector.get()); | |
| 179 } | |
| 180 double total_time_c_ms = (TimeTicks::HighResNow() - start).InMillisecondsF(); | |
| 181 printf("FMAC_C took %.2fms.\n", total_time_c_ms); | |
| 182 | |
| 183 #if defined(FMAC_FUNC) | |
| 184 #if defined(ARCH_CPU_X86_FAMILY) | |
| 185 ASSERT_TRUE(base::CPU().has_sse()); | |
| 186 #endif | |
| 187 | |
| 188 // Benchmark FMAC_FUNC() with unaligned size. | |
| 189 ASSERT_NE((kVectorSize - 1) % (vector_math::kRequiredAlignment / | |
| 190 sizeof(float)), 0U); | |
| 191 FillTestVectors(kInputFillValue, kOutputFillValue); | |
| 192 start = TimeTicks::HighResNow(); | |
| 193 for (int j = 0; j < kBenchmarkIterations; ++j) { | |
| 194 vector_math::FMAC_FUNC( | |
| 195 input_vector.get(), kScale, kVectorSize - 1, output_vector.get()); | |
| 196 } | |
| 197 double total_time_optimized_unaligned_ms = | |
| 198 (TimeTicks::HighResNow() - start).InMillisecondsF(); | |
| 199 printf(STRINGIZE(FMAC_FUNC) " (unaligned size) took %.2fms; which is %.2fx " | |
| 200 "faster than FMAC_C.\n", total_time_optimized_unaligned_ms, | |
| 201 total_time_c_ms / total_time_optimized_unaligned_ms); | |
| 202 | |
| 203 // Benchmark FMAC_FUNC() with aligned size. | |
| 204 ASSERT_EQ(kVectorSize % (vector_math::kRequiredAlignment / sizeof(float)), | |
| 205 0U); | |
| 206 FillTestVectors(kInputFillValue, kOutputFillValue); | |
| 207 start = TimeTicks::HighResNow(); | |
| 208 for (int j = 0; j < kBenchmarkIterations; ++j) { | |
| 209 vector_math::FMAC_FUNC( | |
| 210 input_vector.get(), kScale, kVectorSize, output_vector.get()); | |
| 211 } | |
| 212 double total_time_optimized_aligned_ms = | |
| 213 (TimeTicks::HighResNow() - start).InMillisecondsF(); | |
| 214 printf(STRINGIZE(FMAC_FUNC) " (aligned) took %.2fms; which is %.2fx " | |
| 215 "faster than FMAC_C and %.2fx faster than " | |
| 216 STRINGIZE(FMAC_FUNC) " (unaligned).\n", | |
| 217 total_time_optimized_aligned_ms, | |
| 218 total_time_c_ms / total_time_optimized_aligned_ms, | |
| 219 total_time_optimized_unaligned_ms / total_time_optimized_aligned_ms); | |
| 220 #endif | |
| 221 } | |
| 222 | |
| 223 #undef FMAC_FUNC | |
| 224 | |
| 225 // Define platform-independent function name for FMULBenchmark* tests. | |
| 226 #if defined(ARCH_CPU_X86_FAMILY) | |
| 227 #define FMUL_FUNC FMUL_SSE | |
| 228 #elif defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON) | |
| 229 #define FMUL_FUNC FMUL_NEON | |
| 230 #endif | |
| 231 | |
| 232 // Benchmark for each optimized vector_math::FMUL() method. Original benchmarks | |
| 233 // were run with --vector-math-iterations=200000. | |
| 234 TEST_F(VectorMathTest, FMULBenchmark) { | |
| 235 static const int kBenchmarkIterations = BenchmarkIterations(); | |
| 236 | |
| 237 printf("Benchmarking %d iterations:\n", kBenchmarkIterations); | |
| 238 | |
| 239 // Benchmark FMUL_C(). | |
| 240 FillTestVectors(kInputFillValue, kOutputFillValue); | |
| 241 TimeTicks start = TimeTicks::HighResNow(); | |
| 242 for (int i = 0; i < kBenchmarkIterations; ++i) { | |
| 243 vector_math::FMUL_C( | |
| 244 input_vector.get(), kScale, kVectorSize, output_vector.get()); | |
| 245 } | |
| 246 double total_time_c_ms = (TimeTicks::HighResNow() - start).InMillisecondsF(); | |
| 247 printf("FMUL_C took %.2fms.\n", total_time_c_ms); | |
| 248 | |
| 249 #if defined(FMUL_FUNC) | |
| 250 #if defined(ARCH_CPU_X86_FAMILY) | |
| 251 ASSERT_TRUE(base::CPU().has_sse()); | |
| 252 #endif | |
| 253 | |
| 254 // Benchmark FMUL_FUNC() with unaligned size. | |
| 255 ASSERT_NE((kVectorSize - 1) % (vector_math::kRequiredAlignment / | |
| 256 sizeof(float)), 0U); | |
| 257 FillTestVectors(kInputFillValue, kOutputFillValue); | |
| 258 start = TimeTicks::HighResNow(); | |
| 259 for (int j = 0; j < kBenchmarkIterations; ++j) { | |
| 260 vector_math::FMUL_FUNC( | |
| 261 input_vector.get(), kScale, kVectorSize - 1, output_vector.get()); | |
| 262 } | |
| 263 double total_time_optimized_unaligned_ms = | |
| 264 (TimeTicks::HighResNow() - start).InMillisecondsF(); | |
| 265 printf(STRINGIZE(FMUL_FUNC) " (unaligned size) took %.2fms; which is %.2fx " | |
| 266 "faster than FMUL_C.\n", total_time_optimized_unaligned_ms, | |
| 267 total_time_c_ms / total_time_optimized_unaligned_ms); | |
| 268 | |
| 269 // Benchmark FMUL_FUNC() with aligned size. | |
| 270 ASSERT_EQ(kVectorSize % (vector_math::kRequiredAlignment / sizeof(float)), | |
| 271 0U); | |
| 272 FillTestVectors(kInputFillValue, kOutputFillValue); | |
| 273 start = TimeTicks::HighResNow(); | |
| 274 for (int j = 0; j < kBenchmarkIterations; ++j) { | |
| 275 vector_math::FMUL_FUNC( | |
| 276 input_vector.get(), kScale, kVectorSize, output_vector.get()); | |
| 277 } | |
| 278 double total_time_optimized_aligned_ms = | |
| 279 (TimeTicks::HighResNow() - start).InMillisecondsF(); | |
| 280 printf(STRINGIZE(FMUL_FUNC) " (aligned) took %.2fms; which is %.2fx " | |
| 281 "faster than FMUL_C and %.2fx faster than " | |
| 282 STRINGIZE(FMUL_FUNC) " (unaligned).\n", | |
| 283 total_time_optimized_aligned_ms, | |
| 284 total_time_c_ms / total_time_optimized_aligned_ms, | |
| 285 total_time_optimized_unaligned_ms / total_time_optimized_aligned_ms); | |
| 286 #endif | |
| 287 } | |
| 288 | |
| 289 #undef FMUL_FUNC | |
| 290 | |
| 291 } // namespace media | 141 } // namespace media |
| OLD | NEW |