Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // MSVC++ requires this to be set before any other includes to get M_PI. | 5 // MSVC++ requires this to be set before any other includes to get M_PI. |
| 6 #define _USE_MATH_DEFINES | 6 #define _USE_MATH_DEFINES |
| 7 #include <cmath> | 7 #include <cmath> |
| 8 | 8 |
| 9 #include "base/command_line.h" | |
| 10 #include "base/cpu.h" | 9 #include "base/cpu.h" |
| 11 #include "base/memory/aligned_memory.h" | 10 #include "base/memory/aligned_memory.h" |
| 12 #include "base/memory/scoped_ptr.h" | 11 #include "base/memory/scoped_ptr.h" |
| 13 #include "base/strings/string_number_conversions.h" | 12 #include "base/strings/string_number_conversions.h" |
| 14 #include "base/strings/stringize_macros.h" | 13 #include "base/strings/stringize_macros.h" |
| 15 #include "base/time/time.h" | |
| 16 #include "media/base/vector_math.h" | 14 #include "media/base/vector_math.h" |
| 17 #include "media/base/vector_math_testing.h" | 15 #include "media/base/vector_math_testing.h" |
| 18 #include "testing/gtest/include/gtest/gtest.h" | 16 #include "testing/gtest/include/gtest/gtest.h" |
| 19 | 17 |
| 20 using base::TimeTicks; | |
| 21 using std::fill; | 18 using std::fill; |
| 22 | 19 |
|
scherkus (not reviewing)
2013/10/29 18:56:20
remove extra blank line
| |
| 23 // Command line switch for runtime adjustment of benchmark iterations. | 20 |
| 24 static const char kBenchmarkIterations[] = "vector-math-iterations"; | 21 namespace media { |
| 25 static const int kDefaultIterations = 10; | |
| 26 | 22 |
| 27 // Default test values. | 23 // Default test values. |
| 28 static const float kScale = 0.5; | 24 static const float kScale = 0.5; |
| 29 static const float kInputFillValue = 1.0; | 25 static const float kInputFillValue = 1.0; |
| 30 static const float kOutputFillValue = 3.0; | 26 static const float kOutputFillValue = 3.0; |
| 31 | 27 static const int kVectorSize = 8192; |
| 32 namespace media { | |
| 33 | 28 |
| 34 class VectorMathTest : public testing::Test { | 29 class VectorMathTest : public testing::Test { |
| 35 public: | 30 public: |
| 36 static const int kVectorSize = 8192; | |
| 37 | 31 |
| 38 VectorMathTest() { | 32 VectorMathTest() { |
| 39 // Initialize input and output vectors. | 33 // Initialize input and output vectors. |
| 40 input_vector.reset(static_cast<float*>(base::AlignedAlloc( | 34 input_vector_.reset(static_cast<float*>(base::AlignedAlloc( |
| 41 sizeof(float) * kVectorSize, vector_math::kRequiredAlignment))); | 35 sizeof(float) * kVectorSize, vector_math::kRequiredAlignment))); |
| 42 output_vector.reset(static_cast<float*>(base::AlignedAlloc( | 36 output_vector_.reset(static_cast<float*>(base::AlignedAlloc( |
| 43 sizeof(float) * kVectorSize, vector_math::kRequiredAlignment))); | 37 sizeof(float) * kVectorSize, vector_math::kRequiredAlignment))); |
| 44 } | 38 } |
| 45 | 39 |
| 46 void FillTestVectors(float input, float output) { | 40 void FillTestVectors(float input, float output) { |
| 47 // Setup input and output vectors. | 41 // Setup input and output vectors. |
| 48 fill(input_vector.get(), input_vector.get() + kVectorSize, input); | 42 fill(input_vector_.get(), input_vector_.get() + kVectorSize, input); |
| 49 fill(output_vector.get(), output_vector.get() + kVectorSize, output); | 43 fill(output_vector_.get(), output_vector_.get() + kVectorSize, output); |
| 50 } | 44 } |
| 51 | 45 |
| 52 void VerifyOutput(float value) { | 46 void VerifyOutput(float value) { |
| 53 for (int i = 0; i < kVectorSize; ++i) | 47 for (int i = 0; i < kVectorSize; ++i) |
| 54 ASSERT_FLOAT_EQ(output_vector.get()[i], value); | 48 ASSERT_FLOAT_EQ(output_vector_.get()[i], value); |
| 55 } | |
| 56 | |
| 57 int BenchmarkIterations() { | |
| 58 int vector_math_iterations = kDefaultIterations; | |
| 59 std::string iterations( | |
| 60 CommandLine::ForCurrentProcess()->GetSwitchValueASCII( | |
| 61 kBenchmarkIterations)); | |
| 62 if (!iterations.empty()) | |
| 63 base::StringToInt(iterations, &vector_math_iterations); | |
| 64 return vector_math_iterations; | |
| 65 } | 49 } |
| 66 | 50 |
| 67 protected: | 51 protected: |
| 68 int benchmark_iterations; | 52 scoped_ptr_malloc<float, base::ScopedPtrAlignedFree> input_vector_; |
| 69 scoped_ptr_malloc<float, base::ScopedPtrAlignedFree> input_vector; | 53 scoped_ptr_malloc<float, base::ScopedPtrAlignedFree> output_vector_; |
| 70 scoped_ptr_malloc<float, base::ScopedPtrAlignedFree> output_vector; | |
| 71 | 54 |
| 72 DISALLOW_COPY_AND_ASSIGN(VectorMathTest); | 55 DISALLOW_COPY_AND_ASSIGN(VectorMathTest); |
| 73 }; | 56 }; |
| 74 | 57 |
| 75 // Ensure each optimized vector_math::FMAC() method returns the same value. | 58 // Ensure each optimized vector_math::FMAC() method returns the same value. |
| 76 TEST_F(VectorMathTest, FMAC) { | 59 TEST_F(VectorMathTest, FMAC) { |
| 77 static const float kResult = kInputFillValue * kScale + kOutputFillValue; | 60 static const float kResult = kInputFillValue * kScale + kOutputFillValue; |
| 78 | 61 |
| 79 { | 62 { |
| 80 SCOPED_TRACE("FMAC"); | 63 SCOPED_TRACE("FMAC"); |
| 81 FillTestVectors(kInputFillValue, kOutputFillValue); | 64 FillTestVectors(kInputFillValue, kOutputFillValue); |
| 82 vector_math::FMAC( | 65 vector_math::FMAC( |
| 83 input_vector.get(), kScale, kVectorSize, output_vector.get()); | 66 input_vector_.get(), kScale, kVectorSize, output_vector_.get()); |
| 84 VerifyOutput(kResult); | 67 VerifyOutput(kResult); |
| 85 } | 68 } |
| 86 | 69 |
| 87 { | 70 { |
| 88 SCOPED_TRACE("FMAC_C"); | 71 SCOPED_TRACE("FMAC_C"); |
| 89 FillTestVectors(kInputFillValue, kOutputFillValue); | 72 FillTestVectors(kInputFillValue, kOutputFillValue); |
| 90 vector_math::FMAC_C( | 73 vector_math::FMAC_C( |
| 91 input_vector.get(), kScale, kVectorSize, output_vector.get()); | 74 input_vector_.get(), kScale, kVectorSize, output_vector_.get()); |
| 92 VerifyOutput(kResult); | 75 VerifyOutput(kResult); |
| 93 } | 76 } |
| 94 | 77 |
| 95 #if defined(ARCH_CPU_X86_FAMILY) | 78 #if defined(ARCH_CPU_X86_FAMILY) |
| 96 { | 79 { |
| 97 ASSERT_TRUE(base::CPU().has_sse()); | 80 ASSERT_TRUE(base::CPU().has_sse()); |
| 98 SCOPED_TRACE("FMAC_SSE"); | 81 SCOPED_TRACE("FMAC_SSE"); |
| 99 FillTestVectors(kInputFillValue, kOutputFillValue); | 82 FillTestVectors(kInputFillValue, kOutputFillValue); |
| 100 vector_math::FMAC_SSE( | 83 vector_math::FMAC_SSE( |
| 101 input_vector.get(), kScale, kVectorSize, output_vector.get()); | 84 input_vector_.get(), kScale, kVectorSize, output_vector_.get()); |
| 102 VerifyOutput(kResult); | 85 VerifyOutput(kResult); |
| 103 } | 86 } |
| 104 #endif | 87 #endif |
| 105 | 88 |
| 106 #if defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON) | 89 #if defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON) |
| 107 { | 90 { |
| 108 SCOPED_TRACE("FMAC_NEON"); | 91 SCOPED_TRACE("FMAC_NEON"); |
| 109 FillTestVectors(kInputFillValue, kOutputFillValue); | 92 FillTestVectors(kInputFillValue, kOutputFillValue); |
| 110 vector_math::FMAC_NEON( | 93 vector_math::FMAC_NEON( |
| 111 input_vector.get(), kScale, kVectorSize, output_vector.get()); | 94 input_vector_.get(), kScale, kVectorSize, output_vector_.get()); |
| 112 VerifyOutput(kResult); | 95 VerifyOutput(kResult); |
| 113 } | 96 } |
| 114 #endif | 97 #endif |
| 115 } | 98 } |
| 116 | 99 |
| 117 // Ensure each optimized vector_math::FMUL() method returns the same value. | 100 // Ensure each optimized vector_math::FMUL() method returns the same value. |
| 118 TEST_F(VectorMathTest, FMUL) { | 101 TEST_F(VectorMathTest, FMUL) { |
| 119 static const float kResult = kInputFillValue * kScale; | 102 static const float kResult = kInputFillValue * kScale; |
| 120 | 103 |
| 121 { | 104 { |
| 122 SCOPED_TRACE("FMUL"); | 105 SCOPED_TRACE("FMUL"); |
| 123 FillTestVectors(kInputFillValue, kOutputFillValue); | 106 FillTestVectors(kInputFillValue, kOutputFillValue); |
| 124 vector_math::FMUL( | 107 vector_math::FMUL( |
| 125 input_vector.get(), kScale, kVectorSize, output_vector.get()); | 108 input_vector_.get(), kScale, kVectorSize, output_vector_.get()); |
| 126 VerifyOutput(kResult); | 109 VerifyOutput(kResult); |
| 127 } | 110 } |
| 128 | 111 |
| 129 { | 112 { |
| 130 SCOPED_TRACE("FMUL_C"); | 113 SCOPED_TRACE("FMUL_C"); |
| 131 FillTestVectors(kInputFillValue, kOutputFillValue); | 114 FillTestVectors(kInputFillValue, kOutputFillValue); |
| 132 vector_math::FMUL_C( | 115 vector_math::FMUL_C( |
| 133 input_vector.get(), kScale, kVectorSize, output_vector.get()); | 116 input_vector_.get(), kScale, kVectorSize, output_vector_.get()); |
| 134 VerifyOutput(kResult); | 117 VerifyOutput(kResult); |
| 135 } | 118 } |
| 136 | 119 |
| 137 #if defined(ARCH_CPU_X86_FAMILY) | 120 #if defined(ARCH_CPU_X86_FAMILY) |
| 138 { | 121 { |
| 139 ASSERT_TRUE(base::CPU().has_sse()); | 122 ASSERT_TRUE(base::CPU().has_sse()); |
| 140 SCOPED_TRACE("FMUL_SSE"); | 123 SCOPED_TRACE("FMUL_SSE"); |
| 141 FillTestVectors(kInputFillValue, kOutputFillValue); | 124 FillTestVectors(kInputFillValue, kOutputFillValue); |
| 142 vector_math::FMUL_SSE( | 125 vector_math::FMUL_SSE( |
| 143 input_vector.get(), kScale, kVectorSize, output_vector.get()); | 126 input_vector_.get(), kScale, kVectorSize, output_vector_.get()); |
| 144 VerifyOutput(kResult); | 127 VerifyOutput(kResult); |
| 145 } | 128 } |
| 146 #endif | 129 #endif |
| 147 | 130 |
| 148 #if defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON) | 131 #if defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON) |
| 149 { | 132 { |
| 150 SCOPED_TRACE("FMUL_NEON"); | 133 SCOPED_TRACE("FMUL_NEON"); |
| 151 FillTestVectors(kInputFillValue, kOutputFillValue); | 134 FillTestVectors(kInputFillValue, kOutputFillValue); |
| 152 vector_math::FMUL_NEON( | 135 vector_math::FMUL_NEON( |
| 153 input_vector.get(), kScale, kVectorSize, output_vector.get()); | 136 input_vector_.get(), kScale, kVectorSize, output_vector_.get()); |
| 154 VerifyOutput(kResult); | 137 VerifyOutput(kResult); |
| 155 } | 138 } |
| 156 #endif | 139 #endif |
| 157 } | 140 } |
| 158 | 141 |
| 159 // Define platform independent function name for FMACBenchmark* tests. | |
| 160 #if defined(ARCH_CPU_X86_FAMILY) | |
| 161 #define FMAC_FUNC FMAC_SSE | |
| 162 #elif defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON) | |
| 163 #define FMAC_FUNC FMAC_NEON | |
| 164 #endif | |
| 165 | |
| 166 // Benchmark for each optimized vector_math::FMAC() method. Original benchmarks | |
| 167 // were run with --vector-math-iterations=200000. | |
| 168 TEST_F(VectorMathTest, FMACBenchmark) { | |
| 169 static const int kBenchmarkIterations = BenchmarkIterations(); | |
| 170 | |
| 171 printf("Benchmarking %d iterations:\n", kBenchmarkIterations); | |
| 172 | |
| 173 // Benchmark FMAC_C(). | |
| 174 FillTestVectors(kInputFillValue, kOutputFillValue); | |
| 175 TimeTicks start = TimeTicks::HighResNow(); | |
| 176 for (int i = 0; i < kBenchmarkIterations; ++i) { | |
| 177 vector_math::FMAC_C( | |
| 178 input_vector.get(), kScale, kVectorSize, output_vector.get()); | |
| 179 } | |
| 180 double total_time_c_ms = (TimeTicks::HighResNow() - start).InMillisecondsF(); | |
| 181 printf("FMAC_C took %.2fms.\n", total_time_c_ms); | |
| 182 | |
| 183 #if defined(FMAC_FUNC) | |
| 184 #if defined(ARCH_CPU_X86_FAMILY) | |
| 185 ASSERT_TRUE(base::CPU().has_sse()); | |
| 186 #endif | |
| 187 | |
| 188 // Benchmark FMAC_FUNC() with unaligned size. | |
| 189 ASSERT_NE((kVectorSize - 1) % (vector_math::kRequiredAlignment / | |
| 190 sizeof(float)), 0U); | |
| 191 FillTestVectors(kInputFillValue, kOutputFillValue); | |
| 192 start = TimeTicks::HighResNow(); | |
| 193 for (int j = 0; j < kBenchmarkIterations; ++j) { | |
| 194 vector_math::FMAC_FUNC( | |
| 195 input_vector.get(), kScale, kVectorSize - 1, output_vector.get()); | |
| 196 } | |
| 197 double total_time_optimized_unaligned_ms = | |
| 198 (TimeTicks::HighResNow() - start).InMillisecondsF(); | |
| 199 printf(STRINGIZE(FMAC_FUNC) " (unaligned size) took %.2fms; which is %.2fx " | |
| 200 "faster than FMAC_C.\n", total_time_optimized_unaligned_ms, | |
| 201 total_time_c_ms / total_time_optimized_unaligned_ms); | |
| 202 | |
| 203 // Benchmark FMAC_FUNC() with aligned size. | |
| 204 ASSERT_EQ(kVectorSize % (vector_math::kRequiredAlignment / sizeof(float)), | |
| 205 0U); | |
| 206 FillTestVectors(kInputFillValue, kOutputFillValue); | |
| 207 start = TimeTicks::HighResNow(); | |
| 208 for (int j = 0; j < kBenchmarkIterations; ++j) { | |
| 209 vector_math::FMAC_FUNC( | |
| 210 input_vector.get(), kScale, kVectorSize, output_vector.get()); | |
| 211 } | |
| 212 double total_time_optimized_aligned_ms = | |
| 213 (TimeTicks::HighResNow() - start).InMillisecondsF(); | |
| 214 printf(STRINGIZE(FMAC_FUNC) " (aligned) took %.2fms; which is %.2fx " | |
| 215 "faster than FMAC_C and %.2fx faster than " | |
| 216 STRINGIZE(FMAC_FUNC) " (unaligned).\n", | |
| 217 total_time_optimized_aligned_ms, | |
| 218 total_time_c_ms / total_time_optimized_aligned_ms, | |
| 219 total_time_optimized_unaligned_ms / total_time_optimized_aligned_ms); | |
| 220 #endif | |
| 221 } | |
| 222 | |
| 223 #undef FMAC_FUNC | |
| 224 | |
| 225 // Define platform independent function name for FMULBenchmark* tests. | |
| 226 #if defined(ARCH_CPU_X86_FAMILY) | |
| 227 #define FMUL_FUNC FMUL_SSE | |
| 228 #elif defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON) | |
| 229 #define FMUL_FUNC FMUL_NEON | |
| 230 #endif | |
| 231 | |
| 232 // Benchmark for each optimized vector_math::FMUL() method. Original benchmarks | |
| 233 // were run with --vector-math-iterations=200000. | |
| 234 TEST_F(VectorMathTest, FMULBenchmark) { | |
| 235 static const int kBenchmarkIterations = BenchmarkIterations(); | |
| 236 | |
| 237 printf("Benchmarking %d iterations:\n", kBenchmarkIterations); | |
| 238 | |
| 239 // Benchmark FMUL_C(). | |
| 240 FillTestVectors(kInputFillValue, kOutputFillValue); | |
| 241 TimeTicks start = TimeTicks::HighResNow(); | |
| 242 for (int i = 0; i < kBenchmarkIterations; ++i) { | |
| 243 vector_math::FMUL_C( | |
| 244 input_vector.get(), kScale, kVectorSize, output_vector.get()); | |
| 245 } | |
| 246 double total_time_c_ms = (TimeTicks::HighResNow() - start).InMillisecondsF(); | |
| 247 printf("FMUL_C took %.2fms.\n", total_time_c_ms); | |
| 248 | |
| 249 #if defined(FMUL_FUNC) | |
| 250 #if defined(ARCH_CPU_X86_FAMILY) | |
| 251 ASSERT_TRUE(base::CPU().has_sse()); | |
| 252 #endif | |
| 253 | |
| 254 // Benchmark FMUL_FUNC() with unaligned size. | |
| 255 ASSERT_NE((kVectorSize - 1) % (vector_math::kRequiredAlignment / | |
| 256 sizeof(float)), 0U); | |
| 257 FillTestVectors(kInputFillValue, kOutputFillValue); | |
| 258 start = TimeTicks::HighResNow(); | |
| 259 for (int j = 0; j < kBenchmarkIterations; ++j) { | |
| 260 vector_math::FMUL_FUNC( | |
| 261 input_vector.get(), kScale, kVectorSize - 1, output_vector.get()); | |
| 262 } | |
| 263 double total_time_optimized_unaligned_ms = | |
| 264 (TimeTicks::HighResNow() - start).InMillisecondsF(); | |
| 265 printf(STRINGIZE(FMUL_FUNC) " (unaligned size) took %.2fms; which is %.2fx " | |
| 266 "faster than FMUL_C.\n", total_time_optimized_unaligned_ms, | |
| 267 total_time_c_ms / total_time_optimized_unaligned_ms); | |
| 268 | |
| 269 // Benchmark FMUL_FUNC() with aligned size. | |
| 270 ASSERT_EQ(kVectorSize % (vector_math::kRequiredAlignment / sizeof(float)), | |
| 271 0U); | |
| 272 FillTestVectors(kInputFillValue, kOutputFillValue); | |
| 273 start = TimeTicks::HighResNow(); | |
| 274 for (int j = 0; j < kBenchmarkIterations; ++j) { | |
| 275 vector_math::FMUL_FUNC( | |
| 276 input_vector.get(), kScale, kVectorSize, output_vector.get()); | |
| 277 } | |
| 278 double total_time_optimized_aligned_ms = | |
| 279 (TimeTicks::HighResNow() - start).InMillisecondsF(); | |
| 280 printf(STRINGIZE(FMUL_FUNC) " (aligned) took %.2fms; which is %.2fx " | |
| 281 "faster than FMUL_C and %.2fx faster than " | |
| 282 STRINGIZE(FMUL_FUNC) " (unaligned).\n", | |
| 283 total_time_optimized_aligned_ms, | |
| 284 total_time_c_ms / total_time_optimized_aligned_ms, | |
| 285 total_time_optimized_unaligned_ms / total_time_optimized_aligned_ms); | |
| 286 #endif | |
| 287 } | |
| 288 | |
| 289 #undef FMUL_FUNC | |
| 290 | |
| 291 } // namespace media | 142 } // namespace media |
| OLD | NEW |