| Index: media/base/vector_math_unittest.cc
|
| diff --git a/media/base/vector_math_unittest.cc b/media/base/vector_math_unittest.cc
|
| index e64c7c9061be5c1c2cc132e5fd3395e3e26ea15c..414998d525b200f216b1f29d839a9d6a658fe58d 100644
|
| --- a/media/base/vector_math_unittest.cc
|
| +++ b/media/base/vector_math_unittest.cc
|
| @@ -103,6 +103,38 @@ TEST_F(VectorMathTest, FMAC) {
|
| #endif
|
| }
|
|
|
| +// Ensure FMUL(), FMUL_C() and, on x86, FMUL_SSE() all yield the same result.
|
| +TEST_F(VectorMathTest, FMUL) {
|
| + static const float kResult = kInputFillValue * kScale;
|
| +
|
| + {
|
| + SCOPED_TRACE("FMUL");
|
| + FillTestVectors(kInputFillValue, kOutputFillValue);
|
| + vector_math::FMUL(
|
| + input_vector.get(), kScale, kVectorSize, output_vector.get());
|
| + VerifyOutput(kResult);
|
| + }
|
| +
|
| + {
|
| + SCOPED_TRACE("FMUL_C");
|
| + FillTestVectors(kInputFillValue, kOutputFillValue);
|
| + vector_math::FMUL_C(
|
| + input_vector.get(), kScale, kVectorSize, output_vector.get());
|
| + VerifyOutput(kResult);
|
| + }
|
| +
|
| +#if defined(ARCH_CPU_X86_FAMILY)
|
| + {
|
| + ASSERT_TRUE(base::CPU().has_sse());
|
| + SCOPED_TRACE("FMUL_SSE");
|
| + FillTestVectors(kInputFillValue, kOutputFillValue);
|
| + vector_math::FMUL_SSE(
|
| + input_vector.get(), kScale, kVectorSize, output_vector.get());
|
| + VerifyOutput(kResult);
|
| + }
|
| +#endif
|
| +}
|
| +
|
| // Benchmark for each optimized vector_math::FMAC() method. Original benchmarks
|
| // were run with --vector-fmac-iterations=200000.
|
| TEST_F(VectorMathTest, FMACBenchmark) {
|
| @@ -156,4 +188,58 @@ TEST_F(VectorMathTest, FMACBenchmark) {
|
| #endif
|
| }
|
|
|
| +
|
| +// Benchmark vector_math::FMUL_C() and, on x86, FMUL_SSE(). Original benchmarks
|
| +// were run with --vector-math-iterations=200000.
|
| +TEST_F(VectorMathTest, FMULBenchmark) {
|
| + static const int kBenchmarkIterations = BenchmarkIterations();
|
| +
|
| + printf("Benchmarking %d iterations:\n", kBenchmarkIterations);
|
| +
|
| + // Benchmark FMUL_C(); its time is the baseline for the ratios printed below.
|
| + FillTestVectors(kInputFillValue, kOutputFillValue);
|
| + TimeTicks start = TimeTicks::HighResNow();
|
| + for (int i = 0; i < kBenchmarkIterations; ++i) {
|
| + vector_math::FMUL_C(
|
| + input_vector.get(), kScale, kVectorSize, output_vector.get());
|
| + }
|
| + double total_time_c_ms = (TimeTicks::HighResNow() - start).InMillisecondsF();
|
| + printf("FMUL_C took %.2fms.\n", total_time_c_ms);
|
| +
|
| +#if defined(ARCH_CPU_X86_FAMILY)
|
| + ASSERT_TRUE(base::CPU().has_sse());
|
| +
|
| + // Benchmark FMUL_SSE() with unaligned size (kVectorSize - 1 elements).
|
| + ASSERT_NE((kVectorSize - 1) % (vector_math::kRequiredAlignment /
|
| + sizeof(float)), 0U);
|
| + FillTestVectors(kInputFillValue, kOutputFillValue);
|
| + start = TimeTicks::HighResNow();
|
| + for (int j = 0; j < kBenchmarkIterations; ++j) {
|
| + vector_math::FMUL_SSE(
|
| + input_vector.get(), kScale, kVectorSize - 1, output_vector.get());
|
| + }
|
| + double total_time_sse_unaligned_ms =
|
| + (TimeTicks::HighResNow() - start).InMillisecondsF();
|
| + printf("FMUL_SSE (unaligned size) took %.2fms; which is %.2fx faster than"
|
| + " FMUL_C.\n", total_time_sse_unaligned_ms,
|
| + total_time_c_ms / total_time_sse_unaligned_ms);
|
| +
|
| + // Benchmark FMUL_SSE() with aligned size (all kVectorSize elements).
|
| + ASSERT_EQ(kVectorSize % (vector_math::kRequiredAlignment / sizeof(float)),
|
| + 0U);
|
| + FillTestVectors(kInputFillValue, kOutputFillValue);
|
| + start = TimeTicks::HighResNow();
|
| + for (int j = 0; j < kBenchmarkIterations; ++j) {
|
| + vector_math::FMUL_SSE(
|
| + input_vector.get(), kScale, kVectorSize, output_vector.get());
|
| + }
|
| + double total_time_sse_aligned_ms =
|
| + (TimeTicks::HighResNow() - start).InMillisecondsF();
|
| + printf("FMUL_SSE (aligned size) took %.2fms; which is %.2fx faster than"
|
| + " FMUL_C and %.2fx faster than FMUL_SSE (unaligned size).\n",
|
| + total_time_sse_aligned_ms, total_time_c_ms / total_time_sse_aligned_ms,
|
| + total_time_sse_unaligned_ms / total_time_sse_aligned_ms);
|
| +#endif
|
| +}
|
| +
|
| } // namespace media
|
|
|