media/base/vector_math_perftest.cc - Issue 2556993002: Experiment with AVX optimizations for FMAC, FMUL operations.

Unified Diff: media/base/vector_math_perftest.cc

Issue 2556993002: Experiment with AVX optimizations for FMAC, FMUL operations.

Patch Set: Created 4 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: media/base/vector_math_perftest.cc

diff --git a/media/base/vector_math_perftest.cc b/media/base/vector_math_perftest.cc

index 59b259dc2dd51a605b2598e7c416ae9f1a51cc6e..36465fb87f02f1138ee8dceab766e325b1093b0e 100644

--- a/media/base/vector_math_perftest.cc

+++ b/media/base/vector_math_perftest.cc

@@ -4,10 +4,13 @@

#include <memory>

+#include "base/bind.h"

+#include "base/cpu.h"

#include "base/macros.h"

#include "base/memory/aligned_memory.h"

#include "base/time/time.h"

#include "build/build_config.h"

+#include "media/base/sinc_resampler.h"

#include "media/base/vector_math.h"

#include "media/base/vector_math_testing.h"

#include "testing/gtest/include/gtest/gtest.h"

@@ -23,6 +26,13 @@ static const int kEWMABenchmarkIterations = 50000;

static const float kScale = 0.5;

static const int kVectorSize = 8192;

+static const int kSincResamplerBenchmarkIterations = 50000000;

+static const double kSampleRateRatio = 192000.0 / 44100.0;

+static const double kKernelInterpolationFactor = 0.5;

+// Helper function to provide no input to SincResampler's Convolve benchmark.

+static void DoNothing(int frames, float* destination) {}

class VectorMathPerfTest : public testing::Test {

public:

VectorMathPerfTest() {

@@ -35,25 +45,36 @@ class VectorMathPerfTest : public testing::Test {

fill(output_vector_.get(), output_vector_.get() + kVectorSize, 0.0f);

}

+ // Times kBenchmarkIterations calls of the two-input function |fn| on the

+ // fixture's input and output vectors and prints the throughput in runs/ms

+ // under |test_name| / |trace_name|. When |aligned| is false the vector

+ // length passed to |fn| is one element shorter than kVectorSize.

+ void RunBenchmark(float (*fn)(const float[], const float[], int),

+ bool aligned,

+ const std::string& test_name,

+ const std::string& trace_name) {

+ const int length = aligned ? kVectorSize : kVectorSize - 1;

+ const TimeTicks begin = TimeTicks::Now();

+ for (int run = 0; run < kBenchmarkIterations; ++run) {

+ fn(input_vector_.get(), output_vector_.get(), length);

+ }

+ const double elapsed_ms = (TimeTicks::Now() - begin).InMillisecondsF();

+ perf_test::PrintResult(test_name, "", trace_name,

+ kBenchmarkIterations / elapsed_ms, "runs/ms", true);

+ }

void RunBenchmark(void (*fn)(const float[], float, int, float[]),

bool aligned,

const std::string& test_name,

const std::string& trace_name) {

TimeTicks start = TimeTicks::Now();

for (int i = 0; i < kBenchmarkIterations; ++i) {

- fn(input_vector_.get(),

- kScale,

- kVectorSize - (aligned ? 0 : 1),

+ fn(input_vector_.get(), kScale, kVectorSize - (aligned ? 0 : 1),

output_vector_.get());

}

double total_time_milliseconds =

(TimeTicks::Now() - start).InMillisecondsF();

- perf_test::PrintResult(test_name,

- "",

- trace_name,

+ perf_test::PrintResult(test_name, "", trace_name,

kBenchmarkIterations / total_time_milliseconds,

- "runs/ms",

- true);

+ "runs/ms", true);

}

void RunBenchmark(

@@ -67,12 +88,29 @@ class VectorMathPerfTest : public testing::Test {

}

double total_time_milliseconds =

(TimeTicks::Now() - start).InMillisecondsF();

- perf_test::PrintResult(test_name,

- "",

- trace_name,

+ perf_test::PrintResult(test_name, "", trace_name,

kEWMABenchmarkIterations / total_time_milliseconds,

- "runs/ms",

- true);

+ "runs/ms", true);

+ }

+ // Benchmarks a SincResampler convolution routine: calls |convolve_fn|

+ // kSincResamplerBenchmarkIterations times against |resampler|'s kernel

+ // storage and prints the throughput in runs/ms under |trace_name|. When

+ // |aligned| is false the first pointer is advanced by one float.

+ void RunBenchmark(

+ SincResampler* resampler,

+ float (*convolve_fn)(const float*, const float*, const float*, double),

+ bool aligned,

+ const std::string& trace_name) {

+ base::TimeTicks start = base::TimeTicks::Now();

+ for (int i = 0; i < kSincResamplerBenchmarkIterations; ++i) {

+ // |convolve_fn| takes three float pointers and a double. The kernel

+ // buffer is reused for all three pointers; only the first one is offset

+ // in the unaligned case.

+ convolve_fn(resampler->get_kernel_for_testing() + (aligned ? 0 : 1),

+ resampler->get_kernel_for_testing(),

+ resampler->get_kernel_for_testing(),

+ kKernelInterpolationFactor);

+ }

+ double total_time_milliseconds =

+ (base::TimeTicks::Now() - start).InMillisecondsF();

+ perf_test::PrintResult(

+ "sinc_resampler_convolve", "", trace_name,

+ kSincResamplerBenchmarkIterations / total_time_milliseconds, "runs/ms",

+ true);

}

protected:

@@ -84,76 +122,144 @@ class VectorMathPerfTest : public testing::Test {

// Define platform dependent function names for SIMD optimized methods.

#if defined(ARCH_CPU_X86_FAMILY)

+#define CONVOLVE_FUNC Convolve_SSE

+#define DOTPRODUCT_FUNC DotProduct_SSE

+#define DOTPRODUCT_FUNC2 DotProduct_AVX

#define FMAC_FUNC FMAC_SSE

#define FMUL_FUNC FMUL_SSE

+#define FMAC_FUNC2 FMAC_AVX

#define EWMAAndMaxPower_FUNC EWMAAndMaxPower_SSE

#elif defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON)

+#define CONVOLVE_FUNC Convolve_NEON

+#define DOTPRODUCT_FUNC DotProduct_NEON

#define FMAC_FUNC FMAC_NEON

#define FMUL_FUNC FMUL_NEON

#define EWMAAndMaxPower_FUNC EWMAAndMaxPower_NEON

#endif

+// Benchmark for the various Convolve() methods. Make sure to build with

+// branding=Chrome so that DCHECKs are compiled out when benchmarking.

+TEST_F(VectorMathPerfTest, Convolve) {

+ // The resampler is only used as a source of kernel storage; DoNothing is

+ // bound as its read callback.

+ SincResampler resampler(kSampleRateRatio, SincResampler::kDefaultRequestSize,

+ base::Bind(&DoNothing));

+ // Benchmark Convolve_C().

+ RunBenchmark(&resampler, vector_math::Convolve_C, true,

+ "unoptimized_aligned");

+#if defined(CONVOLVE_FUNC)

+ // Benchmark CONVOLVE_FUNC() with an aligned and then an offset input.

+ RunBenchmark(&resampler, vector_math::CONVOLVE_FUNC, true,

+ "optimized_aligned");

+ RunBenchmark(&resampler, vector_math::CONVOLVE_FUNC, false,

+ "optimized_unaligned");

+#endif

+}

+// Benchmark for each optimized vector_math::DotProduct() method.

+TEST_F(VectorMathPerfTest, DotProduct) {

+ // Benchmark DotProduct_C().

+ RunBenchmark(vector_math::DotProduct_C, true, "vector_math_dotproduct",

+ "unoptimized");

+#if defined(DOTPRODUCT_FUNC)

+ // Benchmark DOTPRODUCT_FUNC() with unaligned size.

+ ASSERT_NE(

+ (kVectorSize - 1) % (vector_math::kRequiredAlignment / sizeof(float)),

+ 0U);

+ RunBenchmark(vector_math::DOTPRODUCT_FUNC, false, "vector_math_dotproduct",

+ "optimized_unaligned");

+ // Benchmark DOTPRODUCT_FUNC() with aligned size.

+ ASSERT_EQ(kVectorSize % (vector_math::kRequiredAlignment / sizeof(float)),

+ 0U);

+ RunBenchmark(vector_math::DOTPRODUCT_FUNC, true, "vector_math_dotproduct",

+ "optimized_aligned");

+#if defined(DOTPRODUCT_FUNC2)

+ // DOTPRODUCT_FUNC2 is the AVX variant; skip it on CPUs without AVX.

+ if (!base::CPU().has_avx())

+ return;

+ // Benchmark DOTPRODUCT_FUNC2() with unaligned size.

+ ASSERT_NE(

+ (kVectorSize - 1) % (vector_math::kRequiredAlignment / sizeof(float)),

+ 0U);

+ RunBenchmark(vector_math::DOTPRODUCT_FUNC2, false, "vector_math_dotproduct",

+ "optimized2_unaligned");

+ // Benchmark DOTPRODUCT_FUNC2() with aligned size.

+ ASSERT_EQ(kVectorSize % (vector_math::kRequiredAlignment / sizeof(float)),

+ 0U);

+ RunBenchmark(vector_math::DOTPRODUCT_FUNC2, true, "vector_math_dotproduct",

+ "optimized2_aligned");

+#endif

+#endif

+}

// Benchmark for each optimized vector_math::FMAC() method.

TEST_F(VectorMathPerfTest, FMAC) {

// Benchmark FMAC_C().

- RunBenchmark(

- vector_math::FMAC_C, true, "vector_math_fmac", "unoptimized");

+ RunBenchmark(vector_math::FMAC_C, true, "vector_math_fmac", "unoptimized");

#if defined(FMAC_FUNC)

// Benchmark FMAC_FUNC() with unaligned size.

- ASSERT_NE((kVectorSize - 1) % (vector_math::kRequiredAlignment /

- sizeof(float)), 0U);

- RunBenchmark(

- vector_math::FMAC_FUNC, false, "vector_math_fmac", "optimized_unaligned");

+ ASSERT_NE(

+ (kVectorSize - 1) % (vector_math::kRequiredAlignment / sizeof(float)),

+ 0U);

+ RunBenchmark(vector_math::FMAC_FUNC, false, "vector_math_fmac",

+ "optimized_unaligned");

+ // Benchmark FMAC_FUNC() with aligned size.

+ ASSERT_EQ(kVectorSize % (vector_math::kRequiredAlignment / sizeof(float)),

+ 0U);

+ RunBenchmark(vector_math::FMAC_FUNC, true, "vector_math_fmac",

+ "optimized_aligned");

+#if defined(FMAC_FUNC2)

+ // Benchmark FMAC_FUNC2() with unaligned size.

+ if (!base::CPU().has_avx())

+ return;

+ ASSERT_NE(

+ (kVectorSize - 1) % (vector_math::kRequiredAlignment / sizeof(float)),

+ 0U);

+ RunBenchmark(vector_math::FMAC_FUNC2, false, "vector_math_fmac",

+ "optimized2_unaligned");

// Benchmark FMAC_FUNC() with aligned size.

ASSERT_EQ(kVectorSize % (vector_math::kRequiredAlignment / sizeof(float)),

0U);

- RunBenchmark(

- vector_math::FMAC_FUNC, true, "vector_math_fmac", "optimized_aligned");

+ RunBenchmark(vector_math::FMAC_FUNC2, true, "vector_math_fmac",

+ "optimized2_aligned");

+#endif

#endif

}

// Benchmark for each optimized vector_math::FMUL() method.

TEST_F(VectorMathPerfTest, FMUL) {

// Benchmark FMUL_C().

- RunBenchmark(

- vector_math::FMUL_C, true, "vector_math_fmul", "unoptimized");

+ RunBenchmark(vector_math::FMUL_C, true, "vector_math_fmul", "unoptimized");

#if defined(FMUL_FUNC)

// Benchmark FMUL_FUNC() with unaligned size.

- ASSERT_NE((kVectorSize - 1) % (vector_math::kRequiredAlignment /

- sizeof(float)), 0U);

- RunBenchmark(

- vector_math::FMUL_FUNC, false, "vector_math_fmul", "optimized_unaligned");

+ ASSERT_NE(

+ (kVectorSize - 1) % (vector_math::kRequiredAlignment / sizeof(float)),

+ 0U);

+ RunBenchmark(vector_math::FMUL_FUNC, false, "vector_math_fmul",

+ "optimized_unaligned");

// Benchmark FMUL_FUNC() with aligned size.

ASSERT_EQ(kVectorSize % (vector_math::kRequiredAlignment / sizeof(float)),

0U);

- RunBenchmark(

- vector_math::FMUL_FUNC, true, "vector_math_fmul", "optimized_aligned");

+ RunBenchmark(vector_math::FMUL_FUNC, true, "vector_math_fmul",

+ "optimized_aligned");

#endif

}

// Benchmark for each optimized vector_math::EWMAAndMaxPower() method.

TEST_F(VectorMathPerfTest, EWMAAndMaxPower) {

// Benchmark EWMAAndMaxPower_C().

- RunBenchmark(vector_math::EWMAAndMaxPower_C,

- kVectorSize,

- "vector_math_ewma_and_max_power",

- "unoptimized");

+ RunBenchmark(vector_math::EWMAAndMaxPower_C, kVectorSize,

+ "vector_math_ewma_and_max_power", "unoptimized");

#if defined(EWMAAndMaxPower_FUNC)

// Benchmark EWMAAndMaxPower_FUNC() with unaligned size.

- ASSERT_NE((kVectorSize - 1) % (vector_math::kRequiredAlignment /

- sizeof(float)), 0U);

- RunBenchmark(vector_math::EWMAAndMaxPower_FUNC,

- kVectorSize - 1,

- "vector_math_ewma_and_max_power",

- "optimized_unaligned");

+ ASSERT_NE(

+ (kVectorSize - 1) % (vector_math::kRequiredAlignment / sizeof(float)),

+ 0U);

+ RunBenchmark(vector_math::EWMAAndMaxPower_FUNC, kVectorSize - 1,

+ "vector_math_ewma_and_max_power", "optimized_unaligned");

// Benchmark EWMAAndMaxPower_FUNC() with aligned size.

ASSERT_EQ(kVectorSize % (vector_math::kRequiredAlignment / sizeof(float)),

0U);

- RunBenchmark(vector_math::EWMAAndMaxPower_FUNC,

- kVectorSize,

- "vector_math_ewma_and_max_power",

- "optimized_aligned");

+ RunBenchmark(vector_math::EWMAAndMaxPower_FUNC, kVectorSize,

+ "vector_math_ewma_and_max_power", "optimized_aligned");

#endif

}

-} // namespace media

+} // namespace media

« no previous file with comments | « media/base/vector_math_avx.cc ('k') | media/base/vector_math_testing.h » ('j') | no next file with comments »