media/base/audio_renderer_mixer_unittest.cc - Issue 10802005: Add SSE optimizations to AudioRendererMixer.

Unified Diff: media/base/audio_renderer_mixer_unittest.cc

Issue 10802005: Add SSE optimizations to AudioRendererMixer. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Comments! Created 8 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: media/base/audio_renderer_mixer_unittest.cc

diff --git a/media/base/audio_renderer_mixer_unittest.cc b/media/base/audio_renderer_mixer_unittest.cc

index 51d06e0d63fc4da7626600972a3189f8028c14c2..6be81dba23e5d24de12576ce1839ff2737282fcc 100644

--- a/media/base/audio_renderer_mixer_unittest.cc

+++ b/media/base/audio_renderer_mixer_unittest.cc

@@ -8,8 +8,11 @@

#include "base/bind.h"

#include "base/bind_helpers.h"

+#include "base/command_line.h"

+#include "base/memory/aligned_memory.h"

#include "base/memory/scoped_ptr.h"

#include "base/memory/scoped_vector.h"

+#include "base/string_number_conversions.h"

#include "media/base/audio_renderer_mixer.h"

#include "media/base/audio_renderer_mixer_input.h"

#include "media/base/fake_audio_render_callback.h"

@@ -17,6 +20,11 @@

#include "testing/gmock/include/gmock/gmock.h"

#include "testing/gtest/include/gtest/gtest.h"

+namespace switches {

Ami GONE FROM CHROMIUM 2012/07/27 17:33:11 why?

DaleCurtis 2012/07/27 21:23:42 Saw it done this way elsewhere. Changed.

+// Allows runtime adjustment of iterations for the VectorFMACBenchmark test.

+const char kVectorFMACIterations[] = "vector-fmac-iterations";

+} // namespace switches

namespace media {

// Parameters which control the many input case tests.

@@ -28,10 +36,131 @@ static const int kBitsPerChannel = 16;

static const ChannelLayout kChannelLayout = CHANNEL_LAYOUT_STEREO;

static const int kHighLatencyBufferSize = 8192;

static const int kLowLatencyBufferSize = 256;

+static const int kSampleRate = 48000;

// Number of full sine wave cycles for each Render() call.

static const int kSineCycles = 4;

+// Ensure various optimized VectorFMAC() methods return the right value.

Ami GONE FROM CHROMIUM 2012/07/27 17:33:11 s/right/same/

DaleCurtis 2012/07/27 21:23:42 Done.

+TEST(AudioRendererMixerTest, VectorFMAC) {

+ // Initialize a dummy mixer.

+ scoped_refptr<MockAudioRendererSink> sink = new MockAudioRendererSink();

+ EXPECT_CALL(*sink, Start());

+ EXPECT_CALL(*sink, Stop());

+ AudioParameters params(

+ AudioParameters::AUDIO_PCM_LINEAR, kChannelLayout, kSampleRate,

+ kBitsPerChannel, kHighLatencyBufferSize);

+ AudioRendererMixer mixer(params, params, sink);

+ // Initialize input and output vectors.

+ scoped_ptr_malloc<float, base::ScopedPtrAlignedFree> input_vector(

+ static_cast<float*>(

+ base::AlignedAlloc(sizeof(float) * kHighLatencyBufferSize, 16)));

+ scoped_ptr_malloc<float, base::ScopedPtrAlignedFree> output_vector(

+ static_cast<float*>(

+ base::AlignedAlloc(sizeof(float) * kHighLatencyBufferSize, 16)));

+ static const float kScale = 0.5;

+ static const float kFillValue = 1.0;

+ // Fill input vector with 1.0 and zero the output vector.

+ std::fill(input_vector.get(), input_vector.get() + kHighLatencyBufferSize,

+ kFillValue);

+ memset(output_vector.get(), 0, sizeof(float) * kHighLatencyBufferSize);

Ami GONE FROM CHROMIUM 2012/07/27 17:33:11 By init'd output with 0 you're avoiding testing th

DaleCurtis 2012/07/27 21:23:42 Done.

+ mixer.VectorFMAC_C(

+ input_vector.get(), kScale, kHighLatencyBufferSize, output_vector.get());

+ for(int i = 0; i < kHighLatencyBufferSize; ++i)

+ ASSERT_FLOAT_EQ(output_vector.get()[i], kScale);

+#if defined(ARCH_CPU_X86_FAMILY) && defined(__SSE__)

+ // Reset vectors, and try with SSE.

+ std::fill(input_vector.get(), input_vector.get() + kHighLatencyBufferSize,

Ami GONE FROM CHROMIUM 2012/07/27 17:33:11 Why is this necessary?

DaleCurtis 2012/07/27 21:23:42 Whoops, don't need to reset input vector, but outp

+ kFillValue);

+ memset(output_vector.get(), 0, sizeof(float) * kHighLatencyBufferSize);

Ami GONE FROM CHROMIUM 2012/07/27 17:33:11 ditto

DaleCurtis 2012/07/27 21:23:42 Done.

+ mixer.VectorFMAC_SSE(

+ input_vector.get(), kScale, kHighLatencyBufferSize, output_vector.get());

+ for(int i = 0; i < kHighLatencyBufferSize; ++i)

+ ASSERT_FLOAT_EQ(output_vector.get()[i], kScale);

+#endif

+// Benchmark for the various VectorFMAC() methods. Make sure to build with

+// branding=Chrome so that DCHECKs are compiled out when benchmarking. Original

+// benchmarks were run with --vector-fmac-iterations=200000.

+TEST(AudioRendererMixerTest, VectorFMACBenchmark) {

+ // Initialize a dummy mixer.

+ scoped_refptr<MockAudioRendererSink> sink = new MockAudioRendererSink();

+ EXPECT_CALL(*sink, Start());

+ EXPECT_CALL(*sink, Stop());

+ AudioParameters params(

+ AudioParameters::AUDIO_PCM_LINEAR, kChannelLayout, kSampleRate,

+ kBitsPerChannel, kHighLatencyBufferSize);

+ AudioRendererMixer mixer(params, params, sink);

+ // Initialize input and output vectors.

+ scoped_ptr_malloc<float, base::ScopedPtrAlignedFree> input_vector(

+ static_cast<float*>(

+ base::AlignedAlloc(sizeof(float) * kHighLatencyBufferSize, 16)));

+ scoped_ptr_malloc<float, base::ScopedPtrAlignedFree> output_vector(

+ static_cast<float*>(

+ base::AlignedAlloc(sizeof(float) * kHighLatencyBufferSize, 16)));

+ // Retrieve benchmark iterations from command line.

+ int vector_fmac_iterations = 10;

+ std::string iterations(CommandLine::ForCurrentProcess()->GetSwitchValueASCII(

+ switches::kVectorFMACIterations));

+ if (!iterations.empty())

+ base::StringToInt(iterations, &vector_fmac_iterations);

+ printf("Benchmarking %d iterations:\n", vector_fmac_iterations);

+ // Benchmark VectorFMAC_C().

+ memset(input_vector.get(), 1, sizeof(float) * kHighLatencyBufferSize);

Ami GONE FROM CHROMIUM 2012/07/27 17:33:11 Are you intentionally testing inputs of inputs tha

DaleCurtis 2012/07/27 21:23:42 Yeah, just needed to initialize the array with som

+ memset(output_vector.get(), 0, sizeof(float) * kHighLatencyBufferSize);

+ base::TimeTicks start = base::TimeTicks::HighResNow();

+ for (int i = 0; i < vector_fmac_iterations; ++i) {

+ mixer.VectorFMAC_C(input_vector.get(), M_PI, kHighLatencyBufferSize,

+ output_vector.get());

+ }

+ double total_time_c_ms =

+ (base::TimeTicks::HighResNow() - start).InMillisecondsF();

+ printf("VectorFMAC_C took %.2fms.\n", total_time_c_ms);

+#if defined(ARCH_CPU_X86_FAMILY) && defined(__SSE__)

+ // Benchmark VectorFMAC_SSE() with unaligned size; i.e., size % 4 != 0.

Ami GONE FROM CHROMIUM 2012/07/27 17:33:11 Comment here that the API insists the head *is* al

DaleCurtis 2012/07/27 21:23:42 Seems redundant at this point, it's already in the

+ ASSERT_NE((kHighLatencyBufferSize - 1) % 4, 0);

+ memset(input_vector.get(), 1, sizeof(float) * kHighLatencyBufferSize);

Ami GONE FROM CHROMIUM 2012/07/27 17:33:11 ditto

DaleCurtis 2012/07/27 21:23:42 Done.

+ memset(output_vector.get(), 0, sizeof(float) * kHighLatencyBufferSize);

+ start = base::TimeTicks::HighResNow();

+ for (int j = 0; j < vector_fmac_iterations; ++j) {

+ mixer.VectorFMAC_SSE(input_vector.get(), M_PI, kHighLatencyBufferSize - 1,

+ output_vector.get());

+ }

+ double total_time_sse_unaligned_ms =

+ (base::TimeTicks::HighResNow() - start).InMillisecondsF();

+ printf("VectorFMAC_SSE (unaligned size) took %.2fms; which is %.2fx faster"

+ " than VectorFMAC_C.\n", total_time_sse_unaligned_ms,

+ total_time_c_ms / total_time_sse_unaligned_ms);

+ // Benchmark VectorFMAC_SSE() with aligned size; i.e., size % 4 == 0.

+ ASSERT_EQ(kHighLatencyBufferSize % 4, 0);

+ memset(input_vector.get(), 1, sizeof(float) * kHighLatencyBufferSize);

Ami GONE FROM CHROMIUM 2012/07/27 17:33:11 ditto

DaleCurtis 2012/07/27 21:23:42 Done.

+ memset(output_vector.get(), 0, sizeof(float) * kHighLatencyBufferSize);

+ start = base::TimeTicks::HighResNow();

+ for (int j = 0; j < vector_fmac_iterations; ++j) {

+ mixer.VectorFMAC_SSE(input_vector.get(), M_PI, kHighLatencyBufferSize,

+ output_vector.get());

+ }

+ double total_time_sse_aligned_ms =

+ (base::TimeTicks::HighResNow() - start).InMillisecondsF();

+ printf("VectorFMAC_SSE (aligned size) took %.2fms; which is %.2fx faster than"

+ " VectorFMAC_C and %.2fx faster than VectorFMAC_SSE (unaligned size)."

+ "\n",

+ total_time_sse_aligned_ms, total_time_c_ms / total_time_sse_aligned_ms,

+ total_time_sse_unaligned_ms / total_time_sse_aligned_ms);

+#endif

// Tuple of <input sampling rate, output sampling rate, epsilon>.

typedef std::tr1::tuple<int, int, double> AudioRendererMixerTestData;

class AudioRendererMixerTest

« media/base/audio_renderer_mixer.cc ('K') | « media/base/audio_renderer_mixer.cc ('k') | no next file » | no next file with comments »