| Index: media/base/audio_renderer_mixer.cc
|
| diff --git a/media/base/audio_renderer_mixer.cc b/media/base/audio_renderer_mixer.cc
|
| index 6d23faad35d6257274d0738ffdbb903104e7006f..93c39e7c30fd0b25f6a0fd6f00905897b4b8e03f 100644
|
| --- a/media/base/audio_renderer_mixer.cc
|
| +++ b/media/base/audio_renderer_mixer.cc
|
| @@ -4,9 +4,15 @@
|
|
|
| #include "media/base/audio_renderer_mixer.h"
|
|
|
| +#if defined(ARCH_CPU_X86_FAMILY) && defined(__SSE__)
|
| +#include <xmmintrin.h>
|
| +#endif
|
| +
|
| #include "base/bind.h"
|
| #include "base/bind_helpers.h"
|
| +#include "base/cpu.h"
|
| #include "base/logging.h"
|
| +#include "base/memory/aligned_memory.h"
|
| #include "media/audio/audio_util.h"
|
| #include "media/base/limits.h"
|
|
|
| @@ -42,7 +48,7 @@ AudioRendererMixer::~AudioRendererMixer() {
|
|
|
| // Clean up |mixer_input_audio_data_|.
|
| for (size_t i = 0; i < mixer_input_audio_data_.size(); ++i)
|
| - delete [] mixer_input_audio_data_[i];
|
| + base::AlignedFree(mixer_input_audio_data_[i]);
|
| mixer_input_audio_data_.clear();
|
|
|
| // Ensures that all mixer inputs have stopped themselves prior to destruction
|
| @@ -84,11 +90,12 @@ void AudioRendererMixer::ProvideInput(const std::vector<float*>& audio_data,
|
| // Allocate staging area for each mixer input's audio data on first call. We
|
| // won't know how much to allocate until here because of resampling.
|
| if (mixer_input_audio_data_.size() == 0) {
|
| - // TODO(dalecurtis): If we switch to AVX/SSE optimization, we'll need to
|
| - // allocate these on 32-byte boundaries and ensure they're sized % 32 bytes.
|
| mixer_input_audio_data_.reserve(audio_data.size());
|
| - for (size_t i = 0; i < audio_data.size(); ++i)
|
| - mixer_input_audio_data_.push_back(new float[number_of_frames]);
|
| + for (size_t i = 0; i < audio_data.size(); ++i) {
|
| + // Allocate audio data with a 16-byte alignment for SSE optimizations.
|
| + mixer_input_audio_data_.push_back(static_cast<float*>(
|
| + base::AlignedAlloc(sizeof(float) * number_of_frames, 16)));
|
| + }
|
| mixer_input_audio_data_size_ = number_of_frames;
|
| }
|
|
|
| @@ -120,12 +127,9 @@ void AudioRendererMixer::ProvideInput(const std::vector<float*>& audio_data,
|
| continue;
|
|
|
| // Volume adjust and mix each mixer input into |audio_data| after rendering.
|
| - // TODO(dalecurtis): Optimize with NEON/SSE/AVX vector_fmac from FFmpeg.
|
| for (size_t j = 0; j < audio_data.size(); ++j) {
|
| - float* dest = audio_data[j];
|
| - float* source = mixer_input_audio_data_[j];
|
| - for (int k = 0; k < frames_filled; ++k)
|
| - dest[k] += source[k] * static_cast<float>(volume);
|
| + VectorFMAC(
|
| + *mixer_input_audio_data_[j], volume, frames_filled, audio_data[j]);
|
| }
|
|
|
| // No need to clamp values as InterleaveFloatToInt() will take care of this
|
| @@ -143,4 +147,46 @@ void AudioRendererMixer::OnRenderError() {
|
| }
|
| }
|
|
|
| +void AudioRendererMixer::VectorFMAC(const float& src, float scale, int len,
|
| + float dest[]) {
|
| + // Rely on function level static initialization to keep VectorFMACProc
|
| + // selection thread safe.
|
| + typedef void (*VectorFMACProc)(const float& src, float scale, int len,
|
| + float dest[]);
|
| +#if defined(ARCH_CPU_X86_FAMILY) && defined(__SSE__)
|
| + static const VectorFMACProc kVectorFMACProc =
|
| + base::CPU().has_sse() ? VectorFMAC_SSE : VectorFMAC_C;
|
| +#else
|
| + static const VectorFMACProc kVectorFMACProc = VectorFMAC_C;
|
| +#endif
|
| +
|
| + return kVectorFMACProc(src, scale, len, dest);
|
| +}
|
| +
|
| +void AudioRendererMixer::VectorFMAC_C(const float& src, float scale, int len,
|
| + float dest[]) {
|
| + for (int i = 0; i < len; ++i)
|
| + dest[i] += (&src)[i] * scale;
|
| +}
|
| +
|
| +#if defined(ARCH_CPU_X86_FAMILY) && defined(__SSE__)
|
| +void AudioRendererMixer::VectorFMAC_SSE(const float& src, float scale, int len,
|
| + float dest[]) {
|
| + // Ensure |src| and |dest| are 16-byte aligned.
|
| + DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(&src) & 0x0F);
|
| + DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(dest) & 0x0F);
|
| +
|
| + __m128 m_scale = _mm_set_ps1(scale);
|
| + int rem = len % 4;
|
| + for (int i = 0; i < len - rem; i += 4) {
|
| + _mm_store_ps(dest + i, _mm_add_ps(_mm_load_ps(dest + i),
|
| + _mm_mul_ps(_mm_load_ps(&src + i), m_scale)));
|
| + }
|
| +
|
| + // Handle any remaining values that wouldn't fit in an SSE pass.
|
| + if (rem)
|
| + VectorFMAC_C(*(&src + len - rem), scale, rem, dest + len - rem);
|
| +}
|
| +#endif
|
| +
|
| } // namespace media
|
|
|