Index: media/base/audio_renderer_mixer.cc |
diff --git a/media/base/audio_renderer_mixer.cc b/media/base/audio_renderer_mixer.cc |
index 6d23faad35d6257274d0738ffdbb903104e7006f..93c39e7c30fd0b25f6a0fd6f00905897b4b8e03f 100644 |
--- a/media/base/audio_renderer_mixer.cc |
+++ b/media/base/audio_renderer_mixer.cc |
@@ -4,9 +4,15 @@ |
#include "media/base/audio_renderer_mixer.h" |
+#if defined(ARCH_CPU_X86_FAMILY) && defined(__SSE__) |
+#include <xmmintrin.h> |
+#endif |
+ |
#include "base/bind.h" |
#include "base/bind_helpers.h" |
+#include "base/cpu.h" |
#include "base/logging.h" |
+#include "base/memory/aligned_memory.h" |
#include "media/audio/audio_util.h" |
#include "media/base/limits.h" |
@@ -42,7 +48,7 @@ AudioRendererMixer::~AudioRendererMixer() { |
// Clean up |mixer_input_audio_data_|. |
for (size_t i = 0; i < mixer_input_audio_data_.size(); ++i) |
- delete [] mixer_input_audio_data_[i]; |
+ base::AlignedFree(mixer_input_audio_data_[i]); |
mixer_input_audio_data_.clear(); |
// Ensures that all mixer inputs have stopped themselves prior to destruction |
@@ -84,11 +90,12 @@ void AudioRendererMixer::ProvideInput(const std::vector<float*>& audio_data, |
// Allocate staging area for each mixer input's audio data on first call. We |
// won't know how much to allocate until here because of resampling. |
if (mixer_input_audio_data_.size() == 0) { |
- // TODO(dalecurtis): If we switch to AVX/SSE optimization, we'll need to |
- // allocate these on 32-byte boundaries and ensure they're sized % 32 bytes. |
mixer_input_audio_data_.reserve(audio_data.size()); |
- for (size_t i = 0; i < audio_data.size(); ++i) |
- mixer_input_audio_data_.push_back(new float[number_of_frames]); |
+ for (size_t i = 0; i < audio_data.size(); ++i) { |
+ // Allocate audio data with a 16-byte alignment for SSE optimizations. |
+ mixer_input_audio_data_.push_back(static_cast<float*>( |
+ base::AlignedAlloc(sizeof(float) * number_of_frames, 16))); |
+ } |
mixer_input_audio_data_size_ = number_of_frames; |
} |
@@ -120,12 +127,9 @@ void AudioRendererMixer::ProvideInput(const std::vector<float*>& audio_data, |
continue; |
// Volume adjust and mix each mixer input into |audio_data| after rendering. |
- // TODO(dalecurtis): Optimize with NEON/SSE/AVX vector_fmac from FFmpeg. |
for (size_t j = 0; j < audio_data.size(); ++j) { |
- float* dest = audio_data[j]; |
- float* source = mixer_input_audio_data_[j]; |
- for (int k = 0; k < frames_filled; ++k) |
- dest[k] += source[k] * static_cast<float>(volume); |
+ VectorFMAC( |
+ *mixer_input_audio_data_[j], volume, frames_filled, audio_data[j]); |
} |
// No need to clamp values as InterleaveFloatToInt() will take care of this |
@@ -143,4 +147,46 @@ void AudioRendererMixer::OnRenderError() { |
} |
} |
+void AudioRendererMixer::VectorFMAC(const float& src, float scale, int len, |
+                                    float dest[]) { |
+  // Multiply-accumulates |len| floats starting at |src|, scaled by |scale|, |
+  // into |dest| by forwarding to a platform specific implementation. |
+  typedef void (*FMACFunction)(const float&, float, int, float[]); |
+ |
+  // The implementation is chosen once, when the function-local static below is |
+  // initialized: SSE when compiled in and reported by base::CPU, otherwise C. |
+  // The selection relies on function level static initialization for thread |
+  // safety. NOTE(review): confirm the toolchain guarantees that. |
+  static const FMACFunction kSelectedFMAC = |
+#if defined(ARCH_CPU_X86_FAMILY) && defined(__SSE__) |
+      base::CPU().has_sse() ? VectorFMAC_SSE : |
+#endif |
+      VectorFMAC_C; |
+ |
+  kSelectedFMAC(src, scale, len, dest); |
+} |
+ |
+void AudioRendererMixer::VectorFMAC_C(const float& src, float scale, int len, |
+                                      float dest[]) { |
+  // Portable scalar path. |src| refers to the first of |len| contiguous |
+  // floats; each one is scaled by |scale| and added to the matching |dest| |
+  // entry. A non-positive |len| leaves |dest| untouched. |
+  const float* source = &src; |
+  for (int remaining = len; remaining > 0; --remaining, ++source, ++dest) |
+    *dest += *source * scale; |
+} |
+ |
+#if defined(ARCH_CPU_X86_FAMILY) && defined(__SSE__) |
+void AudioRendererMixer::VectorFMAC_SSE(const float& src, float scale, int len, |
+                                        float dest[]) { |
+  // The aligned SSE loads and stores below need 16-byte aligned |src| and |
+  // |dest|. |
+  DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(&src) & 0x0F); |
+  DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(dest) & 0x0F); |
+ |
+  const float* const source = &src; |
+  const int tail = len % 4; |
+  const int vector_len = len - tail; |
+ |
+  // Four floats per pass: dest[i..i+3] += source[i..i+3] * scale. |
+  const __m128 scale_x4 = _mm_set_ps1(scale); |
+  for (int i = 0; i < vector_len; i += 4) { |
+    const __m128 scaled = _mm_mul_ps(_mm_load_ps(source + i), scale_x4); |
+    _mm_store_ps(dest + i, _mm_add_ps(_mm_load_ps(dest + i), scaled)); |
+  } |
+ |
+  // The scalar routine finishes the elements that did not fill a whole pass. |
+  if (tail != 0) |
+    VectorFMAC_C(source[vector_len], scale, tail, dest + vector_len); |
+} |
+#endif |
+ |
} // namespace media |