| Index: media/base/sinc_resampler.cc
|
| diff --git a/media/base/sinc_resampler.cc b/media/base/sinc_resampler.cc
|
| index cffb0c9d6dad86c02a410f6ded7233f756b7ea65..67ba750561959ace03424ec513fb19a5786658fe 100644
|
| --- a/media/base/sinc_resampler.cc
|
| +++ b/media/base/sinc_resampler.cc
|
| @@ -82,17 +82,6 @@
|
| #include <limits>
|
|
|
| #include "base/logging.h"
|
| -#include "build/build_config.h"
|
| -
|
| -#if defined(ARCH_CPU_X86_FAMILY)
|
| -#include <xmmintrin.h>
|
| -#define CONVOLVE_FUNC Convolve_SSE
|
| -#elif defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON)
|
| -#include <arm_neon.h>
|
| -#define CONVOLVE_FUNC Convolve_NEON
|
| -#else
|
| -#define CONVOLVE_FUNC Convolve_C
|
| -#endif
|
|
|
| namespace media {
|
|
|
| @@ -191,7 +180,7 @@ void SincResampler::InitializeKernel() {
|
| // Compute Blackman window, matching the offset of the sinc().
|
| const float x = (i - subsample_offset) / kKernelSize;
|
| const float window = static_cast<float>(kA0 - kA1 * cos(2.0 * M_PI * x) +
|
| - kA2 * cos(4.0 * M_PI * x));
|
| + kA2 * cos(4.0 * M_PI * x));
|
| kernel_window_storage_[idx] = window;
|
|
|
| // Compute the sinc with offset, then window the sinc() function and store
|
| @@ -264,7 +253,7 @@ void SincResampler::Resample(int frames, float* destination) {
|
| const double kernel_interpolation_factor =
|
| virtual_offset_idx - offset_idx;
|
| *destination++ =
|
| - CONVOLVE_FUNC(input_ptr, k1, k2, kernel_interpolation_factor);
|
| + vector_math::Convolve(input_ptr, k1, k2, kernel_interpolation_factor);
|
|
|
| // Advance the virtual index.
|
| virtual_source_idx_ += io_sample_rate_ratio_;
|
| @@ -309,91 +298,4 @@ double SincResampler::BufferedFrames() const {
|
| return buffer_primed_ ? request_frames_ - virtual_source_idx_ : 0;
|
| }
|
|
|
| -float SincResampler::Convolve_C(const float* input_ptr, const float* k1,
|
| - const float* k2,
|
| - double kernel_interpolation_factor) {
|
| - float sum1 = 0;
|
| - float sum2 = 0;
|
| -
|
| - // Generate a single output sample. Unrolling this loop hurt performance in
|
| - // local testing.
|
| - int n = kKernelSize;
|
| - while (n--) {
|
| - sum1 += *input_ptr * *k1++;
|
| - sum2 += *input_ptr++ * *k2++;
|
| - }
|
| -
|
| - // Linearly interpolate the two "convolutions".
|
| - return static_cast<float>((1.0 - kernel_interpolation_factor) * sum1 +
|
| - kernel_interpolation_factor * sum2);
|
| -}
|
| -
|
| -#if defined(ARCH_CPU_X86_FAMILY)
|
| -float SincResampler::Convolve_SSE(const float* input_ptr, const float* k1,
|
| - const float* k2,
|
| - double kernel_interpolation_factor) {
|
| - __m128 m_input;
|
| - __m128 m_sums1 = _mm_setzero_ps();
|
| - __m128 m_sums2 = _mm_setzero_ps();
|
| -
|
| - // Based on |input_ptr| alignment, we need to use loadu or load. Unrolling
|
| - // these loops hurt performance in local testing.
|
| - if (reinterpret_cast<uintptr_t>(input_ptr) & 0x0F) {
|
| - for (int i = 0; i < kKernelSize; i += 4) {
|
| - m_input = _mm_loadu_ps(input_ptr + i);
|
| - m_sums1 = _mm_add_ps(m_sums1, _mm_mul_ps(m_input, _mm_load_ps(k1 + i)));
|
| - m_sums2 = _mm_add_ps(m_sums2, _mm_mul_ps(m_input, _mm_load_ps(k2 + i)));
|
| - }
|
| - } else {
|
| - for (int i = 0; i < kKernelSize; i += 4) {
|
| - m_input = _mm_load_ps(input_ptr + i);
|
| - m_sums1 = _mm_add_ps(m_sums1, _mm_mul_ps(m_input, _mm_load_ps(k1 + i)));
|
| - m_sums2 = _mm_add_ps(m_sums2, _mm_mul_ps(m_input, _mm_load_ps(k2 + i)));
|
| - }
|
| - }
|
| -
|
| - // Linearly interpolate the two "convolutions".
|
| - m_sums1 = _mm_mul_ps(m_sums1, _mm_set_ps1(
|
| - static_cast<float>(1.0 - kernel_interpolation_factor)));
|
| - m_sums2 = _mm_mul_ps(m_sums2, _mm_set_ps1(
|
| - static_cast<float>(kernel_interpolation_factor)));
|
| - m_sums1 = _mm_add_ps(m_sums1, m_sums2);
|
| -
|
| - // Sum components together.
|
| - float result;
|
| - m_sums2 = _mm_add_ps(_mm_movehl_ps(m_sums1, m_sums1), m_sums1);
|
| - _mm_store_ss(&result, _mm_add_ss(m_sums2, _mm_shuffle_ps(
|
| - m_sums2, m_sums2, 1)));
|
| -
|
| - return result;
|
| -}
|
| -#elif defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON)
|
| -float SincResampler::Convolve_NEON(const float* input_ptr, const float* k1,
|
| - const float* k2,
|
| - double kernel_interpolation_factor) {
|
| - float32x4_t m_input;
|
| - float32x4_t m_sums1 = vmovq_n_f32(0);
|
| - float32x4_t m_sums2 = vmovq_n_f32(0);
|
| -
|
| - const float* upper = input_ptr + kKernelSize;
|
| - for (; input_ptr < upper; ) {
|
| - m_input = vld1q_f32(input_ptr);
|
| - input_ptr += 4;
|
| - m_sums1 = vmlaq_f32(m_sums1, m_input, vld1q_f32(k1));
|
| - k1 += 4;
|
| - m_sums2 = vmlaq_f32(m_sums2, m_input, vld1q_f32(k2));
|
| - k2 += 4;
|
| - }
|
| -
|
| - // Linearly interpolate the two "convolutions".
|
| - m_sums1 = vmlaq_f32(
|
| - vmulq_f32(m_sums1, vmovq_n_f32(1.0 - kernel_interpolation_factor)),
|
| - m_sums2, vmovq_n_f32(kernel_interpolation_factor));
|
| -
|
| - // Sum components together.
|
| - float32x2_t m_half = vadd_f32(vget_high_f32(m_sums1), vget_low_f32(m_sums1));
|
| - return vget_lane_f32(vpadd_f32(m_half, m_half), 0);
|
| -}
|
| -#endif
|
| -
|
| } // namespace media
|
|
|