| Index: media/base/sinc_resampler.cc
|
| diff --git a/media/base/sinc_resampler.cc b/media/base/sinc_resampler.cc
|
| index 869af1b3738dd07f207fca840d079688e90280ba..9352fe3dd2f5895ed1aa9032f81d420c95fba5b2 100644
|
| --- a/media/base/sinc_resampler.cc
|
| +++ b/media/base/sinc_resampler.cc
|
| @@ -36,13 +36,19 @@
|
|
|
| #include "media/base/sinc_resampler.h"
|
|
|
| -#if defined(ARCH_CPU_X86_FAMILY) && defined(__SSE__)
|
| -#include <xmmintrin.h>
|
| -#endif
|
| #include <cmath>
|
|
|
| #include "base/cpu.h"
|
| #include "base/logging.h"
|
| +#include "build/build_config.h"
|
| +
|
| +#if defined(ARCH_CPU_X86_FAMILY) && defined(__SSE__)
|
| +#include <xmmintrin.h>
|
| +#endif
|
| +
|
| +#if defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON)
|
| +#include <arm_neon.h>
|
| +#endif
|
|
|
| namespace media {
|
|
|
| @@ -231,6 +237,8 @@ float SincResampler::Convolve(const float* input_ptr, const float* k1,
|
| #if defined(ARCH_CPU_X86_FAMILY) && defined(__SSE__)
|
| static const ConvolveProc kConvolveProc =
|
| base::CPU().has_sse() ? Convolve_SSE : Convolve_C;
|
| +#elif defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON)
|
| + static const ConvolveProc kConvolveProc = Convolve_NEON;
|
| #else
|
| static const ConvolveProc kConvolveProc = Convolve_C;
|
| #endif
|
| @@ -301,4 +309,33 @@ float SincResampler::Convolve_SSE(const float* input_ptr, const float* k1,
|
| }
|
| #endif
|
|
|
| +#if defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON)
|
| +float SincResampler::Convolve_NEON(const float* input_ptr, const float* k1,
|
| + const float* k2,
|
| + double kernel_interpolation_factor) {
|
| + float32x4_t m_input;
|
| + float32x4_t m_sums1 = vmovq_n_f32(0);
|
| + float32x4_t m_sums2 = vmovq_n_f32(0);
|
| +
|
| + const float* upper = input_ptr + kKernelSize;  // End of the input window.
|
| + for (; input_ptr < upper; ) {  // 4 floats per pass; assumes kKernelSize % 4 == 0 -- TODO confirm.
|
| + m_input = vld1q_f32(input_ptr);
|
| + input_ptr += 4;
|
| + m_sums1 = vmlaq_f32(m_sums1, m_input, vld1q_f32(k1));  // m_sums1 += m_input * k1[0..3]
|
| + k1 += 4;
|
| + m_sums2 = vmlaq_f32(m_sums2, m_input, vld1q_f32(k2));  // m_sums2 += m_input * k2[0..3]
|
| + k2 += 4;
|
| + }
|
| +
|
| + // Linearly interpolate per lane: sums1 * (1 - factor) + sums2 * factor.
|
| + m_sums1 = vmlaq_f32(
|
| + vmulq_f32(m_sums1, vmovq_n_f32(1.0 - kernel_interpolation_factor)),
|
| + m_sums2, vmovq_n_f32(kernel_interpolation_factor));
|
| +
|
| + // Horizontal sum: fold the four lanes down to the scalar result.
|
| + float32x2_t m_half = vadd_f32(vget_high_f32(m_sums1), vget_low_f32(m_sums1));
|
| + return vget_lane_f32(vpadd_f32(m_half, m_half), 0);
|
| +}
|
| +#endif
|
| +
|
| } // namespace media
|
|
|