Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(872)

Unified Diff: media/base/sinc_resampler.cc

Issue 10960023: Add ARM NEON intrinsic optimizations for SincResampler. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Comments. Created 8 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: media/base/sinc_resampler.cc
diff --git a/media/base/sinc_resampler.cc b/media/base/sinc_resampler.cc
index 869af1b3738dd07f207fca840d079688e90280ba..be3e71d336ea100f61d06fea7f97b415b43a9e15 100644
--- a/media/base/sinc_resampler.cc
+++ b/media/base/sinc_resampler.cc
@@ -36,13 +36,19 @@
#include "media/base/sinc_resampler.h"
-#if defined(ARCH_CPU_X86_FAMILY) && defined(__SSE__)
-#include <xmmintrin.h>
-#endif
#include <cmath>
#include "base/cpu.h"
#include "base/logging.h"
+#include "build/build_config.h"
+
+#if defined(ARCH_CPU_X86_FAMILY) && defined(__SSE__)
+#include <xmmintrin.h>
+#endif
+
+#if defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON)
+#include <arm_neon.h>
+#endif
namespace media {
@@ -231,6 +237,8 @@ float SincResampler::Convolve(const float* input_ptr, const float* k1,
#if defined(ARCH_CPU_X86_FAMILY) && defined(__SSE__)
static const ConvolveProc kConvolveProc =
base::CPU().has_sse() ? Convolve_SSE : Convolve_C;
+#elif defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON)
+ static const ConvolveProc kConvolveProc = Convolve_NEON;
#else
static const ConvolveProc kConvolveProc = Convolve_C;
#endif
@@ -301,4 +309,29 @@ float SincResampler::Convolve_SSE(const float* input_ptr, const float* k1,
}
#endif
+#if defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON)
+float SincResampler::Convolve_NEON(const float* input_ptr, const float* k1,  // NEON path chosen by Convolve(): dots input_ptr with k1 and with k2, then blends the two results.
+ const float* k2,  // Second kernel; its result is weighted by kernel_interpolation_factor.
+ double kernel_interpolation_factor) {  // Blend weight: k1 result gets (1 - factor), k2 result gets factor.
+ float32x4_t m_input;
+ float32x4_t m_sums1 = vmovq_n_f32(0);  // Four running partial sums of input * k1.
+ float32x4_t m_sums2 = vmovq_n_f32(0);  // Four running partial sums of input * k2.
+
+ for (int i = 0; i < kKernelSize; i += 4) {  // Four floats per step; assumes kKernelSize is a multiple of 4, else the last load reads past element kKernelSize - 1 -- TODO confirm.
+ m_input = vld1q_f32(input_ptr + i);  // Loaded once and reused for both kernels.
+ m_sums1 = vaddq_f32(m_sums1, vmulq_f32(m_input, vld1q_f32(k1 + i)));
+ m_sums2 = vaddq_f32(m_sums2, vmulq_f32(m_input, vld1q_f32(k2 + i)));
+ }
+
+ // Linearly interpolate the two "convolutions" lane by lane: (1 - factor) * sums1 + factor * sums2.
+ m_sums1 = vmulq_f32(m_sums1, vmovq_n_f32(1.0 - kernel_interpolation_factor));  // Subtraction is done in double; the result is narrowed to float when broadcast.
+ m_sums2 = vmulq_f32(m_sums2, vmovq_n_f32(kernel_interpolation_factor));
+ m_sums1 = vaddq_f32(m_sums1, m_sums2);
+
+ // Sum components together: fold the high and low halves, then pairwise-add so lane 0 holds the total of all four lanes.
+ float32x2_t m_half = vadd_f32(vget_high_f32(m_sums1), vget_low_f32(m_sums1));
+ return vget_lane_f32(vpadd_f32(m_half, m_half), 0);
+}
+#endif
+
} // namespace media

Powered by Google App Engine
This is Rietveld 408576698