media/base/sinc_resampler.cc - Issue 10960023: Add ARM NEON intrinsic optimizations for SincResampler.

Keyboard Shortcuts

	File
u :	up to issue
j / k :	jump to file after / before current file
J / K :	jump to next file with a comment after / before current file
	Side-by-side diff
i :	toggle intra-line diffs
e :	expand all comments
c :	collapse all comments
s :	toggle showing all comments
n / p :	next / previous diff chunk or comment
N / P :	next / previous comment
<Up> / <Down> :	next / previous line

	Issue
u :	up to list of issues
j / k :	jump to patch after / before current patch
o / <Enter> :	open current patch in side-by-side view
i :	open current patch in unified diff view

	Issue List
j / k :	jump to issue after / before current issue
o / <Enter> :	open current issue

Unified Diff: media/base/sinc_resampler.cc

Issue 10960023: Add ARM NEON intrinsic optimizations for SincResampler. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Fix NE issue for ARM. Created 8 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: media/base/sinc_resampler.cc

diff --git a/media/base/sinc_resampler.cc b/media/base/sinc_resampler.cc

index 869af1b3738dd07f207fca840d079688e90280ba..9352fe3dd2f5895ed1aa9032f81d420c95fba5b2 100644

--- a/media/base/sinc_resampler.cc

+++ b/media/base/sinc_resampler.cc

@@ -36,13 +36,19 @@

#include "media/base/sinc_resampler.h"

-#if defined(ARCH_CPU_X86_FAMILY) && defined(__SSE__)

-#include <xmmintrin.h>

-#endif

#include <cmath>

#include "base/cpu.h"

#include "base/logging.h"

+#include "build/build_config.h"

+#if defined(ARCH_CPU_X86_FAMILY) && defined(__SSE__)

+#include <xmmintrin.h>

+#endif

+#if defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON)

+#include <arm_neon.h>

+#endif

namespace media {

@@ -231,6 +237,8 @@ float SincResampler::Convolve(const float* input_ptr, const float* k1,

#if defined(ARCH_CPU_X86_FAMILY) && defined(__SSE__)

static const ConvolveProc kConvolveProc =

base::CPU().has_sse() ? Convolve_SSE : Convolve_C;

+#elif defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON)

+ static const ConvolveProc kConvolveProc = Convolve_NEON;

#else

static const ConvolveProc kConvolveProc = Convolve_C;

#endif

@@ -301,4 +309,33 @@ float SincResampler::Convolve_SSE(const float* input_ptr, const float* k1,

}

#endif

+#if defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON)

+// ARM NEON implementation of the convolution step.
+//
+// Accumulates, four floats at a time, the dot product of |input_ptr| with
+// kernel |k1| and the dot product of |input_ptr| with kernel |k2| over
+// kKernelSize samples, then blends the two results:
+//   (1 - kernel_interpolation_factor) * dot(input, k1) +
+//        kernel_interpolation_factor  * dot(input, k2)
+// and returns the horizontal sum of the blended vector.
+//
+// The loop advances by 4 floats per iteration with no scalar tail, so
+// kKernelSize is expected to be a multiple of 4; otherwise the final loads
+// would read past |upper|.
+float SincResampler::Convolve_NEON(const float* input_ptr, const float* k1,
+                                   const float* k2,
+                                   double kernel_interpolation_factor) {
+  float32x4_t m_input;
+  float32x4_t m_sums1 = vmovq_n_f32(0);
+  float32x4_t m_sums2 = vmovq_n_f32(0);
+
+  const float* upper = input_ptr + kKernelSize;
+  while (input_ptr < upper) {
+    // One input load feeds both multiply-accumulates.
+    m_input = vld1q_f32(input_ptr);
+    input_ptr += 4;
+    m_sums1 = vmlaq_f32(m_sums1, m_input, vld1q_f32(k1));
+    k1 += 4;
+    m_sums2 = vmlaq_f32(m_sums2, m_input, vld1q_f32(k2));
+    k2 += 4;
+  }
+
+  // Linearly interpolate the two "convolutions". The factor arrives as a
+  // double; it is narrowed to float explicitly before being broadcast.
+  m_sums1 = vmlaq_f32(
+      vmulq_f32(m_sums1, vmovq_n_f32(
+          static_cast<float>(1.0 - kernel_interpolation_factor))),
+      m_sums2, vmovq_n_f32(static_cast<float>(kernel_interpolation_factor)));
+
+  // Sum components together: fold the high half onto the low half, then a
+  // pairwise add leaves the total in lane 0.
+  float32x2_t m_half = vadd_f32(vget_high_f32(m_sums1), vget_low_f32(m_sums1));
+  return vget_lane_f32(vpadd_f32(m_half, m_half), 0);
+}

+#endif

} // namespace media

« no previous file with comments | « media/base/sinc_resampler.h ('k') | media/base/sinc_resampler_unittest.cc » ('j') | no next file with comments »