Index: media/base/simd/sinc_resampler_sse.cc |
diff --git a/media/base/simd/sinc_resampler_sse.cc b/media/base/simd/sinc_resampler_sse.cc |
new file mode 100644 |
index 0000000000000000000000000000000000000000..f0aec1ce673c88038176b1baa79bae64f11b24b5 |
--- /dev/null |
+++ b/media/base/simd/sinc_resampler_sse.cc |
@@ -0,0 +1,48 @@ |
+// Copyright 2013 The Chromium Authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+ |
+#include "media/base/sinc_resampler.h" |
+ |
+#include <xmmintrin.h> |
+ |
+namespace media { |
+ |
+float SincResampler::Convolve_SSE(const float* input_ptr, const float* k1, |
+                                  const float* k2, |
+                                  double kernel_interpolation_factor) { |
+  // Lane-wise running sums of input * |k1| and input * |k2|. |
+  __m128 sum1 = _mm_setzero_ps(); |
+  __m128 sum2 = _mm_setzero_ps(); |
+ |
+  // The kernels are always fetched with aligned loads; |input_ptr| is only |
+  // fetched that way when its address is a multiple of 16, otherwise loadu is |
+  // used. Unrolling these loops hurt performance in local testing. |
+  const bool aligned = (reinterpret_cast<uintptr_t>(input_ptr) & 0x0F) == 0; |
+  if (aligned) { |
+    for (int n = 0; n < kKernelSize; n += 4) { |
+      const __m128 in = _mm_load_ps(input_ptr + n); |
+      sum1 = _mm_add_ps(sum1, _mm_mul_ps(in, _mm_load_ps(k1 + n))); |
+      sum2 = _mm_add_ps(sum2, _mm_mul_ps(in, _mm_load_ps(k2 + n))); |
+    } |
+  } else { |
+    for (int n = 0; n < kKernelSize; n += 4) { |
+      const __m128 in = _mm_loadu_ps(input_ptr + n); |
+      sum1 = _mm_add_ps(sum1, _mm_mul_ps(in, _mm_load_ps(k1 + n))); |
+      sum2 = _mm_add_ps(sum2, _mm_mul_ps(in, _mm_load_ps(k2 + n))); |
+    } |
+  } |
+ |
+  // Linearly blend per lane: sum1 * (1 - factor) + sum2 * factor. |
+  const __m128 w1 = _mm_set_ps1(1.0 - kernel_interpolation_factor); |
+  const __m128 w2 = _mm_set_ps1(kernel_interpolation_factor); |
+  const __m128 mix = _mm_add_ps(_mm_mul_ps(sum1, w1), _mm_mul_ps(sum2, w2)); |
+ |
+  // Horizontal sum: fold lanes 2,3 onto lanes 0,1, then add lane 1 to lane 0. |
+  const __m128 half = _mm_add_ps(_mm_movehl_ps(mix, mix), mix); |
+  float total; |
+  _mm_store_ss(&total, _mm_add_ss(half, _mm_shuffle_ps(half, half, 1))); |
+  return total; |
+} |
+ |
+} // namespace media |