Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(394)

Unified Diff: media/base/sinc_resampler.cc

Issue 2556993002: Experiment with AVX optimizations for FMAC, FMUL operations.
Patch Set: Created 4 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « media/base/sinc_resampler.h ('k') | media/base/sinc_resampler_perftest.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
Index: media/base/sinc_resampler.cc
diff --git a/media/base/sinc_resampler.cc b/media/base/sinc_resampler.cc
index cffb0c9d6dad86c02a410f6ded7233f756b7ea65..67ba750561959ace03424ec513fb19a5786658fe 100644
--- a/media/base/sinc_resampler.cc
+++ b/media/base/sinc_resampler.cc
@@ -82,17 +82,6 @@
#include <limits>
#include "base/logging.h"
-#include "build/build_config.h"
-
-#if defined(ARCH_CPU_X86_FAMILY)
-#include <xmmintrin.h>
-#define CONVOLVE_FUNC Convolve_SSE
-#elif defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON)
-#include <arm_neon.h>
-#define CONVOLVE_FUNC Convolve_NEON
-#else
-#define CONVOLVE_FUNC Convolve_C
-#endif
namespace media {
@@ -191,7 +180,7 @@ void SincResampler::InitializeKernel() {
// Compute Blackman window, matching the offset of the sinc().
const float x = (i - subsample_offset) / kKernelSize;
const float window = static_cast<float>(kA0 - kA1 * cos(2.0 * M_PI * x) +
- kA2 * cos(4.0 * M_PI * x));
+ kA2 * cos(4.0 * M_PI * x));
kernel_window_storage_[idx] = window;
// Compute the sinc with offset, then window the sinc() function and store
@@ -264,7 +253,7 @@ void SincResampler::Resample(int frames, float* destination) {
const double kernel_interpolation_factor =
virtual_offset_idx - offset_idx;
*destination++ =
- CONVOLVE_FUNC(input_ptr, k1, k2, kernel_interpolation_factor);
+ vector_math::Convolve(input_ptr, k1, k2, kernel_interpolation_factor);
// Advance the virtual index.
virtual_source_idx_ += io_sample_rate_ratio_;
@@ -309,91 +298,4 @@ double SincResampler::BufferedFrames() const {
return buffer_primed_ ? request_frames_ - virtual_source_idx_ : 0;
}
-float SincResampler::Convolve_C(const float* input_ptr, const float* k1,
- const float* k2,
- double kernel_interpolation_factor) {
- float sum1 = 0;
- float sum2 = 0;
-
- // Generate a single output sample. Unrolling this loop hurt performance in
- // local testing.
- int n = kKernelSize;
- while (n--) {
- sum1 += *input_ptr * *k1++;
- sum2 += *input_ptr++ * *k2++;
- }
-
- // Linearly interpolate the two "convolutions".
- return static_cast<float>((1.0 - kernel_interpolation_factor) * sum1 +
- kernel_interpolation_factor * sum2);
-}
-
-#if defined(ARCH_CPU_X86_FAMILY)
-float SincResampler::Convolve_SSE(const float* input_ptr, const float* k1,
- const float* k2,
- double kernel_interpolation_factor) {
- __m128 m_input;
- __m128 m_sums1 = _mm_setzero_ps();
- __m128 m_sums2 = _mm_setzero_ps();
-
- // Based on |input_ptr| alignment, we need to use loadu or load. Unrolling
- // these loops hurt performance in local testing.
- if (reinterpret_cast<uintptr_t>(input_ptr) & 0x0F) {
- for (int i = 0; i < kKernelSize; i += 4) {
- m_input = _mm_loadu_ps(input_ptr + i);
- m_sums1 = _mm_add_ps(m_sums1, _mm_mul_ps(m_input, _mm_load_ps(k1 + i)));
- m_sums2 = _mm_add_ps(m_sums2, _mm_mul_ps(m_input, _mm_load_ps(k2 + i)));
- }
- } else {
- for (int i = 0; i < kKernelSize; i += 4) {
- m_input = _mm_load_ps(input_ptr + i);
- m_sums1 = _mm_add_ps(m_sums1, _mm_mul_ps(m_input, _mm_load_ps(k1 + i)));
- m_sums2 = _mm_add_ps(m_sums2, _mm_mul_ps(m_input, _mm_load_ps(k2 + i)));
- }
- }
-
- // Linearly interpolate the two "convolutions".
- m_sums1 = _mm_mul_ps(m_sums1, _mm_set_ps1(
- static_cast<float>(1.0 - kernel_interpolation_factor)));
- m_sums2 = _mm_mul_ps(m_sums2, _mm_set_ps1(
- static_cast<float>(kernel_interpolation_factor)));
- m_sums1 = _mm_add_ps(m_sums1, m_sums2);
-
- // Sum components together.
- float result;
- m_sums2 = _mm_add_ps(_mm_movehl_ps(m_sums1, m_sums1), m_sums1);
- _mm_store_ss(&result, _mm_add_ss(m_sums2, _mm_shuffle_ps(
- m_sums2, m_sums2, 1)));
-
- return result;
-}
-#elif defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON)
-float SincResampler::Convolve_NEON(const float* input_ptr, const float* k1,
- const float* k2,
- double kernel_interpolation_factor) {
- float32x4_t m_input;
- float32x4_t m_sums1 = vmovq_n_f32(0);
- float32x4_t m_sums2 = vmovq_n_f32(0);
-
- const float* upper = input_ptr + kKernelSize;
- for (; input_ptr < upper; ) {
- m_input = vld1q_f32(input_ptr);
- input_ptr += 4;
- m_sums1 = vmlaq_f32(m_sums1, m_input, vld1q_f32(k1));
- k1 += 4;
- m_sums2 = vmlaq_f32(m_sums2, m_input, vld1q_f32(k2));
- k2 += 4;
- }
-
- // Linearly interpolate the two "convolutions".
- m_sums1 = vmlaq_f32(
- vmulq_f32(m_sums1, vmovq_n_f32(1.0 - kernel_interpolation_factor)),
- m_sums2, vmovq_n_f32(kernel_interpolation_factor));
-
- // Sum components together.
- float32x2_t m_half = vadd_f32(vget_high_f32(m_sums1), vget_low_f32(m_sums1));
- return vget_lane_f32(vpadd_f32(m_half, m_half), 0);
-}
-#endif
-
} // namespace media
« no previous file with comments | « media/base/sinc_resampler.h ('k') | media/base/sinc_resampler_perftest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698