Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(310)

Unified Diff: media/filters/wsola_internals.cc

Issue 2556993002: Experiment with AVX optimizations for FMAC, FMUL operations.
Patch Set: Created 4 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « media/base/vector_math_unittest.cc ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
Index: media/filters/wsola_internals.cc
diff --git a/media/filters/wsola_internals.cc b/media/filters/wsola_internals.cc
index 9e12f032f3cc021a151c933aec88a2fe82e31bf7..353232679af01ce70dbe291bff8f2d4e02420f47 100644
--- a/media/filters/wsola_internals.cc
+++ b/media/filters/wsola_internals.cc
@@ -14,14 +14,7 @@
#include "base/logging.h"
#include "media/base/audio_bus.h"
-
-#if defined(ARCH_CPU_X86_FAMILY)
-#define USE_SIMD 1
-#include <xmmintrin.h>
-#elif defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON)
-#define USE_SIMD 1
-#include <arm_neon.h>
-#endif
+#include "media/base/vector_math.h"
namespace media {
@@ -56,55 +49,11 @@ void MultiChannelDotProduct(const AudioBus* a,
DCHECK_LE(frame_offset_a + num_frames, a->frames());
DCHECK_LE(frame_offset_b + num_frames, b->frames());
-// SIMD optimized variants can provide a massive speedup to this operation.
-#if defined(USE_SIMD)
- const int rem = num_frames % 4;
- const int last_index = num_frames - rem;
const int channels = a->channels();
for (int ch = 0; ch < channels; ++ch) {
- const float* a_src = a->channel(ch) + frame_offset_a;
- const float* b_src = b->channel(ch) + frame_offset_b;
-
-#if defined(ARCH_CPU_X86_FAMILY)
- // First sum all components.
- __m128 m_sum = _mm_setzero_ps();
- for (int s = 0; s < last_index; s += 4) {
- m_sum = _mm_add_ps(
- m_sum, _mm_mul_ps(_mm_loadu_ps(a_src + s), _mm_loadu_ps(b_src + s)));
- }
-
- // Reduce to a single float for this channel. Sadly, SSE1,2 doesn't have a
- // horizontal sum function, so we have to condense manually.
- m_sum = _mm_add_ps(_mm_movehl_ps(m_sum, m_sum), m_sum);
- _mm_store_ss(dot_product + ch,
- _mm_add_ss(m_sum, _mm_shuffle_ps(m_sum, m_sum, 1)));
-#elif defined(ARCH_CPU_ARM_FAMILY)
- // First sum all components.
- float32x4_t m_sum = vmovq_n_f32(0);
- for (int s = 0; s < last_index; s += 4)
- m_sum = vmlaq_f32(m_sum, vld1q_f32(a_src + s), vld1q_f32(b_src + s));
-
- // Reduce to a single float for this channel.
- float32x2_t m_half = vadd_f32(vget_high_f32(m_sum), vget_low_f32(m_sum));
- dot_product[ch] = vget_lane_f32(vpadd_f32(m_half, m_half), 0);
-#endif
- }
-
- if (!rem)
- return;
- num_frames = rem;
- frame_offset_a += last_index;
- frame_offset_b += last_index;
-#else
- memset(dot_product, 0, sizeof(*dot_product) * a->channels());
-#endif // defined(USE_SIMD)
-
- // C version is required to handle remainder of frames (% 4 != 0)
- for (int k = 0; k < a->channels(); ++k) {
- const float* ch_a = a->channel(k) + frame_offset_a;
- const float* ch_b = b->channel(k) + frame_offset_b;
- for (int n = 0; n < num_frames; ++n)
- dot_product[k] += *ch_a++ * *ch_b++;
+ dot_product[ch] =
+ vector_math::DotProduct(a->channel(ch) + frame_offset_a,
+ b->channel(ch) + frame_offset_b, num_frames);
}
}
« no previous file with comments | « media/base/vector_math_unittest.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698