| OLD | NEW |
| 1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // MSVC++ requires this to be set before any other includes to get M_PI. | 5 // MSVC++ requires this to be set before any other includes to get M_PI. |
| 6 #define _USE_MATH_DEFINES | 6 #define _USE_MATH_DEFINES |
| 7 | 7 |
| 8 #include "media/filters/wsola_internals.h" | 8 #include "media/filters/wsola_internals.h" |
| 9 | 9 |
| 10 #include <algorithm> | 10 #include <algorithm> |
| 11 #include <cmath> | 11 #include <cmath> |
| 12 #include <limits> | 12 #include <limits> |
| 13 #include <memory> | 13 #include <memory> |
| 14 | 14 |
| 15 #include "base/logging.h" | 15 #include "base/logging.h" |
| 16 #include "media/base/audio_bus.h" | 16 #include "media/base/audio_bus.h" |
| 17 | 17 #include "media/base/vector_math.h" |
| 18 #if defined(ARCH_CPU_X86_FAMILY) | |
| 19 #define USE_SIMD 1 | |
| 20 #include <xmmintrin.h> | |
| 21 #elif defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON) | |
| 22 #define USE_SIMD 1 | |
| 23 #include <arm_neon.h> | |
| 24 #endif | |
| 25 | 18 |
| 26 namespace media { | 19 namespace media { |
| 27 | 20 |
| 28 namespace internal { | 21 namespace internal { |
| 29 | 22 |
| 30 bool InInterval(int n, Interval q) { | 23 bool InInterval(int n, Interval q) { |
| 31 return n >= q.first && n <= q.second; | 24 return n >= q.first && n <= q.second; |
| 32 } | 25 } |
| 33 | 26 |
| 34 float MultiChannelSimilarityMeasure(const float* dot_prod_a_b, | 27 float MultiChannelSimilarityMeasure(const float* dot_prod_a_b, |
| (...skipping 14 matching lines...) |
| 49 const AudioBus* b, | 42 const AudioBus* b, |
| 50 int frame_offset_b, | 43 int frame_offset_b, |
| 51 int num_frames, | 44 int num_frames, |
| 52 float* dot_product) { | 45 float* dot_product) { |
| 53 DCHECK_EQ(a->channels(), b->channels()); | 46 DCHECK_EQ(a->channels(), b->channels()); |
| 54 DCHECK_GE(frame_offset_a, 0); | 47 DCHECK_GE(frame_offset_a, 0); |
| 55 DCHECK_GE(frame_offset_b, 0); | 48 DCHECK_GE(frame_offset_b, 0); |
| 56 DCHECK_LE(frame_offset_a + num_frames, a->frames()); | 49 DCHECK_LE(frame_offset_a + num_frames, a->frames()); |
| 57 DCHECK_LE(frame_offset_b + num_frames, b->frames()); | 50 DCHECK_LE(frame_offset_b + num_frames, b->frames()); |
| 58 | 51 |
| 59 // SIMD optimized variants can provide a massive speedup to this operation. | |
| 60 #if defined(USE_SIMD) | |
| 61 const int rem = num_frames % 4; | |
| 62 const int last_index = num_frames - rem; | |
| 63 const int channels = a->channels(); | 52 const int channels = a->channels(); |
| 64 for (int ch = 0; ch < channels; ++ch) { | 53 for (int ch = 0; ch < channels; ++ch) { |
| 65 const float* a_src = a->channel(ch) + frame_offset_a; | 54 dot_product[ch] = |
| 66 const float* b_src = b->channel(ch) + frame_offset_b; | 55 vector_math::DotProduct(a->channel(ch) + frame_offset_a, |
| 67 | 56 b->channel(ch) + frame_offset_b, num_frames); |
| 68 #if defined(ARCH_CPU_X86_FAMILY) | |
| 69 // First sum all components. | |
| 70 __m128 m_sum = _mm_setzero_ps(); | |
| 71 for (int s = 0; s < last_index; s += 4) { | |
| 72 m_sum = _mm_add_ps( | |
| 73 m_sum, _mm_mul_ps(_mm_loadu_ps(a_src + s), _mm_loadu_ps(b_src + s))); | |
| 74 } | |
| 75 | |
| 76 // Reduce to a single float for this channel. Sadly, SSE1,2 doesn't have a | |
| 77 // horizontal sum function, so we have to condense manually. | |
| 78 m_sum = _mm_add_ps(_mm_movehl_ps(m_sum, m_sum), m_sum); | |
| 79 _mm_store_ss(dot_product + ch, | |
| 80 _mm_add_ss(m_sum, _mm_shuffle_ps(m_sum, m_sum, 1))); | |
| 81 #elif defined(ARCH_CPU_ARM_FAMILY) | |
| 82 // First sum all components. | |
| 83 float32x4_t m_sum = vmovq_n_f32(0); | |
| 84 for (int s = 0; s < last_index; s += 4) | |
| 85 m_sum = vmlaq_f32(m_sum, vld1q_f32(a_src + s), vld1q_f32(b_src + s)); | |
| 86 | |
| 87 // Reduce to a single float for this channel. | |
| 88 float32x2_t m_half = vadd_f32(vget_high_f32(m_sum), vget_low_f32(m_sum)); | |
| 89 dot_product[ch] = vget_lane_f32(vpadd_f32(m_half, m_half), 0); | |
| 90 #endif | |
| 91 } | |
| 92 | |
| 93 if (!rem) | |
| 94 return; | |
| 95 num_frames = rem; | |
| 96 frame_offset_a += last_index; | |
| 97 frame_offset_b += last_index; | |
| 98 #else | |
| 99 memset(dot_product, 0, sizeof(*dot_product) * a->channels()); | |
| 100 #endif // defined(USE_SIMD) | |
| 101 | |
| 102 // C version is required to handle remainder of frames (% 4 != 0) | |
| 103 for (int k = 0; k < a->channels(); ++k) { | |
| 104 const float* ch_a = a->channel(k) + frame_offset_a; | |
| 105 const float* ch_b = b->channel(k) + frame_offset_b; | |
| 106 for (int n = 0; n < num_frames; ++n) | |
| 107 dot_product[k] += *ch_a++ * *ch_b++; | |
| 108 } | 57 } |
| 109 } | 58 } |
| 110 | 59 |
| 111 void MultiChannelMovingBlockEnergies(const AudioBus* input, | 60 void MultiChannelMovingBlockEnergies(const AudioBus* input, |
| 112 int frames_per_block, | 61 int frames_per_block, |
| 113 float* energy) { | 62 float* energy) { |
| 114 int num_blocks = input->frames() - (frames_per_block - 1); | 63 int num_blocks = input->frames() - (frames_per_block - 1); |
| 115 int channels = input->channels(); | 64 int channels = input->channels(); |
| 116 | 65 |
| 117 for (int k = 0; k < input->channels(); ++k) { | 66 for (int k = 0; k < input->channels(); ++k) { |
| (...skipping 193 matching lines...) |
// Writes |window_length| raised-cosine window samples into |window|:
//   window[n] = 0.5 * (1 - cos(2 * pi * n / window_length)),  0 <= n < length.
// The denominator is |window_length| (not |window_length| - 1), so the first
// sample is zero while the final sample is not.
// |window| must point to storage for at least |window_length| floats.
void GetSymmetricHanningWindow(int window_length, float* window) {
  // Angular increment per sample. The expression is evaluated in double
  // precision (M_PI is a double) and then narrowed to float.
  const float scale = 2.0f * M_PI / window_length;

  float* out = window;
  int n = 0;
  while (n < window_length) {
    const float phase = n * scale;
    *out = 0.5f * (1.0f - cosf(phase));
    ++out;
    ++n;
  }
}
| 316 | 265 |
| 317 } // namespace internal | 266 } // namespace internal |
| 318 | 267 |
| 319 } // namespace media | 268 } // namespace media |
| 320 | 269 |
| OLD | NEW |