| OLD | NEW | 
|---|---|
| 1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. | 
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be | 
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. | 
| 4 | 4 | 
| 5 // MSVC++ requires this to be set before any other includes to get M_PI. | 5 // MSVC++ requires this to be set before any other includes to get M_PI. | 
| 6 #define _USE_MATH_DEFINES | 6 #define _USE_MATH_DEFINES | 
| 7 | 7 | 
| 8 #include "media/filters/wsola_internals.h" | 8 #include "media/filters/wsola_internals.h" | 
| 9 | 9 | 
| 10 #include <algorithm> | 10 #include <algorithm> | 
| 11 #include <cmath> | 11 #include <cmath> | 
| 12 #include <limits> | 12 #include <limits> | 
| 13 #include <memory> | 13 #include <memory> | 
| 14 | 14 | 
| 15 #include "base/logging.h" | 15 #include "base/logging.h" | 
| 16 #include "media/base/audio_bus.h" | 16 #include "media/base/audio_bus.h" | 
| 17 | 17 | 
|  | 18 #if defined(ARCH_CPU_X86_FAMILY) | 
|  | 19 #define USE_SIMD 1 | 
|  | 20 #include <xmmintrin.h> | 
|  | 21 #elif defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON) | 
|  | 22 #define USE_SIMD 1 | 
|  | 23 #include <arm_neon.h> | 
|  | 24 #endif | 
|  | 25 | 
| 18 namespace media { | 26 namespace media { | 
| 19 | 27 | 
| 20 namespace internal { | 28 namespace internal { | 
| 21 | 29 | 
| 22 bool InInterval(int n, Interval q) { | 30 bool InInterval(int n, Interval q) { | 
| 23   return n >= q.first && n <= q.second; | 31   return n >= q.first && n <= q.second; | 
| 24 } | 32 } | 
| 25 | 33 | 
| 26 float MultiChannelSimilarityMeasure(const float* dot_prod_a_b, | 34 float MultiChannelSimilarityMeasure(const float* dot_prod_a_b, | 
| 27                                     const float* energy_a, | 35                                     const float* energy_a, | 
| (...skipping 13 matching lines...) Expand all  Loading... | 
| 41                             const AudioBus* b, | 49                             const AudioBus* b, | 
| 42                             int frame_offset_b, | 50                             int frame_offset_b, | 
| 43                             int num_frames, | 51                             int num_frames, | 
| 44                             float* dot_product) { | 52                             float* dot_product) { | 
| 45   DCHECK_EQ(a->channels(), b->channels()); | 53   DCHECK_EQ(a->channels(), b->channels()); | 
| 46   DCHECK_GE(frame_offset_a, 0); | 54   DCHECK_GE(frame_offset_a, 0); | 
| 47   DCHECK_GE(frame_offset_b, 0); | 55   DCHECK_GE(frame_offset_b, 0); | 
| 48   DCHECK_LE(frame_offset_a + num_frames, a->frames()); | 56   DCHECK_LE(frame_offset_a + num_frames, a->frames()); | 
| 49   DCHECK_LE(frame_offset_b + num_frames, b->frames()); | 57   DCHECK_LE(frame_offset_b + num_frames, b->frames()); | 
| 50 | 58 | 
|  | 59 // SIMD optimized variants can provide a massive speedup to this operation. | 
|  | 60 #if defined(USE_SIMD) | 
|  | 61   const int rem = num_frames % 4; | 
|  | 62   const int last_index = num_frames - rem; | 
|  | 63   const int channels = a->channels(); | 
|  | 64   for (int ch = 0; ch < channels; ++ch) { | 
|  | 65     const float* a_src = a->channel(ch) + frame_offset_a; | 
|  | 66     const float* b_src = b->channel(ch) + frame_offset_b; | 
|  | 67 | 
|  | 68 #if defined(ARCH_CPU_X86_FAMILY) | 
|  | 69     // First sum all components. | 
|  | 70     __m128 m_sum = _mm_setzero_ps(); | 
|  | 71     for (int s = 0; s < last_index; s += 4) { | 
|  | 72       m_sum = _mm_add_ps( | 
|  | 73           m_sum, _mm_mul_ps(_mm_loadu_ps(a_src + s), _mm_loadu_ps(b_src + s))); | 
|  | 74     } | 
|  | 75 | 
|  | 76     // Reduce to a single float for this channel. Sadly, SSE1,2 doesn't have a | 
|  | 77     // horizontal sum function, so we have to condense manually. | 
|  | 78     m_sum = _mm_add_ps(_mm_movehl_ps(m_sum, m_sum), m_sum); | 
|  | 79     _mm_store_ss(dot_product + ch, | 
|  | 80                  _mm_add_ss(m_sum, _mm_shuffle_ps(m_sum, m_sum, 1))); | 
|  | 81 #elif defined(ARCH_CPU_ARM_FAMILY) | 
|  | 82     // First sum all components. | 
|  | 83     float32x4_t m_sum = vmovq_n_f32(0); | 
|  | 84     for (int s = 0; s < last_index; s += 4) | 
|  | 85       m_sum = vmlaq_f32(m_sum, vld1q_f32(a_src + s), vld1q_f32(b_src + s)); | 
|  | 86 | 
|  | 87     // Reduce to a single float for this channel. | 
|  | 88     float32x2_t m_half = vadd_f32(vget_high_f32(m_sum), vget_low_f32(m_sum)); | 
|  | 89     dot_product[ch] = vget_lane_f32(vpadd_f32(m_half, m_half), 0); | 
|  | 90 #endif | 
|  | 91   } | 
|  | 92 | 
|  | 93   if (!rem) | 
|  | 94     return; | 
|  | 95   num_frames = rem; | 
|  | 96   frame_offset_a += last_index; | 
|  | 97   frame_offset_b += last_index; | 
|  | 98 #else | 
| 51   memset(dot_product, 0, sizeof(*dot_product) * a->channels()); | 99   memset(dot_product, 0, sizeof(*dot_product) * a->channels()); | 
|  | 100 #endif  // defined(USE_SIMD) | 
|  | 101 | 
|  | 102   // C version is required to handle remainder of frames (% 4 != 0) | 
| 52   for (int k = 0; k < a->channels(); ++k) { | 103   for (int k = 0; k < a->channels(); ++k) { | 
| 53     const float* ch_a = a->channel(k) + frame_offset_a; | 104     const float* ch_a = a->channel(k) + frame_offset_a; | 
| 54     const float* ch_b = b->channel(k) + frame_offset_b; | 105     const float* ch_b = b->channel(k) + frame_offset_b; | 
| 55     for (int n = 0; n < num_frames; ++n) { | 106     for (int n = 0; n < num_frames; ++n) | 
| 56       dot_product[k] += *ch_a++ * *ch_b++; | 107       dot_product[k] += *ch_a++ * *ch_b++; | 
| 57     } |  | 
| 58   } | 108   } | 
| 59 } | 109 } | 
| 60 | 110 | 
| 61 void MultiChannelMovingBlockEnergies(const AudioBus* input, | 111 void MultiChannelMovingBlockEnergies(const AudioBus* input, | 
| 62                                      int frames_per_block, | 112                                      int frames_per_block, | 
| 63                                      float* energy) { | 113                                      float* energy) { | 
| 64   int num_blocks = input->frames() - (frames_per_block - 1); | 114   int num_blocks = input->frames() - (frames_per_block - 1); | 
| 65   int channels = input->channels(); | 115   int channels = input->channels(); | 
| 66 | 116 | 
| 67   for (int k = 0; k < input->channels(); ++k) { | 117   for (int k = 0; k < input->channels(); ++k) { | 
| (...skipping 193 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
// Writes window_length raised-cosine window samples into window:
//   window[n] = 0.5 * (1 - cos(2 * pi * n / window_length)),  0 <= n < window_length.
// Note that the cosine period is divided by window_length (not
// window_length - 1). window must have room for at least window_length
// floats; nothing is written when window_length <= 0.
void GetSymmetricHanningWindow(int window_length, float* window) {
  // Phase advance between two consecutive samples.
  const float phase_step = 2.0f * M_PI / window_length;
  int sample = 0;
  while (sample < window_length) {
    const float phase = sample * phase_step;
    window[sample] = 0.5f * (1.0f - cosf(phase));
    ++sample;
  }
}
| 266 | 316 | 
| 267 }  // namespace internal | 317 }  // namespace internal | 
| 268 | 318 | 
| 269 }  // namespace media | 319 }  // namespace media | 
| 270 | 320 | 
| OLD | NEW | 
|---|---|