| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 // | 4 // |
| 5 // Input buffer layout, dividing the total buffer into regions (r0_ - r5_): | 5 // Input buffer layout, dividing the total buffer into regions (r0_ - r5_): |
| 6 // | 6 // |
| 7 // |----------------|-----------------------------------------|----------------| | 7 // |----------------|-----------------------------------------|----------------| |
| 8 // | 8 // |
| 9 // kBlockSize + kKernelSize / 2 | 9 // kBlockSize + kKernelSize / 2 |
| 10 // <---------------------------------------------------------> | 10 // <---------------------------------------------------------> |
| (...skipping 19 matching lines...) Expand all Loading... |
| 30 // | 30 // |
| 31 // Note: we're glossing over how the sub-sample handling works with | 31 // Note: we're glossing over how the sub-sample handling works with |
| 32 // |virtual_source_idx_|, etc. | 32 // |virtual_source_idx_|, etc. |
| 33 | 33 |
| 34 // MSVC++ requires this to be set before any other includes to get M_PI. | 34 // MSVC++ requires this to be set before any other includes to get M_PI. |
| 35 #define _USE_MATH_DEFINES | 35 #define _USE_MATH_DEFINES |
| 36 | 36 |
| 37 #include "media/base/sinc_resampler.h" | 37 #include "media/base/sinc_resampler.h" |
| 38 | 38 |
| 39 #include <cmath> | 39 #include <cmath> |
| 40 #include <limits> |
| 40 | 41 |
| 41 #include "base/cpu.h" | 42 #include "base/cpu.h" |
| 42 #include "base/logging.h" | 43 #include "base/logging.h" |
| 43 | 44 |
| 44 #if defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON) | 45 #if defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON) |
| 45 #include <arm_neon.h> | 46 #include <arm_neon.h> |
| 46 #endif | 47 #endif |
| 47 | 48 |
| 48 namespace media { | 49 namespace media { |
| 49 | 50 |
// Returns the scale factor applied to the sinc() argument for the given
// input/output sample rate ratio.
//
// |io_ratio| is the input rate divided by the output rate. For ratios above
// 1.0 the result is 0.9 / |io_ratio|; for every other value it is 0.9.
static double SincScaleFactor(double io_ratio) {
  // |sinc_scale_factor| is basically the normalized cutoff frequency of the
  // low-pass filter.
  double sinc_scale_factor = io_ratio > 1.0 ? 1.0 / io_ratio : 1.0;

  // The sinc function is an idealized brick-wall filter, but since we're
  // windowing it the transition from pass to stop does not happen right away.
  // So we should adjust the low pass filter cutoff slightly downward to avoid
  // some aliasing at the very high-end.
  // TODO(crogers): this value is empirical and to be more exact should vary
  // depending on kKernelSize.
  sinc_scale_factor *= 0.9;

  return sinc_scale_factor;
}
| 66 |
| 50 SincResampler::SincResampler(double io_sample_rate_ratio, const ReadCB& read_cb) | 67 SincResampler::SincResampler(double io_sample_rate_ratio, const ReadCB& read_cb) |
| 51 : io_sample_rate_ratio_(io_sample_rate_ratio), | 68 : io_sample_rate_ratio_(io_sample_rate_ratio), |
| 52 virtual_source_idx_(0), | 69 virtual_source_idx_(0), |
| 53 buffer_primed_(false), | 70 buffer_primed_(false), |
| 54 read_cb_(read_cb), | 71 read_cb_(read_cb), |
| 55 // Create input buffers with a 16-byte alignment for SSE optimizations. | 72 // Create input buffers with a 16-byte alignment for SSE optimizations. |
| 56 kernel_storage_(static_cast<float*>( | 73 kernel_storage_(static_cast<float*>( |
| 57 base::AlignedAlloc(sizeof(float) * kKernelStorageSize, 16))), | 74 base::AlignedAlloc(sizeof(float) * kKernelStorageSize, 16))), |
| 75 kernel_pre_sinc_storage_(static_cast<float*>( |
| 76 base::AlignedAlloc(sizeof(float) * kKernelStorageSize, 16))), |
| 77 kernel_window_storage_(static_cast<float*>( |
| 78 base::AlignedAlloc(sizeof(float) * kKernelStorageSize, 16))), |
| 58 input_buffer_(static_cast<float*>( | 79 input_buffer_(static_cast<float*>( |
| 59 base::AlignedAlloc(sizeof(float) * kBufferSize, 16))), | 80 base::AlignedAlloc(sizeof(float) * kBufferSize, 16))), |
| 60 #if defined(ARCH_CPU_X86_FAMILY) && !defined(__SSE__) | 81 #if defined(ARCH_CPU_X86_FAMILY) && !defined(__SSE__) |
| 61 convolve_proc_(base::CPU().has_sse() ? Convolve_SSE : Convolve_C), | 82 convolve_proc_(base::CPU().has_sse() ? Convolve_SSE : Convolve_C), |
| 62 #endif | 83 #endif |
| 63 // Setup various region pointers in the buffer (see diagram above). | 84 // Setup various region pointers in the buffer (see diagram above). |
| 64 r0_(input_buffer_.get() + kKernelSize / 2), | 85 r0_(input_buffer_.get() + kKernelSize / 2), |
| 65 r1_(input_buffer_.get()), | 86 r1_(input_buffer_.get()), |
| 66 r2_(r0_), | 87 r2_(r0_), |
| 67 r3_(r0_ + kBlockSize - kKernelSize / 2), | 88 r3_(r0_ + kBlockSize - kKernelSize / 2), |
| (...skipping 14 matching lines...) Expand all Loading... |
| 82 DCHECK_EQ(r2_ - r1_, r5_ - r2_); | 103 DCHECK_EQ(r2_ - r1_, r5_ - r2_); |
| 83 // r3_ left of r4_, r5_ left of r0_ and r3_ size correct. | 104 // r3_ left of r4_, r5_ left of r0_ and r3_ size correct. |
| 84 DCHECK_EQ(r4_ - r3_, r5_ - r0_); | 105 DCHECK_EQ(r4_ - r3_, r5_ - r0_); |
| 85 // r3_, r4_ size correct and r4_ at the end of the buffer. | 106 // r3_, r4_ size correct and r4_ at the end of the buffer. |
| 86 DCHECK_EQ(r4_ + (r4_ - r3_), r1_ + kBufferSize); | 107 DCHECK_EQ(r4_ + (r4_ - r3_), r1_ + kBufferSize); |
| 87 // r5_ size correct and at the end of the buffer. | 108 // r5_ size correct and at the end of the buffer. |
| 88 DCHECK_EQ(r5_ + kBlockSize, r1_ + kBufferSize); | 109 DCHECK_EQ(r5_ + kBlockSize, r1_ + kBufferSize); |
| 89 | 110 |
| 90 memset(kernel_storage_.get(), 0, | 111 memset(kernel_storage_.get(), 0, |
| 91 sizeof(*kernel_storage_.get()) * kKernelStorageSize); | 112 sizeof(*kernel_storage_.get()) * kKernelStorageSize); |
| 113 memset(kernel_pre_sinc_storage_.get(), 0, |
| 114 sizeof(*kernel_pre_sinc_storage_.get()) * kKernelStorageSize); |
| 115 memset(kernel_window_storage_.get(), 0, |
| 116 sizeof(*kernel_window_storage_.get()) * kKernelStorageSize); |
| 92 memset(input_buffer_.get(), 0, sizeof(*input_buffer_.get()) * kBufferSize); | 117 memset(input_buffer_.get(), 0, sizeof(*input_buffer_.get()) * kBufferSize); |
| 93 | 118 |
| 94 InitializeKernel(); | 119 InitializeKernel(); |
| 95 } | 120 } |
| 96 | 121 |
| 97 SincResampler::~SincResampler() {} | 122 SincResampler::~SincResampler() {} |
| 98 | 123 |
| 99 void SincResampler::InitializeKernel() { | 124 void SincResampler::InitializeKernel() { |
| 100 // Blackman window parameters. | 125 // Blackman window parameters. |
| 101 static const double kAlpha = 0.16; | 126 static const double kAlpha = 0.16; |
| 102 static const double kA0 = 0.5 * (1.0 - kAlpha); | 127 static const double kA0 = 0.5 * (1.0 - kAlpha); |
| 103 static const double kA1 = 0.5; | 128 static const double kA1 = 0.5; |
| 104 static const double kA2 = 0.5 * kAlpha; | 129 static const double kA2 = 0.5 * kAlpha; |
| 105 | 130 |
| 106 // |sinc_scale_factor| is basically the normalized cutoff frequency of the | |
| 107 // low-pass filter. | |
| 108 double sinc_scale_factor = | |
| 109 io_sample_rate_ratio_ > 1.0 ? 1.0 / io_sample_rate_ratio_ : 1.0; | |
| 110 | |
| 111 // The sinc function is an idealized brick-wall filter, but since we're | |
| 112 // windowing it the transition from pass to stop does not happen right away. | |
| 113 // So we should adjust the low pass filter cutoff slightly downward to avoid | |
| 114 // some aliasing at the very high-end. | |
| 115 // TODO(crogers): this value is empirical and to be more exact should vary | |
| 116 // depending on kKernelSize. | |
| 117 sinc_scale_factor *= 0.9; | |
| 118 | |
| 119 // Generates a set of windowed sinc() kernels. | 131 // Generates a set of windowed sinc() kernels. |
| 120 // We generate a range of sub-sample offsets from 0.0 to 1.0. | 132 // We generate a range of sub-sample offsets from 0.0 to 1.0. |
| 133 const double sinc_scale_factor = SincScaleFactor(io_sample_rate_ratio_); |
| 121 for (int offset_idx = 0; offset_idx <= kKernelOffsetCount; ++offset_idx) { | 134 for (int offset_idx = 0; offset_idx <= kKernelOffsetCount; ++offset_idx) { |
| 122 double subsample_offset = | 135 const float subsample_offset = |
| 123 static_cast<double>(offset_idx) / kKernelOffsetCount; | 136 static_cast<float>(offset_idx) / kKernelOffsetCount; |
| 124 | 137 |
| 125 for (int i = 0; i < kKernelSize; ++i) { | 138 for (int i = 0; i < kKernelSize; ++i) { |
| 126 // Compute the sinc with offset. | 139 const int idx = i + offset_idx * kKernelSize; |
| 127 double s = | 140 const float pre_sinc = M_PI * (i - kKernelSize / 2 - subsample_offset); |
| 128 sinc_scale_factor * M_PI * (i - kKernelSize / 2 - subsample_offset); | 141 kernel_pre_sinc_storage_[idx] = pre_sinc; |
| 129 double sinc = (!s ? 1.0 : sin(s) / s) * sinc_scale_factor; | |
| 130 | 142 |
| 131 // Compute Blackman window, matching the offset of the sinc(). | 143 // Compute Blackman window, matching the offset of the sinc(). |
| 132 double x = (i - subsample_offset) / kKernelSize; | 144 const float x = (i - subsample_offset) / kKernelSize; |
| 133 double window = kA0 - kA1 * cos(2.0 * M_PI * x) + kA2 | 145 const float window = kA0 - kA1 * cos(2.0 * M_PI * x) + kA2 |
| 134 * cos(4.0 * M_PI * x); | 146 * cos(4.0 * M_PI * x); |
| 147 kernel_window_storage_[idx] = window; |
| 135 | 148 |
| 136 // Window the sinc() function and store at the correct offset. | 149 // Compute the sinc with offset, then window the sinc() function and store |
| 137 kernel_storage_.get()[i + offset_idx * kKernelSize] = sinc * window; | 150 // at the correct offset. |
| 151 if (pre_sinc == 0) { |
| 152 kernel_storage_[idx] = sinc_scale_factor * window; |
| 153 } else { |
| 154 kernel_storage_[idx] = |
| 155 window * sin(sinc_scale_factor * pre_sinc) / pre_sinc; |
| 156 } |
| 138 } | 157 } |
| 139 } | 158 } |
| 140 } | 159 } |
| 160 |
| 161 void SincResampler::SetRatio(double io_sample_rate_ratio) { |
| 162 if (fabs(io_sample_rate_ratio_ - io_sample_rate_ratio) < |
| 163 std::numeric_limits<double>::epsilon()) { |
| 164 return; |
| 165 } |
| 166 |
| 167 io_sample_rate_ratio_ = io_sample_rate_ratio; |
| 168 |
| 169 // Optimize reinitialization by reusing values which are independent of |
| 170 // |sinc_scale_factor|. Provides a 3x speedup. |
| 171 const double sinc_scale_factor = SincScaleFactor(io_sample_rate_ratio_); |
| 172 for (int offset_idx = 0; offset_idx <= kKernelOffsetCount; ++offset_idx) { |
| 173 for (int i = 0; i < kKernelSize; ++i) { |
| 174 const int idx = i + offset_idx * kKernelSize; |
| 175 const float window = kernel_window_storage_[idx]; |
| 176 const float pre_sinc = kernel_pre_sinc_storage_[idx]; |
| 177 |
| 178 if (pre_sinc == 0) { |
| 179 kernel_storage_[idx] = sinc_scale_factor * window; |
| 180 } else { |
| 181 kernel_storage_[idx] = |
| 182 window * sin(sinc_scale_factor * pre_sinc) / pre_sinc; |
| 183 } |
| 184 } |
| 185 } |
| 186 } |
| 141 | 187 |
| 142 // If we know the minimum architecture avoid function hopping for CPU detection. | 188 // If we know the minimum architecture avoid function hopping for CPU detection. |
| 143 #if defined(ARCH_CPU_X86_FAMILY) | 189 #if defined(ARCH_CPU_X86_FAMILY) |
| 144 #if defined(__SSE__) | 190 #if defined(__SSE__) |
| 145 #define CONVOLVE_FUNC Convolve_SSE | 191 #define CONVOLVE_FUNC Convolve_SSE |
| 146 #else | 192 #else |
| 147 // X86 CPU detection required. |convolve_proc_| will be set upon construction. | 193 // X86 CPU detection required. |convolve_proc_| will be set upon construction. |
| 148 // TODO(dalecurtis): Once Chrome moves to a SSE baseline this can be removed. | 194 // TODO(dalecurtis): Once Chrome moves to a SSE baseline this can be removed. |
| 149 #define CONVOLVE_FUNC convolve_proc_ | 195 #define CONVOLVE_FUNC convolve_proc_ |
| 150 #endif | 196 #endif |
| (...skipping 117 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 268 vmulq_f32(m_sums1, vmovq_n_f32(1.0 - kernel_interpolation_factor)), | 314 vmulq_f32(m_sums1, vmovq_n_f32(1.0 - kernel_interpolation_factor)), |
| 269 m_sums2, vmovq_n_f32(kernel_interpolation_factor)); | 315 m_sums2, vmovq_n_f32(kernel_interpolation_factor)); |
| 270 | 316 |
| 271 // Sum components together. | 317 // Sum components together. |
| 272 float32x2_t m_half = vadd_f32(vget_high_f32(m_sums1), vget_low_f32(m_sums1)); | 318 float32x2_t m_half = vadd_f32(vget_high_f32(m_sums1), vget_low_f32(m_sums1)); |
| 273 return vget_lane_f32(vpadd_f32(m_half, m_half), 0); | 319 return vget_lane_f32(vpadd_f32(m_half, m_half), 0); |
| 274 } | 320 } |
| 275 #endif | 321 #endif |
| 276 | 322 |
| 277 } // namespace media | 323 } // namespace media |
| OLD | NEW |