OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 // | 4 // |
5 // Input buffer layout, dividing the total buffer into regions (r0_ - r5_): | 5 // Input buffer layout, dividing the total buffer into regions (r0_ - r5_): |
6 // | 6 // |
7 // |----------------|-----------------------------------------|----------------| | 7 // |----------------|-----------------------------------------|----------------| |
8 // | 8 // |
9 // kBlockSize + kKernelSize / 2 | 9 // kBlockSize + kKernelSize / 2 |
10 // <---------------------------------------------------------> | 10 // <---------------------------------------------------------> |
(...skipping 29 matching lines...) Expand all Loading... | |
40 | 40 |
41 #include "base/cpu.h" | 41 #include "base/cpu.h" |
42 #include "base/logging.h" | 42 #include "base/logging.h" |
43 | 43 |
44 #if defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON) | 44 #if defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON) |
45 #include <arm_neon.h> | 45 #include <arm_neon.h> |
46 #endif | 46 #endif |
47 | 47 |
48 namespace media { | 48 namespace media { |
49 | 49 |
50 static double SincScaleFactor(double io_ratio) { | |
51 // |sinc_scale_factor| is basically the normalized cutoff frequency of the | |
52 // low-pass filter. | |
53 double sinc_scale_factor = io_ratio > 1.0 ? 1.0 / io_ratio : 1.0; | |
54 | |
55 // The sinc function is an idealized brick-wall filter, but since we're | |
56 // windowing it the transition from pass to stop does not happen right away. | |
57 // So we should adjust the low pass filter cutoff slightly downward to avoid | |
58 // some aliasing at the very high-end. | |
59 // TODO(crogers): this value is empirical and to be more exact should vary | |
60 // depending on kKernelSize. | |
61 sinc_scale_factor *= 0.9; | |
62 | |
63 return sinc_scale_factor; | |
64 } | |
65 | |
50 SincResampler::SincResampler(double io_sample_rate_ratio, const ReadCB& read_cb) | 66 SincResampler::SincResampler(double io_sample_rate_ratio, const ReadCB& read_cb) |
51 : io_sample_rate_ratio_(io_sample_rate_ratio), | 67 : io_sample_rate_ratio_(io_sample_rate_ratio), |
52 virtual_source_idx_(0), | 68 virtual_source_idx_(0), |
53 buffer_primed_(false), | 69 buffer_primed_(false), |
54 read_cb_(read_cb), | 70 read_cb_(read_cb), |
55 // Create input buffers with a 16-byte alignment for SSE optimizations. | 71 // Create input buffers with a 16-byte alignment for SSE optimizations. |
56 kernel_storage_(static_cast<float*>( | 72 kernel_storage_(static_cast<float*>( |
57 base::AlignedAlloc(sizeof(float) * kKernelStorageSize, 16))), | 73 base::AlignedAlloc(sizeof(float) * kKernelStorageSize, 16))), |
74 kernel_pre_sinc_storage_(static_cast<float*>( | |
75 base::AlignedAlloc(sizeof(float) * kKernelStorageSize, 16))), | |
76 kernel_window_storage_(static_cast<float*>( | |
77 base::AlignedAlloc(sizeof(float) * kKernelStorageSize, 16))), | |
58 input_buffer_(static_cast<float*>( | 78 input_buffer_(static_cast<float*>( |
59 base::AlignedAlloc(sizeof(float) * kBufferSize, 16))), | 79 base::AlignedAlloc(sizeof(float) * kBufferSize, 16))), |
60 #if defined(ARCH_CPU_X86_FAMILY) && !defined(__SSE__) | 80 #if defined(ARCH_CPU_X86_FAMILY) && !defined(__SSE__) |
61 convolve_proc_(base::CPU().has_sse() ? Convolve_SSE : Convolve_C), | 81 convolve_proc_(base::CPU().has_sse() ? Convolve_SSE : Convolve_C), |
62 #endif | 82 #endif |
63 // Setup various region pointers in the buffer (see diagram above). | 83 // Setup various region pointers in the buffer (see diagram above). |
64 r0_(input_buffer_.get() + kKernelSize / 2), | 84 r0_(input_buffer_.get() + kKernelSize / 2), |
65 r1_(input_buffer_.get()), | 85 r1_(input_buffer_.get()), |
66 r2_(r0_), | 86 r2_(r0_), |
67 r3_(r0_ + kBlockSize - kKernelSize / 2), | 87 r3_(r0_ + kBlockSize - kKernelSize / 2), |
(...skipping 14 matching lines...) Expand all Loading... | |
82 DCHECK_EQ(r2_ - r1_, r5_ - r2_); | 102 DCHECK_EQ(r2_ - r1_, r5_ - r2_); |
83 // r3_ left of r4_, r5_ left of r0_ and r3_ size correct. | 103 // r3_ left of r4_, r5_ left of r0_ and r3_ size correct. |
84 DCHECK_EQ(r4_ - r3_, r5_ - r0_); | 104 DCHECK_EQ(r4_ - r3_, r5_ - r0_); |
85 // r3_, r4_ size correct and r4_ at the end of the buffer. | 105 // r3_, r4_ size correct and r4_ at the end of the buffer. |
86 DCHECK_EQ(r4_ + (r4_ - r3_), r1_ + kBufferSize); | 106 DCHECK_EQ(r4_ + (r4_ - r3_), r1_ + kBufferSize); |
87 // r5_ size correct and at the end of the buffer. | 107 // r5_ size correct and at the end of the buffer. |
88 DCHECK_EQ(r5_ + kBlockSize, r1_ + kBufferSize); | 108 DCHECK_EQ(r5_ + kBlockSize, r1_ + kBufferSize); |
89 | 109 |
90 memset(kernel_storage_.get(), 0, | 110 memset(kernel_storage_.get(), 0, |
91 sizeof(*kernel_storage_.get()) * kKernelStorageSize); | 111 sizeof(*kernel_storage_.get()) * kKernelStorageSize); |
112 memset(kernel_pre_sinc_storage_.get(), 0, | |
113 sizeof(*kernel_pre_sinc_storage_.get()) * kKernelStorageSize); | |
114 memset(kernel_window_storage_.get(), 0, | |
115 sizeof(*kernel_window_storage_.get()) * kKernelStorageSize); | |
92 memset(input_buffer_.get(), 0, sizeof(*input_buffer_.get()) * kBufferSize); | 116 memset(input_buffer_.get(), 0, sizeof(*input_buffer_.get()) * kBufferSize); |
93 | 117 |
94 InitializeKernel(); | 118 InitializeKernel(); |
95 } | 119 } |
96 | 120 |
97 SincResampler::~SincResampler() {} | 121 SincResampler::~SincResampler() {} |
98 | 122 |
99 void SincResampler::InitializeKernel() { | 123 void SincResampler::InitializeKernel() { |
100 // Blackman window parameters. | 124 // Blackman window parameters. |
101 static const double kAlpha = 0.16; | 125 static const double kAlpha = 0.16; |
102 static const double kA0 = 0.5 * (1.0 - kAlpha); | 126 static const double kA0 = 0.5 * (1.0 - kAlpha); |
103 static const double kA1 = 0.5; | 127 static const double kA1 = 0.5; |
104 static const double kA2 = 0.5 * kAlpha; | 128 static const double kA2 = 0.5 * kAlpha; |
105 | 129 |
106 // |sinc_scale_factor| is basically the normalized cutoff frequency of the | |
107 // low-pass filter. | |
108 double sinc_scale_factor = | |
109 io_sample_rate_ratio_ > 1.0 ? 1.0 / io_sample_rate_ratio_ : 1.0; | |
110 | |
111 // The sinc function is an idealized brick-wall filter, but since we're | |
112 // windowing it the transition from pass to stop does not happen right away. | |
113 // So we should adjust the low pass filter cutoff slightly downward to avoid | |
114 // some aliasing at the very high-end. | |
115 // TODO(crogers): this value is empirical and to be more exact should vary | |
116 // depending on kKernelSize. | |
117 sinc_scale_factor *= 0.9; | |
118 | |
119 // Generates a set of windowed sinc() kernels. | 130 // Generates a set of windowed sinc() kernels. |
120 // We generate a range of sub-sample offsets from 0.0 to 1.0. | 131 // We generate a range of sub-sample offsets from 0.0 to 1.0. |
132 const double sinc_scale_factor = SincScaleFactor(io_sample_rate_ratio_); | |
121 for (int offset_idx = 0; offset_idx <= kKernelOffsetCount; ++offset_idx) { | 133 for (int offset_idx = 0; offset_idx <= kKernelOffsetCount; ++offset_idx) { |
122 double subsample_offset = | 134 const float subsample_offset = |
123 static_cast<double>(offset_idx) / kKernelOffsetCount; | 135 static_cast<float>(offset_idx) / kKernelOffsetCount; |
124 | 136 |
125 for (int i = 0; i < kKernelSize; ++i) { | 137 for (int i = 0; i < kKernelSize; ++i) { |
126 // Compute the sinc with offset. | 138 const int idx = i + offset_idx * kKernelSize; |
127 double s = | 139 const float pre_sinc = M_PI * (i - kKernelSize / 2 - subsample_offset); |
128 sinc_scale_factor * M_PI * (i - kKernelSize / 2 - subsample_offset); | 140 kernel_pre_sinc_storage_[idx] = pre_sinc; |
129 double sinc = (!s ? 1.0 : sin(s) / s) * sinc_scale_factor; | |
130 | 141 |
131 // Compute Blackman window, matching the offset of the sinc(). | 142 // Compute Blackman window, matching the offset of the sinc(). |
132 double x = (i - subsample_offset) / kKernelSize; | 143 const float x = (i - subsample_offset) / kKernelSize; |
133 double window = kA0 - kA1 * cos(2.0 * M_PI * x) + kA2 | 144 const float window = kA0 - kA1 * cos(2.0 * M_PI * x) + kA2 |
134 * cos(4.0 * M_PI * x); | 145 * cos(4.0 * M_PI * x); |
146 kernel_window_storage_[idx] = window; | |
135 | 147 |
136 // Window the sinc() function and store at the correct offset. | 148 // Compute the sinc with offset, then window the sinc() function and store |
137 kernel_storage_.get()[i + offset_idx * kKernelSize] = sinc * window; | 149 // at the correct offset. |
150 if (pre_sinc == 0) { | |
151 kernel_storage_[idx] = sinc_scale_factor * window; | |
152 } else { | |
153 kernel_storage_[idx] = | |
154 window * sin(sinc_scale_factor * pre_sinc) / pre_sinc; | |
155 } | |
138 } | 156 } |
139 } | 157 } |
140 } | 158 } |
159 | |
160 void SincResampler::SetRatio(double io_sample_rate_ratio) { | |
Chris Rogers
2013/04/16 21:36:23
Return early if the ratio is unchanged?
DaleCurtis
2013/04/16 22:29:26
Done using simple fabs() check for now. Sent a qu
| |
161 io_sample_rate_ratio_ = io_sample_rate_ratio; | |
162 | |
163 // Optimize reinitialization by reusing values which are independent of | |
164 // |sinc_scale_factor|. Provides a 3x speedup. | |
165 const double sinc_scale_factor = SincScaleFactor(io_sample_rate_ratio_); | |
166 for (int offset_idx = 0; offset_idx <= kKernelOffsetCount; ++offset_idx) { | |
167 for (int i = 0; i < kKernelSize; ++i) { | |
168 const int idx = i + offset_idx * kKernelSize; | |
169 const float window = kernel_window_storage_[idx]; | |
170 const float pre_sinc = kernel_pre_sinc_storage_[idx]; | |
171 | |
172 if (pre_sinc == 0) { | |
173 kernel_storage_[idx] = sinc_scale_factor * window; | |
174 } else { | |
175 kernel_storage_[idx] = | |
176 window * sin(sinc_scale_factor * pre_sinc) / pre_sinc; | |
177 } | |
178 } | |
179 } | |
180 } | |
141 | 181 |
142 // If we know the minimum architecture avoid function hopping for CPU detection. | 182 // If we know the minimum architecture avoid function hopping for CPU detection. |
143 #if defined(ARCH_CPU_X86_FAMILY) | 183 #if defined(ARCH_CPU_X86_FAMILY) |
144 #if defined(__SSE__) | 184 #if defined(__SSE__) |
145 #define CONVOLVE_FUNC Convolve_SSE | 185 #define CONVOLVE_FUNC Convolve_SSE |
146 #else | 186 #else |
147 // X86 CPU detection required. |convolve_proc_| will be set upon construction. | 187 // X86 CPU detection required. |convolve_proc_| will be set upon construction. |
148 // TODO(dalecurtis): Once Chrome moves to a SSE baseline this can be removed. | 188 // TODO(dalecurtis): Once Chrome moves to a SSE baseline this can be removed. |
149 #define CONVOLVE_FUNC convolve_proc_ | 189 #define CONVOLVE_FUNC convolve_proc_ |
150 #endif | 190 #endif |
(...skipping 117 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
268 vmulq_f32(m_sums1, vmovq_n_f32(1.0 - kernel_interpolation_factor)), | 308 vmulq_f32(m_sums1, vmovq_n_f32(1.0 - kernel_interpolation_factor)), |
269 m_sums2, vmovq_n_f32(kernel_interpolation_factor)); | 309 m_sums2, vmovq_n_f32(kernel_interpolation_factor)); |
270 | 310 |
271 // Sum components together. | 311 // Sum components together. |
272 float32x2_t m_half = vadd_f32(vget_high_f32(m_sums1), vget_low_f32(m_sums1)); | 312 float32x2_t m_half = vadd_f32(vget_high_f32(m_sums1), vget_low_f32(m_sums1)); |
273 return vget_lane_f32(vpadd_f32(m_half, m_half), 0); | 313 return vget_lane_f32(vpadd_f32(m_half, m_half), 0); |
274 } | 314 } |
275 #endif | 315 #endif |
276 | 316 |
277 } // namespace media | 317 } // namespace media |
OLD | NEW |