media/base/sinc_resampler.cc - Issue 19470003: DO NOT COMMIT. Diff of downstream webrtc resampler commit for review purposes.

Side by Side Diff: media/base/sinc_resampler.cc

Issue 19470003: DO NOT COMMIT. Diff of downstream webrtc resampler commit for review purposes. Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: . Created 7 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 /*

2 // Use of this source code is governed by a BSD-style license that can be	2 * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.

3 // found in the LICENSE file.	3 *

4 //	4 * Use of this source code is governed by a BSD-style license

	5 * that can be found in the LICENSE file in the root of the source

	6 * tree. An additional intellectual property rights grant can be found

	7 * in the file PATENTS. All contributing project authors may

	8 * be found in the AUTHORS file in the root of the source tree.

	9 */

	10

	11 // Modified from the Chromium original:

	12 // src/media/base/sinc_resampler.cc

	13

5 // Initial input buffer layout, dividing into regions r0_ to r4_ (note: r0_, r3_	14 // Initial input buffer layout, dividing into regions r0_ to r4_ (note: r0_, r3_

6 // and r4_ will move after the first load):	15 // and r4_ will move after the first load):

7 //	16 //

8 // |----------------|-----------------------------------------|----------------|	17 // |----------------|-----------------------------------------|----------------|

9 //	18 //

10 // request_frames_	19 // request_frames_

11 // <--------------------------------------------------------->	20 // <--------------------------------------------------------->

12 // r0_ (during first load)	21 // r0_ (during first load)

13 //	22 //

14 // kKernelSize / 2 kKernelSize / 2 kKernelSize / 2 kKernelSize / 2	23 // kKernelSize / 2 kKernelSize / 2 kKernelSize / 2 kKernelSize / 2

(...skipping 54 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
69 // r3_, r4_, and block_size_ then need to be reinitialized, so goto (3).	78 // r3_, r4_, and block_size_ then need to be reinitialized, so goto (3).

70 //	79 //

71 // 8) Else, if we're not on the second load, goto (4).	80 // 8) Else, if we're not on the second load, goto (4).

72 //	81 //

73 // Note: we're glossing over how the sub-sample handling works with	82 // Note: we're glossing over how the sub-sample handling works with

74 // |virtual_source_idx_|, etc.	83 // |virtual_source_idx_|, etc.

75	84

76 // MSVC++ requires this to be set before any other includes to get M_PI.	85 // MSVC++ requires this to be set before any other includes to get M_PI.

77 #define _USE_MATH_DEFINES	86 #define _USE_MATH_DEFINES

78	87

79 #include "media/base/sinc_resampler.h"	88 #include "webrtc/common_audio/resampler/sinc_resampler.h"

	89 #include "webrtc/system_wrappers/interface/compile_assert.h"

	90 #include "webrtc/system_wrappers/interface/cpu_features_wrapper.h"

	91 #include "webrtc/typedefs.h"

80	92

81 #include <cmath>	93 #include <cmath>

	94 #include <cstring>

82 #include <limits>	95 #include <limits>

83	96

84 #include "base/cpu.h"	97 namespace webrtc {

85 #include "base/logging.h"

86

87 #if defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON)

88 #include <arm_neon.h>

89 #endif

90

91 namespace media {

92	98

93 static double SincScaleFactor(double io_ratio) {	99 static double SincScaleFactor(double io_ratio) {

94 // |sinc_scale_factor| is basically the normalized cutoff frequency of the	100 // |sinc_scale_factor| is basically the normalized cutoff frequency of the

95 // low-pass filter.	101 // low-pass filter.

96 double sinc_scale_factor = io_ratio > 1.0 ? 1.0 / io_ratio : 1.0;	102 double sinc_scale_factor = io_ratio > 1.0 ? 1.0 / io_ratio : 1.0;

97	103

98 // The sinc function is an idealized brick-wall filter, but since we're	104 // The sinc function is an idealized brick-wall filter, but since we're

99 // windowing it the transition from pass to stop does not happen right away.	105 // windowing it the transition from pass to stop does not happen right away.

100 // So we should adjust the low pass filter cutoff slightly downward to avoid	106 // So we should adjust the low pass filter cutoff slightly downward to avoid

101 // some aliasing at the very high-end.	107 // some aliasing at the very high-end.

102 // TODO(crogers): this value is empirical and to be more exact should vary	108 // TODO(crogers): this value is empirical and to be more exact should vary

103 // depending on kKernelSize.	109 // depending on kKernelSize.

104 sinc_scale_factor *= 0.9;	110 sinc_scale_factor *= 0.9;

105	111

106 return sinc_scale_factor;	112 return sinc_scale_factor;

107 }	113 }

108	114

109 // If we know the minimum architecture at compile time, avoid CPU detection.	115 // If we know the minimum architecture at compile time, avoid CPU detection.

110 // Force NaCl code to use C routines since (at present) nothing there uses these	116 // iOS lies about its architecture, so we also need to exclude it here.

111 // methods and plumbing the -msse built library is non-trivial. iOS lies	117 #if defined(WEBRTC_ARCH_X86_FAMILY) && !defined(WEBRTC_IOS)

112 // about its architecture, so we also need to exclude it here.

113 #if defined(ARCH_CPU_X86_FAMILY) && !defined(OS_NACL) && !defined(OS_IOS)

114 #if defined(__SSE__)	118 #if defined(__SSE__)

115 #define CONVOLVE_FUNC Convolve_SSE	119 #define CONVOLVE_FUNC Convolve_SSE

116 void SincResampler::InitializeCPUSpecificFeatures() {}	120 void SincResampler::InitializeCPUSpecificFeatures() {}

117 #else	121 #else

118 // X86 CPU detection required. Functions will be set by	122 // X86 CPU detection required. Function will be set by

119 // InitializeCPUSpecificFeatures().	123 // InitializeCPUSpecificFeatures().

120 // TODO(dalecurtis): Once Chrome moves to an SSE baseline this can be removed.	124 // TODO(dalecurtis): Once Chrome moves to an SSE baseline this can be removed.

121 #define CONVOLVE_FUNC g_convolve_proc_	125 #define CONVOLVE_FUNC convolve_proc_

122

123 typedef float (*ConvolveProc)(const float*, const float*, const float*, double);

124 static ConvolveProc g_convolve_proc_ = NULL;

125	126

126 void SincResampler::InitializeCPUSpecificFeatures() {	127 void SincResampler::InitializeCPUSpecificFeatures() {

127 CHECK(!g_convolve_proc_);	128 convolve_proc_ = WebRtc_GetCPUInfo(kSSE2) ? Convolve_SSE : Convolve_C;

128 g_convolve_proc_ = base::CPU().has_sse() ? Convolve_SSE : Convolve_C;

129 }	129 }

130 #endif	130 #endif

131 #elif defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON)	131 #elif defined(WEBRTC_ARCH_ARM_V7)

	132 #if defined(WEBRTC_ARCH_ARM_NEON)

132 #define CONVOLVE_FUNC Convolve_NEON	133 #define CONVOLVE_FUNC Convolve_NEON

133 void SincResampler::InitializeCPUSpecificFeatures() {}	134 void SincResampler::InitializeCPUSpecificFeatures() {}

134 #else	135 #else

	136 // NEON CPU detection required. Function will be set by

	137 // InitializeCPUSpecificFeatures().

	138 #define CONVOLVE_FUNC convolve_proc_

	139

	140 void SincResampler::InitializeCPUSpecificFeatures() {

	141 convolve_proc_ = WebRtc_GetCPUFeaturesARM() & kCPUFeatureNEON ?

	142 Convolve_NEON : Convolve_C;

	143 }

	144 #endif

	145 #else

135 // Unknown architecture.	146 // Unknown architecture.

136 #define CONVOLVE_FUNC Convolve_C	147 #define CONVOLVE_FUNC Convolve_C

137 void SincResampler::InitializeCPUSpecificFeatures() {}	148 void SincResampler::InitializeCPUSpecificFeatures() {}

138 #endif	149 #endif

139	150

140 SincResampler::SincResampler(double io_sample_rate_ratio,	151 SincResampler::SincResampler(double io_sample_rate_ratio,

141 int request_frames,	152 int request_frames,

142 const ReadCB& read_cb)	153 SincResamplerCallback* read_cb)

143 : io_sample_rate_ratio_(io_sample_rate_ratio),	154 : io_sample_rate_ratio_(io_sample_rate_ratio),

144 read_cb_(read_cb),	155 read_cb_(read_cb),

145 request_frames_(request_frames),	156 request_frames_(request_frames),

146 input_buffer_size_(request_frames_ + kKernelSize),	157 input_buffer_size_(request_frames_ + kKernelSize),

147 // Create input buffers with a 16-byte alignment for SSE optimizations.	158 // Create input buffers with a 16-byte alignment for SSE optimizations.

148 kernel_storage_(static_cast<float*>(	159 kernel_storage_(static_cast<float*>(

149 base::AlignedAlloc(sizeof(float) * kKernelStorageSize, 16))),	160 AlignedMalloc(sizeof(float) * kKernelStorageSize, 16))),

150 kernel_pre_sinc_storage_(static_cast<float*>(	161 kernel_pre_sinc_storage_(static_cast<float*>(

151 base::AlignedAlloc(sizeof(float) * kKernelStorageSize, 16))),	162 AlignedMalloc(sizeof(float) * kKernelStorageSize, 16))),

152 kernel_window_storage_(static_cast<float*>(	163 kernel_window_storage_(static_cast<float*>(

153 base::AlignedAlloc(sizeof(float) * kKernelStorageSize, 16))),	164 AlignedMalloc(sizeof(float) * kKernelStorageSize, 16))),

154 input_buffer_(static_cast<float*>(	165 input_buffer_(static_cast<float*>(

155 base::AlignedAlloc(sizeof(float) * input_buffer_size_, 16))),	166 AlignedMalloc(sizeof(float) * input_buffer_size_, 16))),

	167 #if defined(WEBRTC_RESAMPLER_CPU_DETECTION)

	168 convolve_proc_(NULL),

	169 #endif

156 r1_(input_buffer_.get()),	170 r1_(input_buffer_.get()),

157 r2_(input_buffer_.get() + kKernelSize / 2) {	171 r2_(input_buffer_.get() + kKernelSize / 2) {

158 CHECK_GT(request_frames_, 0);	172 #if defined(WEBRTC_RESAMPLER_CPU_DETECTION)

	173 InitializeCPUSpecificFeatures();

	174 assert(convolve_proc_);

	175 #endif

	176 assert(request_frames_ > 0);

159 Flush();	177 Flush();

160 CHECK_GT(block_size_, kKernelSize)	178 assert(block_size_ > kKernelSize);

161 << "block_size must be greater than kKernelSize!";

162	179

163 memset(kernel_storage_.get(), 0,	180 memset(kernel_storage_.get(), 0,

164 sizeof(*kernel_storage_.get()) * kKernelStorageSize);	181 sizeof(*kernel_storage_.get()) * kKernelStorageSize);

165 memset(kernel_pre_sinc_storage_.get(), 0,	182 memset(kernel_pre_sinc_storage_.get(), 0,

166 sizeof(*kernel_pre_sinc_storage_.get()) * kKernelStorageSize);	183 sizeof(*kernel_pre_sinc_storage_.get()) * kKernelStorageSize);

167 memset(kernel_window_storage_.get(), 0,	184 memset(kernel_window_storage_.get(), 0,

168 sizeof(*kernel_window_storage_.get()) * kKernelStorageSize);	185 sizeof(*kernel_window_storage_.get()) * kKernelStorageSize);

169	186

170 InitializeKernel();	187 InitializeKernel();

171 }	188 }

172	189

173 SincResampler::~SincResampler() {}	190 SincResampler::~SincResampler() {}

174	191

175 void SincResampler::UpdateRegions(bool second_load) {	192 void SincResampler::UpdateRegions(bool second_load) {

176 // Setup various region pointers in the buffer (see diagram above). If we're	193 // Setup various region pointers in the buffer (see diagram above). If we're

177 // on the second load we need to slide r0_ to the right by kKernelSize / 2.	194 // on the second load we need to slide r0_ to the right by kKernelSize / 2.

178 r0_ = input_buffer_.get() + (second_load ? kKernelSize : kKernelSize / 2);	195 r0_ = input_buffer_.get() + (second_load ? kKernelSize : kKernelSize / 2);

179 r3_ = r0_ + request_frames_ - kKernelSize;	196 r3_ = r0_ + request_frames_ - kKernelSize;

180 r4_ = r0_ + request_frames_ - kKernelSize / 2;	197 r4_ = r0_ + request_frames_ - kKernelSize / 2;

181 block_size_ = r4_ - r2_;	198 block_size_ = r4_ - r2_;

182	199

183 // r1_ at the beginning of the buffer.	200 // r1_ at the beginning of the buffer.

184 CHECK_EQ(r1_, input_buffer_.get());	201 assert(r1_ == input_buffer_.get());

185 // r1_ left of r2_, r4_ left of r3_ and size correct.	202 // r1_ left of r2_, r4_ left of r3_ and size correct.

186 CHECK_EQ(r2_ - r1_, r4_ - r3_);	203 assert(r2_ - r1_ == r4_ - r3_);

187 // r2_ left of r3.	204 // r2_ left of r3.

188 CHECK_LT(r2_, r3_);	205 assert(r2_ < r3_);

189 }	206 }

190	207

191 void SincResampler::InitializeKernel() {	208 void SincResampler::InitializeKernel() {

192 // Blackman window parameters.	209 // Blackman window parameters.

193 static const double kAlpha = 0.16;	210 static const double kAlpha = 0.16;

194 static const double kA0 = 0.5 * (1.0 - kAlpha);	211 static const double kA0 = 0.5 * (1.0 - kAlpha);

195 static const double kA1 = 0.5;	212 static const double kA1 = 0.5;

196 static const double kA2 = 0.5 * kAlpha;	213 static const double kA2 = 0.5 * kAlpha;

197	214

198 // Generates a set of windowed sinc() kernels.	215 // Generates a set of windowed sinc() kernels.

199 // We generate a range of sub-sample offsets from 0.0 to 1.0.	216 // We generate a range of sub-sample offsets from 0.0 to 1.0.

200 const double sinc_scale_factor = SincScaleFactor(io_sample_rate_ratio_);	217 const double sinc_scale_factor = SincScaleFactor(io_sample_rate_ratio_);

201 for (int offset_idx = 0; offset_idx <= kKernelOffsetCount; ++offset_idx) {	218 for (int offset_idx = 0; offset_idx <= kKernelOffsetCount; ++offset_idx) {

202 const float subsample_offset =	219 const float subsample_offset =

203 static_cast<float>(offset_idx) / kKernelOffsetCount;	220 static_cast<float>(offset_idx) / kKernelOffsetCount;

204	221

205 for (int i = 0; i < kKernelSize; ++i) {	222 for (int i = 0; i < kKernelSize; ++i) {

206 const int idx = i + offset_idx * kKernelSize;	223 const int idx = i + offset_idx * kKernelSize;

207 const float pre_sinc = M_PI * (i - kKernelSize / 2 - subsample_offset);	224 const float pre_sinc = M_PI * (i - kKernelSize / 2 - subsample_offset);

208 kernel_pre_sinc_storage_[idx] = pre_sinc;	225 kernel_pre_sinc_storage_.get()[idx] = pre_sinc;

209	226

210 // Compute Blackman window, matching the offset of the sinc().	227 // Compute Blackman window, matching the offset of the sinc().

211 const float x = (i - subsample_offset) / kKernelSize;	228 const float x = (i - subsample_offset) / kKernelSize;

212 const float window = kA0 - kA1 * cos(2.0 * M_PI * x) + kA2	229 const float window = kA0 - kA1 * cos(2.0 * M_PI * x) + kA2

213 * cos(4.0 * M_PI * x);	230 * cos(4.0 * M_PI * x);

214 kernel_window_storage_[idx] = window;	231 kernel_window_storage_.get()[idx] = window;

215	232

216 // Compute the sinc with offset, then window the sinc() function and store	233 // Compute the sinc with offset, then window the sinc() function and store

217 // at the correct offset.	234 // at the correct offset.

218 if (pre_sinc == 0) {	235 if (pre_sinc == 0) {

219 kernel_storage_[idx] = sinc_scale_factor * window;	236 kernel_storage_.get()[idx] = sinc_scale_factor * window;

220 } else {	237 } else {

221 kernel_storage_[idx] =	238 kernel_storage_.get()[idx] =

222 window * sin(sinc_scale_factor * pre_sinc) / pre_sinc;	239 window * sin(sinc_scale_factor * pre_sinc) / pre_sinc;

223 }	240 }

224 }	241 }

225 }	242 }

226 }	243 }

227	244

228 void SincResampler::SetRatio(double io_sample_rate_ratio) {	245 void SincResampler::SetRatio(double io_sample_rate_ratio) {

229 if (fabs(io_sample_rate_ratio_ - io_sample_rate_ratio) <	246 if (fabs(io_sample_rate_ratio_ - io_sample_rate_ratio) <

230 std::numeric_limits<double>::epsilon()) {	247 std::numeric_limits<double>::epsilon()) {

231 return;	248 return;

232 }	249 }

233	250

234 io_sample_rate_ratio_ = io_sample_rate_ratio;	251 io_sample_rate_ratio_ = io_sample_rate_ratio;

235	252

236 // Optimize reinitialization by reusing values which are independent of	253 // Optimize reinitialization by reusing values which are independent of

237 // |sinc_scale_factor|. Provides a 3x speedup.	254 // |sinc_scale_factor|. Provides a 3x speedup.

238 const double sinc_scale_factor = SincScaleFactor(io_sample_rate_ratio_);	255 const double sinc_scale_factor = SincScaleFactor(io_sample_rate_ratio_);

239 for (int offset_idx = 0; offset_idx <= kKernelOffsetCount; ++offset_idx) {	256 for (int offset_idx = 0; offset_idx <= kKernelOffsetCount; ++offset_idx) {

240 for (int i = 0; i < kKernelSize; ++i) {	257 for (int i = 0; i < kKernelSize; ++i) {

241 const int idx = i + offset_idx * kKernelSize;	258 const int idx = i + offset_idx * kKernelSize;

242 const float window = kernel_window_storage_[idx];	259 const float window = kernel_window_storage_.get()[idx];

243 const float pre_sinc = kernel_pre_sinc_storage_[idx];	260 const float pre_sinc = kernel_pre_sinc_storage_.get()[idx];

244	261

245 if (pre_sinc == 0) {	262 if (pre_sinc == 0) {

246 kernel_storage_[idx] = sinc_scale_factor * window;	263 kernel_storage_.get()[idx] = sinc_scale_factor * window;

247 } else {	264 } else {

248 kernel_storage_[idx] =	265 kernel_storage_.get()[idx] =

249 window * sin(sinc_scale_factor * pre_sinc) / pre_sinc;	266 window * sin(sinc_scale_factor * pre_sinc) / pre_sinc;

250 }	267 }

251 }	268 }

252 }	269 }

253 }	270 }

254	271

255 void SincResampler::Resample(int frames, float* destination) {	272 void SincResampler::Resample(int frames, float* destination) {

256 int remaining_frames = frames;	273 int remaining_frames = frames;

257	274

258 // Step (1) -- Prime the input buffer at the start of the input stream.	275 // Step (1) -- Prime the input buffer at the start of the input stream.

259 if (!buffer_primed_ && remaining_frames) {	276 if (!buffer_primed_ && remaining_frames) {

260 read_cb_.Run(request_frames_, r0_);	277 read_cb_->Run(request_frames_, r0_);

261 buffer_primed_ = true;	278 buffer_primed_ = true;

262 }	279 }

263	280

264 // Step (2) -- Resample! const what we can outside of the loop for speed. It	281 // Step (2) -- Resample! const what we can outside of the loop for speed. It

265 // actually has an impact on ARM performance. See inner loop comment below.	282 // actually has an impact on ARM performance. See inner loop comment below.

266 const double current_io_ratio = io_sample_rate_ratio_;	283 const double current_io_ratio = io_sample_rate_ratio_;

267 const float* const kernel_ptr = kernel_storage_.get();	284 const float* const kernel_ptr = kernel_storage_.get();

268 while (remaining_frames) {	285 while (remaining_frames) {

269 // |i| may be negative if the last Resample() call ended on an iteration	286 // |i| may be negative if the last Resample() call ended on an iteration

270 // that put |virtual_source_idx_| over the limit.	287 // that put |virtual_source_idx_| over the limit.

271 //	288 //

272 // Note: The loop construct here can severely impact performance on ARM	289 // Note: The loop construct here can severely impact performance on ARM

273 // or when built with clang. See https://codereview.chromium.org/18566009/	290 // or when built with clang. See https://codereview.chromium.org/18566009/

274 for (int i = ceil((block_size_ - virtual_source_idx_) / current_io_ratio);	291 for (int i = ceil((block_size_ - virtual_source_idx_) / current_io_ratio);

275 i > 0; --i) {	292 i > 0; --i) {

276 DCHECK_LT(virtual_source_idx_, block_size_);	293 assert(virtual_source_idx_ < block_size_);

277	294

278 // |virtual_source_idx_| lies in between two kernel offsets so figure out	295 // |virtual_source_idx_| lies in between two kernel offsets so figure out

279 // what they are.	296 // what they are.

280 const int source_idx = virtual_source_idx_;	297 const int source_idx = virtual_source_idx_;

281 const double subsample_remainder = virtual_source_idx_ - source_idx;	298 const double subsample_remainder = virtual_source_idx_ - source_idx;

282	299

283 const double virtual_offset_idx =	300 const double virtual_offset_idx =

284 subsample_remainder * kKernelOffsetCount;	301 subsample_remainder * kKernelOffsetCount;

285 const int offset_idx = virtual_offset_idx;	302 const int offset_idx = virtual_offset_idx;

286	303

287 // We'll compute "convolutions" for the two kernels which straddle	304 // We'll compute "convolutions" for the two kernels which straddle

288 // |virtual_source_idx_|.	305 // |virtual_source_idx_|.

289 const float* const k1 = kernel_ptr + offset_idx * kKernelSize;	306 const float* const k1 = kernel_ptr + offset_idx * kKernelSize;

290 const float* const k2 = k1 + kKernelSize;	307 const float* const k2 = k1 + kKernelSize;

291	308

292 // Ensure |k1|, |k2| are 16-byte aligned for SIMD usage. Should always be	309 // Ensure |k1|, |k2| are 16-byte aligned for SIMD usage. Should always be

293 // true so long as kKernelSize is a multiple of 16.	310 // true so long as kKernelSize is a multiple of 16.

294 DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(k1) & 0x0F);	311 assert(0u == (reinterpret_cast<uintptr_t>(k1) & 0x0F));

295 DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(k2) & 0x0F);	312 assert(0u == (reinterpret_cast<uintptr_t>(k2) & 0x0F));

296	313

297 // Initialize input pointer based on quantized |virtual_source_idx_|.	314 // Initialize input pointer based on quantized |virtual_source_idx_|.

298 const float* const input_ptr = r1_ + source_idx;	315 const float* const input_ptr = r1_ + source_idx;

299	316

300 // Figure out how much to weight each kernel's "convolution".	317 // Figure out how much to weight each kernel's "convolution".

301 const double kernel_interpolation_factor =	318 const double kernel_interpolation_factor =

302 virtual_offset_idx - offset_idx;	319 virtual_offset_idx - offset_idx;

303 *destination++ = CONVOLVE_FUNC(	320 *destination++ = CONVOLVE_FUNC(

304 input_ptr, k1, k2, kernel_interpolation_factor);	321 input_ptr, k1, k2, kernel_interpolation_factor);

305	322

306 // Advance the virtual index.	323 // Advance the virtual index.

307 virtual_source_idx_ += current_io_ratio;	324 virtual_source_idx_ += current_io_ratio;

308	325

309 if (!--remaining_frames)	326 if (!--remaining_frames)

310 return;	327 return;

311 }	328 }

312	329

313 // Wrap back around to the start.	330 // Wrap back around to the start.

314 virtual_source_idx_ -= block_size_;	331 virtual_source_idx_ -= block_size_;

315	332

316 // Step (3) -- Copy r3_, r4_ to r1_, r2_.	333 // Step (3) -- Copy r3_, r4_ to r1_, r2_.

317 // This wraps the last input frames back to the start of the buffer.	334 // This wraps the last input frames back to the start of the buffer.

318 memcpy(r1_, r3_, sizeof(*input_buffer_.get()) * kKernelSize);	335 memcpy(r1_, r3_, sizeof(*input_buffer_.get()) * kKernelSize);

319	336

320 // Step (4) -- Reinitialize regions if necessary.	337 // Step (4) -- Reinitialize regions if necessary.

321 if (r0_ == r2_)	338 if (r0_ == r2_)

322 UpdateRegions(true);	339 UpdateRegions(true);

323	340

324 // Step (5) -- Refresh the buffer with more input.	341 // Step (5) -- Refresh the buffer with more input.

325 read_cb_.Run(request_frames_, r0_);	342 read_cb_->Run(request_frames_, r0_);

326 }	343 }

327 }	344 }

328	345

329 #undef CONVOLVE_FUNC	346 #undef CONVOLVE_FUNC

330	347

331 int SincResampler::ChunkSize() const {	348 int SincResampler::ChunkSize() const {

332 return block_size_ / io_sample_rate_ratio_;	349 return block_size_ / io_sample_rate_ratio_;

333 }	350 }

334	351

335 void SincResampler::Flush() {	352 void SincResampler::Flush() {

(...skipping 16 matching lines...) Expand all Loading...
352 while (n--) {	369 while (n--) {

353 sum1 += *input_ptr * *k1++;	370 sum1 += *input_ptr * *k1++;

354 sum2 += *input_ptr++ * *k2++;	371 sum2 += *input_ptr++ * *k2++;

355 }	372 }

356	373

357 // Linearly interpolate the two "convolutions".	374 // Linearly interpolate the two "convolutions".

358 return (1.0 - kernel_interpolation_factor) * sum1	375 return (1.0 - kernel_interpolation_factor) * sum1

359 + kernel_interpolation_factor * sum2;	376 + kernel_interpolation_factor * sum2;

360 }	377 }

361	378

362 #if defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON)	379 } // namespace webrtc

363 float SincResampler::Convolve_NEON(const float* input_ptr, const float* k1,

364 const float* k2,

365 double kernel_interpolation_factor) {

366 float32x4_t m_input;

367 float32x4_t m_sums1 = vmovq_n_f32(0);

368 float32x4_t m_sums2 = vmovq_n_f32(0);

369

370 const float* upper = input_ptr + kKernelSize;

371 for (; input_ptr < upper; ) {

372 m_input = vld1q_f32(input_ptr);

373 input_ptr += 4;

374 m_sums1 = vmlaq_f32(m_sums1, m_input, vld1q_f32(k1));

375 k1 += 4;

376 m_sums2 = vmlaq_f32(m_sums2, m_input, vld1q_f32(k2));

377 k2 += 4;

378 }

379

380 // Linearly interpolate the two "convolutions".

381 m_sums1 = vmlaq_f32(

382 vmulq_f32(m_sums1, vmovq_n_f32(1.0 - kernel_interpolation_factor)),

383 m_sums2, vmovq_n_f32(kernel_interpolation_factor));

384

385 // Sum components together.

386 float32x2_t m_half = vadd_f32(vget_high_f32(m_sums1), vget_low_f32(m_sums1));

387 return vget_lane_f32(vpadd_f32(m_half, m_half), 0);

388 }

389 #endif

390

391 } // namespace media

OLD	NEW

« no previous file with comments | « media/base/sinc_resampler.h ('k') | media/base/sinc_resampler_unittest.cc » ('j') | no next file with comments »