media/base/sinc_resampler.cc - Issue 638123004: Type conversion fixes, media/ edition.

Side by Side Diff: media/base/sinc_resampler.cc

Issue 638123004: Type conversion fixes, media/ edition. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Fix test Created 6 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4 //	4 //

5 // Initial input buffer layout, dividing into regions r0_ to r4_ (note: r0_, r3_	5 // Initial input buffer layout, dividing into regions r0_ to r4_ (note: r0_, r3_

6 // and r4_ will move after the first load):	6 // and r4_ will move after the first load):

7 //	7 //

8 // \|----------------\|-----------------------------------------\|----------------\|	8 // \|----------------\|-----------------------------------------\|----------------\|

9 //	9 //

10 // request_frames_	10 // request_frames_

(...skipping 160 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
171	171

172 // Generates a set of windowed sinc() kernels.	172 // Generates a set of windowed sinc() kernels.

173 // We generate a range of sub-sample offsets from 0.0 to 1.0.	173 // We generate a range of sub-sample offsets from 0.0 to 1.0.

174 const double sinc_scale_factor = SincScaleFactor(io_sample_rate_ratio_);	174 const double sinc_scale_factor = SincScaleFactor(io_sample_rate_ratio_);

175 for (int offset_idx = 0; offset_idx <= kKernelOffsetCount; ++offset_idx) {	175 for (int offset_idx = 0; offset_idx <= kKernelOffsetCount; ++offset_idx) {

176 const float subsample_offset =	176 const float subsample_offset =

177 static_cast<float>(offset_idx) / kKernelOffsetCount;	177 static_cast<float>(offset_idx) / kKernelOffsetCount;

178	178

179 for (int i = 0; i < kKernelSize; ++i) {	179 for (int i = 0; i < kKernelSize; ++i) {

180 const int idx = i + offset_idx * kKernelSize;	180 const int idx = i + offset_idx * kKernelSize;

181 const float pre_sinc = M_PI * (i - kKernelSize / 2 - subsample_offset);	181 const float pre_sinc =

	182 static_cast<float>(M_PI * (i - kKernelSize / 2 - subsample_offset));

182 kernel_pre_sinc_storage_[idx] = pre_sinc;	183 kernel_pre_sinc_storage_[idx] = pre_sinc;

183	184

184 // Compute Blackman window, matching the offset of the sinc().	185 // Compute Blackman window, matching the offset of the sinc().

185 const float x = (i - subsample_offset) / kKernelSize;	186 const float x = (i - subsample_offset) / kKernelSize;

186 const float window =	187 const float window = static_cast<float>(kA0 - kA1 * cos(2.0 * M_PI * x) +

187 kA0 - kA1 * cos(2.0 * M_PI * x) + kA2 * cos(4.0 * M_PI * x);	188 kA2 * cos(4.0 * M_PI * x));

188 kernel_window_storage_[idx] = window;	189 kernel_window_storage_[idx] = window;

189	190

190 // Compute the sinc with offset, then window the sinc() function and store	191 // Compute the sinc with offset, then window the sinc() function and store

191 // at the correct offset.	192 // at the correct offset.

192 if (pre_sinc == 0) {	193 kernel_storage_[idx] = static_cast<float>(window *

193 kernel_storage_[idx] = sinc_scale_factor * window;	194 ((pre_sinc == 0) ?

194 } else {	195 sinc_scale_factor :

195 kernel_storage_[idx] =	196 (sin(sinc_scale_factor * pre_sinc) / pre_sinc)));

196 window * sin(sinc_scale_factor * pre_sinc) / pre_sinc;

197 }

198 }	197 }

199 }	198 }

200 }	199 }

201	200

202 void SincResampler::SetRatio(double io_sample_rate_ratio) {	201 void SincResampler::SetRatio(double io_sample_rate_ratio) {

203 if (fabs(io_sample_rate_ratio_ - io_sample_rate_ratio) <	202 if (fabs(io_sample_rate_ratio_ - io_sample_rate_ratio) <

204 std::numeric_limits<double>::epsilon()) {	203 std::numeric_limits<double>::epsilon()) {

205 return;	204 return;

206 }	205 }

207	206

208 io_sample_rate_ratio_ = io_sample_rate_ratio;	207 io_sample_rate_ratio_ = io_sample_rate_ratio;

209	208

210 // Optimize reinitialization by reusing values which are independent of	209 // Optimize reinitialization by reusing values which are independent of

211 // \|sinc_scale_factor\|. Provides a 3x speedup.	210 // \|sinc_scale_factor\|. Provides a 3x speedup.

212 const double sinc_scale_factor = SincScaleFactor(io_sample_rate_ratio_);	211 const double sinc_scale_factor = SincScaleFactor(io_sample_rate_ratio_);

213 for (int offset_idx = 0; offset_idx <= kKernelOffsetCount; ++offset_idx) {	212 for (int offset_idx = 0; offset_idx <= kKernelOffsetCount; ++offset_idx) {

214 for (int i = 0; i < kKernelSize; ++i) {	213 for (int i = 0; i < kKernelSize; ++i) {

215 const int idx = i + offset_idx * kKernelSize;	214 const int idx = i + offset_idx * kKernelSize;

216 const float window = kernel_window_storage_[idx];	215 const float window = kernel_window_storage_[idx];

217 const float pre_sinc = kernel_pre_sinc_storage_[idx];	216 const float pre_sinc = kernel_pre_sinc_storage_[idx];

218	217

219 if (pre_sinc == 0) {	218 kernel_storage_[idx] = static_cast<float>(window *

220 kernel_storage_[idx] = sinc_scale_factor * window;	219 ((pre_sinc == 0) ?

221 } else {	220 sinc_scale_factor :

222 kernel_storage_[idx] =	221 (sin(sinc_scale_factor * pre_sinc) / pre_sinc)));

223 window * sin(sinc_scale_factor * pre_sinc) / pre_sinc;

224 }

225 }	222 }

226 }	223 }

227 }	224 }

228	225

229 void SincResampler::Resample(int frames, float* destination) {	226 void SincResampler::Resample(int frames, float* destination) {

230 int remaining_frames = frames;	227 int remaining_frames = frames;

231	228

232 // Step (1) -- Prime the input buffer at the start of the input stream.	229 // Step (1) -- Prime the input buffer at the start of the input stream.

233 if (!buffer_primed_ && remaining_frames) {	230 if (!buffer_primed_ && remaining_frames) {

234 read_cb_.Run(request_frames_, r0_);	231 read_cb_.Run(request_frames_, r0_);

235 buffer_primed_ = true;	232 buffer_primed_ = true;

236 }	233 }

237	234

238 // Step (2) -- Resample! const what we can outside of the loop for speed. It	235 // Step (2) -- Resample! const what we can outside of the loop for speed. It

239 // actually has an impact on ARM performance. See inner loop comment below.	236 // actually has an impact on ARM performance. See inner loop comment below.

240 const double current_io_ratio = io_sample_rate_ratio_;	237 const double current_io_ratio = io_sample_rate_ratio_;

241 const float* const kernel_ptr = kernel_storage_.get();	238 const float* const kernel_ptr = kernel_storage_.get();

242 while (remaining_frames) {	239 while (remaining_frames) {

243 // Note: The loop construct here can severely impact performance on ARM	240 // Note: The loop construct here can severely impact performance on ARM

244 // or when built with clang. See https://codereview.chromium.org/18566009/	241 // or when built with clang. See https://codereview.chromium.org/18566009/

245 int source_idx = virtual_source_idx_;	242 int source_idx = static_cast<int>(virtual_source_idx_);

246 while (source_idx < block_size_) {	243 while (source_idx < block_size_) {

247 // \|virtual_source_idx_\| lies in between two kernel offsets so figure out	244 // \|virtual_source_idx_\| lies in between two kernel offsets so figure out

248 // what they are.	245 // what they are.

249 const double subsample_remainder = virtual_source_idx_ - source_idx;	246 const double subsample_remainder = virtual_source_idx_ - source_idx;

250	247

251 const double virtual_offset_idx =	248 const double virtual_offset_idx =

252 subsample_remainder * kKernelOffsetCount;	249 subsample_remainder * kKernelOffsetCount;

253 const int offset_idx = virtual_offset_idx;	250 const int offset_idx = static_cast<int>(virtual_offset_idx);

254	251

255 // We'll compute "convolutions" for the two kernels which straddle	252 // We'll compute "convolutions" for the two kernels which straddle

256 // \|virtual_source_idx_\|.	253 // \|virtual_source_idx_\|.

257 const float* const k1 = kernel_ptr + offset_idx * kKernelSize;	254 const float* const k1 = kernel_ptr + offset_idx * kKernelSize;

258 const float* const k2 = k1 + kKernelSize;	255 const float* const k2 = k1 + kKernelSize;

259	256

260 // Ensure \|k1\|, \|k2\| are 16-byte aligned for SIMD usage. Should always be	257 // Ensure \|k1\|, \|k2\| are 16-byte aligned for SIMD usage. Should always be

261 // true so long as kKernelSize is a multiple of 16.	258 // true so long as kKernelSize is a multiple of 16.

262 DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(k1) & 0x0F);	259 DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(k1) & 0x0F);

263 DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(k2) & 0x0F);	260 DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(k2) & 0x0F);

264	261

265 // Initialize input pointer based on quantized \|virtual_source_idx_\|.	262 // Initialize input pointer based on quantized \|virtual_source_idx_\|.

266 const float* const input_ptr = r1_ + source_idx;	263 const float* const input_ptr = r1_ + source_idx;

267	264

268 // Figure out how much to weight each kernel's "convolution".	265 // Figure out how much to weight each kernel's "convolution".

269 const double kernel_interpolation_factor =	266 const double kernel_interpolation_factor =

270 virtual_offset_idx - offset_idx;	267 virtual_offset_idx - offset_idx;

271 *destination++ = CONVOLVE_FUNC(	268 *destination++ = CONVOLVE_FUNC(

272 input_ptr, k1, k2, kernel_interpolation_factor);	269 input_ptr, k1, k2, kernel_interpolation_factor);

273	270

274 // Advance the virtual index.	271 // Advance the virtual index.

275 virtual_source_idx_ += current_io_ratio;	272 virtual_source_idx_ += current_io_ratio;

276 source_idx = virtual_source_idx_;	273 source_idx = static_cast<int>(virtual_source_idx_);

277	274

278 if (!--remaining_frames)	275 if (!--remaining_frames)

279 return;	276 return;

280 }	277 }

281	278

282 // Wrap back around to the start.	279 // Wrap back around to the start.

283 DCHECK_GE(virtual_source_idx_, block_size_);	280 DCHECK_GE(virtual_source_idx_, block_size_);

284 virtual_source_idx_ -= block_size_;	281 virtual_source_idx_ -= block_size_;

285	282

286 // Step (3) -- Copy r3_, r4_ to r1_, r2_.	283 // Step (3) -- Copy r3_, r4_ to r1_, r2_.

287 // This wraps the last input frames back to the start of the buffer.	284 // This wraps the last input frames back to the start of the buffer.

288 memcpy(r1_, r3_, sizeof(*input_buffer_.get()) * kKernelSize);	285 memcpy(r1_, r3_, sizeof(*input_buffer_.get()) * kKernelSize);

289	286

290 // Step (4) -- Reinitialize regions if necessary.	287 // Step (4) -- Reinitialize regions if necessary.

291 if (r0_ == r2_)	288 if (r0_ == r2_)

292 UpdateRegions(true);	289 UpdateRegions(true);

293	290

294 // Step (5) -- Refresh the buffer with more input.	291 // Step (5) -- Refresh the buffer with more input.

295 read_cb_.Run(request_frames_, r0_);	292 read_cb_.Run(request_frames_, r0_);

296 }	293 }

297 }	294 }

298	295

299 int SincResampler::ChunkSize() const {	296 int SincResampler::ChunkSize() const {

300 return block_size_ / io_sample_rate_ratio_;	297 return static_cast<int>(block_size_ / io_sample_rate_ratio_);

301 }	298 }

302	299

303 void SincResampler::Flush() {	300 void SincResampler::Flush() {

304 virtual_source_idx_ = 0;	301 virtual_source_idx_ = 0;

305 buffer_primed_ = false;	302 buffer_primed_ = false;

306 memset(input_buffer_.get(), 0,	303 memset(input_buffer_.get(), 0,

307 sizeof(*input_buffer_.get()) * input_buffer_size_);	304 sizeof(*input_buffer_.get()) * input_buffer_size_);

308 UpdateRegions(false);	305 UpdateRegions(false);

309 }	306 }

310	307

311 float SincResampler::Convolve_C(const float* input_ptr, const float* k1,	308 float SincResampler::Convolve_C(const float* input_ptr, const float* k1,

312 const float* k2,	309 const float* k2,

313 double kernel_interpolation_factor) {	310 double kernel_interpolation_factor) {

314 float sum1 = 0;	311 float sum1 = 0;

315 float sum2 = 0;	312 float sum2 = 0;

316	313

317 // Generate a single output sample. Unrolling this loop hurt performance in	314 // Generate a single output sample. Unrolling this loop hurt performance in

318 // local testing.	315 // local testing.

319 int n = kKernelSize;	316 int n = kKernelSize;

320 while (n--) {	317 while (n--) {

321 sum1 += *input_ptr * *k1++;	318 sum1 += *input_ptr * *k1++;

322 sum2 += *input_ptr++ * *k2++;	319 sum2 += *input_ptr++ * *k2++;

323 }	320 }

324	321

325 // Linearly interpolate the two "convolutions".	322 // Linearly interpolate the two "convolutions".

326 return (1.0 - kernel_interpolation_factor) * sum1	323 return static_cast<float>((1.0 - kernel_interpolation_factor) * sum1 +

327 + kernel_interpolation_factor * sum2;	324 kernel_interpolation_factor * sum2);

328 }	325 }

329	326

330 #if defined(ARCH_CPU_X86_FAMILY)	327 #if defined(ARCH_CPU_X86_FAMILY)

331 float SincResampler::Convolve_SSE(const float* input_ptr, const float* k1,	328 float SincResampler::Convolve_SSE(const float* input_ptr, const float* k1,

332 const float* k2,	329 const float* k2,

333 double kernel_interpolation_factor) {	330 double kernel_interpolation_factor) {

334 __m128 m_input;	331 __m128 m_input;

335 __m128 m_sums1 = _mm_setzero_ps();	332 __m128 m_sums1 = _mm_setzero_ps();

336 __m128 m_sums2 = _mm_setzero_ps();	333 __m128 m_sums2 = _mm_setzero_ps();

337	334

338 // Based on \|input_ptr\| alignment, we need to use loadu or load. Unrolling	335 // Based on \|input_ptr\| alignment, we need to use loadu or load. Unrolling

339 // these loops hurt performance in local testing.	336 // these loops hurt performance in local testing.

340 if (reinterpret_cast<uintptr_t>(input_ptr) & 0x0F) {	337 if (reinterpret_cast<uintptr_t>(input_ptr) & 0x0F) {

341 for (int i = 0; i < kKernelSize; i += 4) {	338 for (int i = 0; i < kKernelSize; i += 4) {

342 m_input = _mm_loadu_ps(input_ptr + i);	339 m_input = _mm_loadu_ps(input_ptr + i);

343 m_sums1 = _mm_add_ps(m_sums1, _mm_mul_ps(m_input, _mm_load_ps(k1 + i)));	340 m_sums1 = _mm_add_ps(m_sums1, _mm_mul_ps(m_input, _mm_load_ps(k1 + i)));

344 m_sums2 = _mm_add_ps(m_sums2, _mm_mul_ps(m_input, _mm_load_ps(k2 + i)));	341 m_sums2 = _mm_add_ps(m_sums2, _mm_mul_ps(m_input, _mm_load_ps(k2 + i)));

345 }	342 }

346 } else {	343 } else {

347 for (int i = 0; i < kKernelSize; i += 4) {	344 for (int i = 0; i < kKernelSize; i += 4) {

348 m_input = _mm_load_ps(input_ptr + i);	345 m_input = _mm_load_ps(input_ptr + i);

349 m_sums1 = _mm_add_ps(m_sums1, _mm_mul_ps(m_input, _mm_load_ps(k1 + i)));	346 m_sums1 = _mm_add_ps(m_sums1, _mm_mul_ps(m_input, _mm_load_ps(k1 + i)));

350 m_sums2 = _mm_add_ps(m_sums2, _mm_mul_ps(m_input, _mm_load_ps(k2 + i)));	347 m_sums2 = _mm_add_ps(m_sums2, _mm_mul_ps(m_input, _mm_load_ps(k2 + i)));

351 }	348 }

352 }	349 }

353	350

354 // Linearly interpolate the two "convolutions".	351 // Linearly interpolate the two "convolutions".

355 m_sums1 = _mm_mul_ps(m_sums1, _mm_set_ps1(1.0 - kernel_interpolation_factor));	352 m_sums1 = _mm_mul_ps(m_sums1, _mm_set_ps1(

356 m_sums2 = _mm_mul_ps(m_sums2, _mm_set_ps1(kernel_interpolation_factor));	353 static_cast<float>(1.0 - kernel_interpolation_factor)));

	354 m_sums2 = _mm_mul_ps(m_sums2, _mm_set_ps1(

	355 static_cast<float>(kernel_interpolation_factor)));

357 m_sums1 = _mm_add_ps(m_sums1, m_sums2);	356 m_sums1 = _mm_add_ps(m_sums1, m_sums2);

358	357

359 // Sum components together.	358 // Sum components together.

360 float result;	359 float result;

361 m_sums2 = _mm_add_ps(_mm_movehl_ps(m_sums1, m_sums1), m_sums1);	360 m_sums2 = _mm_add_ps(_mm_movehl_ps(m_sums1, m_sums1), m_sums1);

362 _mm_store_ss(&result, _mm_add_ss(m_sums2, _mm_shuffle_ps(	361 _mm_store_ss(&result, _mm_add_ss(m_sums2, _mm_shuffle_ps(

363 m_sums2, m_sums2, 1)));	362 m_sums2, m_sums2, 1)));

364	363

365 return result;	364 return result;

366 }	365 }

(...skipping 20 matching lines...) Expand all Loading...
387 vmulq_f32(m_sums1, vmovq_n_f32(1.0 - kernel_interpolation_factor)),	386 vmulq_f32(m_sums1, vmovq_n_f32(1.0 - kernel_interpolation_factor)),

388 m_sums2, vmovq_n_f32(kernel_interpolation_factor));	387 m_sums2, vmovq_n_f32(kernel_interpolation_factor));

389	388

390 // Sum components together.	389 // Sum components together.

391 float32x2_t m_half = vadd_f32(vget_high_f32(m_sums1), vget_low_f32(m_sums1));	390 float32x2_t m_half = vadd_f32(vget_high_f32(m_sums1), vget_low_f32(m_sums1));

392 return vget_lane_f32(vpadd_f32(m_half, m_half), 0);	391 return vget_lane_f32(vpadd_f32(m_half, m_half), 0);

393 }	392 }

394 #endif	393 #endif

395	394

396 } // namespace media	395 } // namespace media

OLD	NEW

« no previous file with comments | « media/audio/audio_parameters.cc ('k') | media/cast/cast_defines.h » ('j') | media/cast/cast_defines.h » ('J')