Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(134)

Side by Side Diff: media/base/sinc_resampler.cc

Issue 18566009: Optimize loop condition for SincResampler. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Comments. const! Created 7 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « media/base/sinc_resampler.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 // 4 //
5 // Initial input buffer layout, dividing into regions r0_ to r4_ (note: r0_, r3_ 5 // Initial input buffer layout, dividing into regions r0_ to r4_ (note: r0_, r3_
6 // and r4_ will move after the first load): 6 // and r4_ will move after the first load):
7 // 7 //
8 // |----------------|-----------------------------------------|----------------| 8 // |----------------|-----------------------------------------|----------------|
9 // 9 //
10 // request_frames_ 10 // request_frames_
(...skipping 120 matching lines...) Expand 10 before | Expand all | Expand 10 after
131 #elif defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON) 131 #elif defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON)
132 #define CONVOLVE_FUNC Convolve_NEON 132 #define CONVOLVE_FUNC Convolve_NEON
133 void SincResampler::InitializeCPUSpecificFeatures() {} 133 void SincResampler::InitializeCPUSpecificFeatures() {}
134 #else 134 #else
135 // Unknown architecture. 135 // Unknown architecture.
136 #define CONVOLVE_FUNC Convolve_C 136 #define CONVOLVE_FUNC Convolve_C
137 void SincResampler::InitializeCPUSpecificFeatures() {} 137 void SincResampler::InitializeCPUSpecificFeatures() {}
138 #endif 138 #endif
139 139
140 SincResampler::SincResampler(double io_sample_rate_ratio, 140 SincResampler::SincResampler(double io_sample_rate_ratio,
141 size_t request_frames, 141 int request_frames,
142 const ReadCB& read_cb) 142 const ReadCB& read_cb)
143 : io_sample_rate_ratio_(io_sample_rate_ratio), 143 : io_sample_rate_ratio_(io_sample_rate_ratio),
144 read_cb_(read_cb), 144 read_cb_(read_cb),
145 request_frames_(request_frames), 145 request_frames_(request_frames),
146 input_buffer_size_(request_frames_ + kKernelSize), 146 input_buffer_size_(request_frames_ + kKernelSize),
147 // Create input buffers with a 16-byte alignment for SSE optimizations. 147 // Create input buffers with a 16-byte alignment for SSE optimizations.
148 kernel_storage_(static_cast<float*>( 148 kernel_storage_(static_cast<float*>(
149 base::AlignedAlloc(sizeof(float) * kKernelStorageSize, 16))), 149 base::AlignedAlloc(sizeof(float) * kKernelStorageSize, 16))),
150 kernel_pre_sinc_storage_(static_cast<float*>( 150 kernel_pre_sinc_storage_(static_cast<float*>(
151 base::AlignedAlloc(sizeof(float) * kKernelStorageSize, 16))), 151 base::AlignedAlloc(sizeof(float) * kKernelStorageSize, 16))),
152 kernel_window_storage_(static_cast<float*>( 152 kernel_window_storage_(static_cast<float*>(
153 base::AlignedAlloc(sizeof(float) * kKernelStorageSize, 16))), 153 base::AlignedAlloc(sizeof(float) * kKernelStorageSize, 16))),
154 input_buffer_(static_cast<float*>( 154 input_buffer_(static_cast<float*>(
155 base::AlignedAlloc(sizeof(float) * input_buffer_size_, 16))), 155 base::AlignedAlloc(sizeof(float) * input_buffer_size_, 16))),
156 r1_(input_buffer_.get()), 156 r1_(input_buffer_.get()),
157 r2_(input_buffer_.get() + kKernelSize / 2) { 157 r2_(input_buffer_.get() + kKernelSize / 2) {
158 CHECK_GT(request_frames_, 0);
158 Flush(); 159 Flush();
159 CHECK_GT(block_size_, static_cast<size_t>(kKernelSize)) 160 CHECK_GT(block_size_, kKernelSize)
160 << "block_size must be greater than kKernelSize!"; 161 << "block_size must be greater than kKernelSize!";
161 162
162 memset(kernel_storage_.get(), 0, 163 memset(kernel_storage_.get(), 0,
163 sizeof(*kernel_storage_.get()) * kKernelStorageSize); 164 sizeof(*kernel_storage_.get()) * kKernelStorageSize);
164 memset(kernel_pre_sinc_storage_.get(), 0, 165 memset(kernel_pre_sinc_storage_.get(), 0,
165 sizeof(*kernel_pre_sinc_storage_.get()) * kKernelStorageSize); 166 sizeof(*kernel_pre_sinc_storage_.get()) * kKernelStorageSize);
166 memset(kernel_window_storage_.get(), 0, 167 memset(kernel_window_storage_.get(), 0,
167 sizeof(*kernel_window_storage_.get()) * kKernelStorageSize); 168 sizeof(*kernel_window_storage_.get()) * kKernelStorageSize);
168 169
169 InitializeKernel(); 170 InitializeKernel();
(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after
248 window * sin(sinc_scale_factor * pre_sinc) / pre_sinc; 249 window * sin(sinc_scale_factor * pre_sinc) / pre_sinc;
249 } 250 }
250 } 251 }
251 } 252 }
252 } 253 }
253 254
254 void SincResampler::Resample(int frames, float* destination) { 255 void SincResampler::Resample(int frames, float* destination) {
255 int remaining_frames = frames; 256 int remaining_frames = frames;
256 257
257 // Step (1) -- Prime the input buffer at the start of the input stream. 258 // Step (1) -- Prime the input buffer at the start of the input stream.
258 if (!buffer_primed_) { 259 if (!buffer_primed_ && remaining_frames) {
259 read_cb_.Run(request_frames_, r0_); 260 read_cb_.Run(request_frames_, r0_);
260 buffer_primed_ = true; 261 buffer_primed_ = true;
261 } 262 }
262 263
263 // Step (2) -- Resample! 264 // Step (2) -- Resample! const what we can outside of the loop for speed. It
265 // actually has an impact on ARM performance. See inner loop comment below.
266 const double current_io_ratio = io_sample_rate_ratio_;
267 const float* const kernel_ptr = kernel_storage_.get();
264 while (remaining_frames) { 268 while (remaining_frames) {
265 while (virtual_source_idx_ < block_size_) { 269 // |i| may be negative if the last Resample() call ended on an iteration
270 // that put |virtual_source_idx_| over the limit.
271 //
272 // Note: The loop construct here can severely impact performance on ARM
273 // or when built with clang. See https://codereview.chromium.org/18566009/
274 for (int i = ceil((block_size_ - virtual_source_idx_) / current_io_ratio);
275 i > 0; --i) {
276 DCHECK_LT(virtual_source_idx_, block_size_);
277
266 // |virtual_source_idx_| lies in between two kernel offsets so figure out 278 // |virtual_source_idx_| lies in between two kernel offsets so figure out
267 // what they are. 279 // what they are.
268 const int source_idx = virtual_source_idx_; 280 const int source_idx = virtual_source_idx_;
269 const double subsample_remainder = virtual_source_idx_ - source_idx; 281 const double subsample_remainder = virtual_source_idx_ - source_idx;
270 282
271 const double virtual_offset_idx = 283 const double virtual_offset_idx =
272 subsample_remainder * kKernelOffsetCount; 284 subsample_remainder * kKernelOffsetCount;
273 const int offset_idx = virtual_offset_idx; 285 const int offset_idx = virtual_offset_idx;
274 286
275 // We'll compute "convolutions" for the two kernels which straddle 287 // We'll compute "convolutions" for the two kernels which straddle
276 // |virtual_source_idx_|. 288 // |virtual_source_idx_|.
277 const float* k1 = kernel_storage_.get() + offset_idx * kKernelSize; 289 const float* const k1 = kernel_ptr + offset_idx * kKernelSize;
278 const float* k2 = k1 + kKernelSize; 290 const float* const k2 = k1 + kKernelSize;
279 291
280 // Ensure |k1|, |k2| are 16-byte aligned for SIMD usage. Should always be 292 // Ensure |k1|, |k2| are 16-byte aligned for SIMD usage. Should always be
281 // true so long as kKernelSize is a multiple of 16. 293 // true so long as kKernelSize is a multiple of 16.
282 DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(k1) & 0x0F); 294 DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(k1) & 0x0F);
283 DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(k2) & 0x0F); 295 DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(k2) & 0x0F);
284 296
285 // Initialize input pointer based on quantized |virtual_source_idx_|. 297 // Initialize input pointer based on quantized |virtual_source_idx_|.
286 const float* input_ptr = r1_ + source_idx; 298 const float* const input_ptr = r1_ + source_idx;
287 299
288 // Figure out how much to weight each kernel's "convolution". 300 // Figure out how much to weight each kernel's "convolution".
289 const double kernel_interpolation_factor = 301 const double kernel_interpolation_factor =
290 virtual_offset_idx - offset_idx; 302 virtual_offset_idx - offset_idx;
291 *destination++ = CONVOLVE_FUNC( 303 *destination++ = CONVOLVE_FUNC(
292 input_ptr, k1, k2, kernel_interpolation_factor); 304 input_ptr, k1, k2, kernel_interpolation_factor);
293 305
294 // Advance the virtual index. 306 // Advance the virtual index.
295 virtual_source_idx_ += io_sample_rate_ratio_; 307 virtual_source_idx_ += current_io_ratio;
296 308
297 if (!--remaining_frames) 309 if (!--remaining_frames)
298 return; 310 return;
299 } 311 }
300 312
301 // Wrap back around to the start. 313 // Wrap back around to the start.
302 virtual_source_idx_ -= block_size_; 314 virtual_source_idx_ -= block_size_;
303 315
304 // Step (3) -- Copy r3_, r4_ to r1_, r2_. 316 // Step (3) -- Copy r3_, r4_ to r1_, r2_.
305 // This wraps the last input frames back to the start of the buffer. 317 // This wraps the last input frames back to the start of the buffer.
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after
370 vmulq_f32(m_sums1, vmovq_n_f32(1.0 - kernel_interpolation_factor)), 382 vmulq_f32(m_sums1, vmovq_n_f32(1.0 - kernel_interpolation_factor)),
371 m_sums2, vmovq_n_f32(kernel_interpolation_factor)); 383 m_sums2, vmovq_n_f32(kernel_interpolation_factor));
372 384
373 // Sum components together. 385 // Sum components together.
374 float32x2_t m_half = vadd_f32(vget_high_f32(m_sums1), vget_low_f32(m_sums1)); 386 float32x2_t m_half = vadd_f32(vget_high_f32(m_sums1), vget_low_f32(m_sums1));
375 return vget_lane_f32(vpadd_f32(m_half, m_half), 0); 387 return vget_lane_f32(vpadd_f32(m_half, m_half), 0);
376 } 388 }
377 #endif 389 #endif
378 390
379 } // namespace media 391 } // namespace media
OLD | NEW
« no previous file with comments | « media/base/sinc_resampler.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698