media/base/sinc_resampler.cc - Issue 12491018: Merge 187732 "Don't use magic statics in SincResampler for threa..."

Side by Side Diff: media/base/sinc_resampler.cc

Issue 12491018: Merge 187732 "Don't use magic statics in SincResampler for threa..." (Closed) Base URL: svn://svn.chromium.org/chrome/branches/1410/src/

Patch Set: Created 7 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4 //	4 //

5 // Input buffer layout, dividing the total buffer into regions (r0_ - r5_):	5 // Input buffer layout, dividing the total buffer into regions (r0_ - r5_):

6 //	6 //

7 // |----------------|-----------------------------------------|----------------|	7 // |----------------|-----------------------------------------|----------------|

8 //	8 //

9 // kBlockSize + kKernelSize / 2	9 // kBlockSize + kKernelSize / 2

10 // <--------------------------------------------------------->	10 // <--------------------------------------------------------->

(...skipping 39 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
50 SincResampler::SincResampler(double io_sample_rate_ratio, const ReadCB& read_cb)	50 SincResampler::SincResampler(double io_sample_rate_ratio, const ReadCB& read_cb)

51 : io_sample_rate_ratio_(io_sample_rate_ratio),	51 : io_sample_rate_ratio_(io_sample_rate_ratio),

52 virtual_source_idx_(0),	52 virtual_source_idx_(0),

53 buffer_primed_(false),	53 buffer_primed_(false),

54 read_cb_(read_cb),	54 read_cb_(read_cb),

55 // Create input buffers with a 16-byte alignment for SSE optimizations.	55 // Create input buffers with a 16-byte alignment for SSE optimizations.

56 kernel_storage_(static_cast<float*>(	56 kernel_storage_(static_cast<float*>(

57 base::AlignedAlloc(sizeof(float) * kKernelStorageSize, 16))),	57 base::AlignedAlloc(sizeof(float) * kKernelStorageSize, 16))),

58 input_buffer_(static_cast<float*>(	58 input_buffer_(static_cast<float*>(

59 base::AlignedAlloc(sizeof(float) * kBufferSize, 16))),	59 base::AlignedAlloc(sizeof(float) * kBufferSize, 16))),

	60 #if defined(ARCH_CPU_X86_FAMILY) && !defined(__SSE__)

	61 convolve_proc_(base::CPU().has_sse() ? Convolve_SSE : Convolve_C),

	62 #endif

60 // Setup various region pointers in the buffer (see diagram above).	63 // Setup various region pointers in the buffer (see diagram above).

61 r0_(input_buffer_.get() + kKernelSize / 2),	64 r0_(input_buffer_.get() + kKernelSize / 2),

62 r1_(input_buffer_.get()),	65 r1_(input_buffer_.get()),

63 r2_(r0_),	66 r2_(r0_),

64 r3_(r0_ + kBlockSize - kKernelSize / 2),	67 r3_(r0_ + kBlockSize - kKernelSize / 2),

65 r4_(r0_ + kBlockSize),	68 r4_(r0_ + kBlockSize),

66 r5_(r0_ + kKernelSize / 2) {	69 r5_(r0_ + kKernelSize / 2) {

67 // Ensure kKernelSize is a multiple of 32 for easy SSE optimizations; causes	70 // Ensure kKernelSize is a multiple of 32 for easy SSE optimizations; causes

68 // r0_ and r5_ (used for input) to always be 16-byte aligned by virtue of	71 // r0_ and r5_ (used for input) to always be 16-byte aligned by virtue of

69 // input_buffer_ being 16-byte aligned.	72 // input_buffer_ being 16-byte aligned.

(...skipping 59 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
129 double x = (i - subsample_offset) / kKernelSize;	132 double x = (i - subsample_offset) / kKernelSize;

130 double window = kA0 - kA1 * cos(2.0 * M_PI * x) + kA2	133 double window = kA0 - kA1 * cos(2.0 * M_PI * x) + kA2

131 * cos(4.0 * M_PI * x);	134 * cos(4.0 * M_PI * x);

132	135

133 // Window the sinc() function and store at the correct offset.	136 // Window the sinc() function and store at the correct offset.

134 kernel_storage_.get()[i + offset_idx * kKernelSize] = sinc * window;	137 kernel_storage_.get()[i + offset_idx * kKernelSize] = sinc * window;

135 }	138 }

136 }	139 }

137 }	140 }

138	141

	142 // If we know the minimum architecture avoid function hopping for CPU detection.

	143 #if defined(ARCH_CPU_X86_FAMILY)

	144 #if defined(__SSE__)

	145 #define CONVOLVE_FUNC Convolve_SSE

	146 #else

	147 // X86 CPU detection required. |convolve_proc_| will be set upon construction.

	148 // TODO(dalecurtis): Once Chrome moves to a SSE baseline this can be removed.

	149 #define CONVOLVE_FUNC convolve_proc_

	150 #endif

	151 #elif defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON)

	152 #define CONVOLVE_FUNC Convolve_NEON

	153 #else

	154 // Unknown architecture.

	155 #define CONVOLVE_FUNC Convolve_C

	156 #endif

	157

139 void SincResampler::Resample(float* destination, int frames) {	158 void SincResampler::Resample(float* destination, int frames) {

140 int remaining_frames = frames;	159 int remaining_frames = frames;

141	160

142 // Step (1) -- Prime the input buffer at the start of the input stream.	161 // Step (1) -- Prime the input buffer at the start of the input stream.

143 if (!buffer_primed_) {	162 if (!buffer_primed_) {

144 read_cb_.Run(r0_, kBlockSize + kKernelSize / 2);	163 read_cb_.Run(r0_, kBlockSize + kKernelSize / 2);

145 buffer_primed_ = true;	164 buffer_primed_ = true;

146 }	165 }

147	166

148 // Step (2) -- Resample!	167 // Step (2) -- Resample!

149 while (remaining_frames) {	168 while (remaining_frames) {

150 while (virtual_source_idx_ < kBlockSize) {	169 while (virtual_source_idx_ < kBlockSize) {

151 // |virtual_source_idx_| lies in between two kernel offsets so figure out	170 // |virtual_source_idx_| lies in between two kernel offsets so figure out

152 // what they are.	171 // what they are.

153 int source_idx = static_cast<int>(virtual_source_idx_);	172 int source_idx = static_cast<int>(virtual_source_idx_);

154 double subsample_remainder = virtual_source_idx_ - source_idx;	173 double subsample_remainder = virtual_source_idx_ - source_idx;

155	174

156 double virtual_offset_idx = subsample_remainder * kKernelOffsetCount;	175 double virtual_offset_idx = subsample_remainder * kKernelOffsetCount;

157 int offset_idx = static_cast<int>(virtual_offset_idx);	176 int offset_idx = static_cast<int>(virtual_offset_idx);

158	177

159 // We'll compute "convolutions" for the two kernels which straddle	178 // We'll compute "convolutions" for the two kernels which straddle

160 // |virtual_source_idx_|.	179 // |virtual_source_idx_|.

161 float* k1 = kernel_storage_.get() + offset_idx * kKernelSize;	180 float* k1 = kernel_storage_.get() + offset_idx * kKernelSize;

162 float* k2 = k1 + kKernelSize;	181 float* k2 = k1 + kKernelSize;

163	182

	183 // Ensure |k1|, |k2| are 16-byte aligned for SIMD usage. Should always be

	184 // true so long as kKernelSize is a multiple of 16.

	185 DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(k1) & 0x0F);

	186 DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(k2) & 0x0F);

	187

164 // Initialize input pointer based on quantized |virtual_source_idx_|.	188 // Initialize input pointer based on quantized |virtual_source_idx_|.

165 float* input_ptr = r1_ + source_idx;	189 float* input_ptr = r1_ + source_idx;

166	190

167 // Figure out how much to weight each kernel's "convolution".	191 // Figure out how much to weight each kernel's "convolution".

168 double kernel_interpolation_factor = virtual_offset_idx - offset_idx;	192 double kernel_interpolation_factor = virtual_offset_idx - offset_idx;

169 *destination++ = Convolve(	193 *destination++ = CONVOLVE_FUNC(

170 input_ptr, k1, k2, kernel_interpolation_factor);	194 input_ptr, k1, k2, kernel_interpolation_factor);

171	195

172 // Advance the virtual index.	196 // Advance the virtual index.

173 virtual_source_idx_ += io_sample_rate_ratio_;	197 virtual_source_idx_ += io_sample_rate_ratio_;

174	198

175 if (!--remaining_frames)	199 if (!--remaining_frames)

176 return;	200 return;

177 }	201 }

178	202

179 // Wrap back around to the start.	203 // Wrap back around to the start.

180 virtual_source_idx_ -= kBlockSize;	204 virtual_source_idx_ -= kBlockSize;

181	205

182 // Step (3) Copy r3_ to r1_ and r4_ to r2_.	206 // Step (3) Copy r3_ to r1_ and r4_ to r2_.

183 // This wraps the last input frames back to the start of the buffer.	207 // This wraps the last input frames back to the start of the buffer.

184 memcpy(r1_, r3_, sizeof(*input_buffer_.get()) * (kKernelSize / 2));	208 memcpy(r1_, r3_, sizeof(*input_buffer_.get()) * (kKernelSize / 2));

185 memcpy(r2_, r4_, sizeof(*input_buffer_.get()) * (kKernelSize / 2));	209 memcpy(r2_, r4_, sizeof(*input_buffer_.get()) * (kKernelSize / 2));

186	210

187 // Step (4)	211 // Step (4)

188 // Refresh the buffer with more input.	212 // Refresh the buffer with more input.

189 read_cb_.Run(r5_, kBlockSize);	213 read_cb_.Run(r5_, kBlockSize);

190 }	214 }

191 }	215 }

192	216

	217 #undef CONVOLVE_FUNC

	218

193 int SincResampler::ChunkSize() const {	219 int SincResampler::ChunkSize() const {

194 return kBlockSize / io_sample_rate_ratio_;	220 return kBlockSize / io_sample_rate_ratio_;

195 }	221 }

196	222

197 void SincResampler::Flush() {	223 void SincResampler::Flush() {

198 virtual_source_idx_ = 0;	224 virtual_source_idx_ = 0;

199 buffer_primed_ = false;	225 buffer_primed_ = false;

200 memset(input_buffer_.get(), 0, sizeof(*input_buffer_.get()) * kBufferSize);	226 memset(input_buffer_.get(), 0, sizeof(*input_buffer_.get()) * kBufferSize);

201 }	227 }

202	228

203 float SincResampler::Convolve(const float* input_ptr, const float* k1,

204 const float* k2,

205 double kernel_interpolation_factor) {

206 // Ensure |k1|, |k2| are 16-byte aligned for SSE usage. Should always be true

207 // so long as kKernelSize is a multiple of 16.

208 DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(k1) & 0x0F);

209 DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(k2) & 0x0F);

210

211 // Rely on function level static initialization to keep ConvolveProc selection

212 // thread safe.

213 typedef float (*ConvolveProc)(const float* src, const float* k1,

214 const float* k2,

215 double kernel_interpolation_factor);

216 #if defined(ARCH_CPU_X86_FAMILY)

217 #if defined(__SSE__)

218 static const ConvolveProc kConvolveProc = Convolve_SSE;

219 #else

220 static const ConvolveProc kConvolveProc =

221 base::CPU().has_sse() ? Convolve_SSE : Convolve_C;

222 #endif

223 #elif defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON)

224 static const ConvolveProc kConvolveProc = Convolve_NEON;

225 #else

226 static const ConvolveProc kConvolveProc = Convolve_C;

227 #endif

228

229 return kConvolveProc(input_ptr, k1, k2, kernel_interpolation_factor);

230 }

231

232 float SincResampler::Convolve_C(const float* input_ptr, const float* k1,	229 float SincResampler::Convolve_C(const float* input_ptr, const float* k1,

233 const float* k2,	230 const float* k2,

234 double kernel_interpolation_factor) {	231 double kernel_interpolation_factor) {

235 float sum1 = 0;	232 float sum1 = 0;

236 float sum2 = 0;	233 float sum2 = 0;

237	234

238 // Generate a single output sample. Unrolling this loop hurt performance in	235 // Generate a single output sample. Unrolling this loop hurt performance in

239 // local testing.	236 // local testing.

240 int n = kKernelSize;	237 int n = kKernelSize;

241 while (n--) {	238 while (n--) {

(...skipping 29 matching lines...) Expand all Loading...
271 vmulq_f32(m_sums1, vmovq_n_f32(1.0 - kernel_interpolation_factor)),	268 vmulq_f32(m_sums1, vmovq_n_f32(1.0 - kernel_interpolation_factor)),

272 m_sums2, vmovq_n_f32(kernel_interpolation_factor));	269 m_sums2, vmovq_n_f32(kernel_interpolation_factor));

273	270

274 // Sum components together.	271 // Sum components together.

275 float32x2_t m_half = vadd_f32(vget_high_f32(m_sums1), vget_low_f32(m_sums1));	272 float32x2_t m_half = vadd_f32(vget_high_f32(m_sums1), vget_low_f32(m_sums1));

276 return vget_lane_f32(vpadd_f32(m_half, m_half), 0);	273 return vget_lane_f32(vpadd_f32(m_half, m_half), 0);

277 }	274 }

278 #endif	275 #endif

279	276

280 } // namespace media	277 } // namespace media

OLD	NEW

« no previous file with comments | « media/base/sinc_resampler.h ('k') | no next file » | no next file with comments »