media/filters/audio_renderer_algorithm.cc - Issue 19111004: Upgrade AudioRendererAlgorithm to use WSOLA,

Side by Side Diff: media/filters/audio_renderer_algorithm.cc

Issue 19111004: Upgrade AudioRendererAlgorithm to use WSOLA, (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Created 7 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

« media/filters/audio_renderer_algorithm.h ('K') | « media/filters/audio_renderer_algorithm.h ('k') | media/filters/audio_renderer_algorithm_unittest.cc » ('j') | media/filters/audio_renderer_algorithm_unittest.cc » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "media/filters/audio_renderer_algorithm.h"	5 #include "media/filters/audio_renderer_algorithm.h"

6	6

7 #include <algorithm>	7 #include <algorithm>

8 #include <cmath>	8 #include <cmath>

9	9

10 #include "base/logging.h"	10 #include "base/logging.h"

11 #include "base/memory/scoped_ptr.h"	11 #include "base/memory/scoped_ptr.h"

12 #include "media/audio/audio_util.h"	12 #include "media/audio/audio_util.h"

13 #include "media/base/audio_buffer.h"	13 #include "media/base/audio_buffer.h"

14 #include "media/base/audio_bus.h"	14 #include "media/base/audio_bus.h"

	15 #include "media/filters/audio_renderer_algorithm_util.h"

15	16

16 namespace media {	17 namespace media {

17	18

18 // The starting size in frames for \|audio_buffer_\|. Previous usage maintained a

19 // queue of 16 AudioBuffers, each of 512 frames. This worked well, so we

20 // maintain this number of frames.

21 static const int kStartingBufferSizeInFrames = 16 * 512;

22

23 // The maximum size in frames for the \|audio_buffer_\|. Arbitrarily determined.	19 // The maximum size in frames for the \|audio_buffer_\|. Arbitrarily determined.

24 // This number represents 3 seconds of 96kHz/16 bit 7.1 surround sound.	20 // This number represents 3 seconds of 96kHz/16 bit 7.1 surround sound.

25 static const int kMaxBufferSizeInFrames = 3 * 96000;	21 static const int kMaxBufferSizeInFrames = 3 * 96000;

26	22

27 // Duration of audio segments used for crossfading (in seconds).

28 static const double kWindowDuration = 0.08;

29

30 // Duration of crossfade between audio segments (in seconds).

31 static const double kCrossfadeDuration = 0.008;

32

33 // Max/min supported playback rates for fast/slow audio. Audio outside of these	23 // Max/min supported playback rates for fast/slow audio. Audio outside of these

34 // ranges are muted.	24 // ranges are muted.

35 // Audio at these speeds would sound better under a frequency domain algorithm.	25 // Audio at these speeds would sound better under a frequency domain algorithm.

36 static const float kMinPlaybackRate = 0.5f;	26 static const float kMinPlaybackRate = 0.5f;

37 static const float kMaxPlaybackRate = 4.0f;	27 static const float kMaxPlaybackRate = 4.0f;

38	28

	29 // Overlap-and-add window size in milliseconds.

	30 static const int kOlaWindowSizeMs = 25;

	31

	32 // Size of the search interval in milliseconds. The search interval is

	33 // [-delta, delta] around |output_index_| * |playback_rate_|, so its total

	34 // width is 2 * delta.

	35 static const int kWsolaSearchIntervalMs = 30;

	36

	37 // The starting size in frames for \|audio_buffer_\|. Previous usage maintained a

	38 // queue of 16 AudioBuffers, each of 512 frames. This worked well, so we

	39 // maintain this number of frames.

	40 static const int kStartingBufferSizeInFrames = 16 * 512;
	ajm 2013/07/23 18:03:28: I assume this is the "frames as in samples" usage. We don't want to mix.
	turaj 2013/07/29 22:09:57: The notion of a frame, where frame _N_ is the set of sample _N_ of all channels, already existed, as you can see. I tried to comply with that notion. On 2013/07/23 18:03:28, ajm wrote: > I assume this is the "frames as in samples" usage. We don't want to mix.
	41

39 AudioRendererAlgorithm::AudioRendererAlgorithm()	42 AudioRendererAlgorithm::AudioRendererAlgorithm()

40 : channels_(0),	43 : channels_(0),

41 samples_per_second_(0),	44 samples_per_second_(0),

42 playback_rate_(0),	45 playback_rate_(0),

43 frames_in_crossfade_(0),

44 index_into_window_(0),

45 crossfade_frame_number_(0),

46 muted_(false),	46 muted_(false),

47 muted_partial_frame_(0),	47 muted_partial_frame_(0),

48 window_size_(0),	48 capacity_(kStartingBufferSizeInFrames),

49 capacity_(kStartingBufferSizeInFrames) {	49 output_index_(0),

	50 search_region_center_offset_(0),

	51 num_candid_frames_(0),

	52 target_window_index_(0),

	53 ola_window_size_(0),

	54 ola_hop_size_(0),

	55 num_complete_frames_(0) {

50 }	56 }

51	57

52 AudioRendererAlgorithm::~AudioRendererAlgorithm() {}	58 AudioRendererAlgorithm::~AudioRendererAlgorithm() {}

53	59

54 void AudioRendererAlgorithm::Initialize(float initial_playback_rate,	60 void AudioRendererAlgorithm::Initialize(float initial_playback_rate,

55 const AudioParameters& params) {	61 const AudioParameters& params) {

56 CHECK(params.IsValid());	62 CHECK(params.IsValid());

57	63

58 channels_ = params.channels();	64 channels_ = params.channels();

59 samples_per_second_ = params.sample_rate();	65 samples_per_second_ = params.sample_rate();

60 SetPlaybackRate(initial_playback_rate);	66 SetPlaybackRate(initial_playback_rate);

61	67

62 window_size_ = samples_per_second_ * kWindowDuration;	68 num_candid_frames_ =

63 frames_in_crossfade_ = samples_per_second_ * kCrossfadeDuration;	69 (kWsolaSearchIntervalMs * samples_per_second_) / 1000 + 1;
	ajm 2013/07/23 18:03:28: What's the +1 for?
	turaj 2013/07/29 22:09:57: To make the search region symmetric around |output_index| * |playback_rate|. There is no algorithmic significance to the +1. It made my first implementation more straightforward. I might remove it in this implementation. On 2013/07/23 18:03:28, ajm wrote: > What's the +1 for?
64 crossfade_buffer_ = AudioBus::Create(channels_, frames_in_crossfade_);	70

	71 // Make sure the window size is an even number.

	72 ola_window_size_ = static_cast<int>(

	73 floor(kOlaWindowSizeMs * samples_per_second_ / 1000 / 2)) * 2;
	ajm 2013/07/23 18:03:28: These are all integers, right? Shouldn't need the floor, or the cast.
	turaj 2013/07/29 22:09:57: Right. On 2013/07/23 18:03:28, ajm wrote: > These are all integers, right? Shouldn't need the floor, or the cast.
	74

	75 ola_hop_size_ = ola_window_size_ / 2;

	76

	77 search_region_center_offset_ = (num_candid_frames_ - 1) / 2 + (

	78 ola_window_size_ / 2 - 1);

	79

	80 ola_window_.reset(new float[ola_window_size_]);

	81 HannSym(ola_window_size_, ola_window_.get());

	82

	83 transition_window_.reset(new float[ola_window_size_ * 2]);

	84 HannSym(2 * ola_window_size_, transition_window_.get());

	85

	86 wsola_output_ = AudioBus::Create(channels_, ola_window_size_ + ola_hop_size_);

65 }	87 }

66	88

67 int AudioRendererAlgorithm::FillBuffer(AudioBus* dest, int requested_frames) {	89 int AudioRendererAlgorithm::FillBuffer(AudioBus* dest, int requested_frames) {

68 if (playback_rate_ == 0)	90 if (playback_rate_ == 0)

69 return 0;	91 return 0;

70	92

71 // Optimize the \|muted_\| case to issue a single clear instead of performing	93 // Optimize the \|muted_\| case to issue a single clear instead of performing

72 // the full crossfade and clearing each crossfaded frame.	94 // the full crossfade and clearing each crossfaded frame.

73 if (muted_) {	95 if (muted_) {

74 int frames_to_render =	96 int frames_to_render =

(...skipping 11 matching lines...) Expand all Loading...
86	108

87 // Determine the partial frame that remains to be skipped for next call. If	109 // Determine the partial frame that remains to be skipped for next call. If

88 // the user switches back to playing, it may be off time by this partial	110 // the user switches back to playing, it may be off time by this partial

89 // frame, which would be undetectable. If they subsequently switch to	111 // frame, which would be undetectable. If they subsequently switch to

90 // another playback rate that mutes, the code will attempt to line up the	112 // another playback rate that mutes, the code will attempt to line up the

91 // frames again.	113 // frames again.

92 muted_partial_frame_ -= seek_frames;	114 muted_partial_frame_ -= seek_frames;

93 return frames_to_render;	115 return frames_to_render;

94 }	116 }

95	117

96 int slower_step = ceil(window_size_ * playback_rate_);	118 int slower_step = ceil(ola_window_size_ * playback_rate_);

97 int faster_step = ceil(window_size_ / playback_rate_);	119 int faster_step = ceil(ola_window_size_ / playback_rate_);

98	120

99 // Optimize the most common \|playback_rate_\| ~= 1 case to use a single copy	121 // Optimize the most common \|playback_rate_\| ~= 1 case to use a single copy

100 // instead of copying frame by frame.	122 // instead of copying frame by frame.

101 if (window_size_ <= faster_step && slower_step >= window_size_) {	123 if (ola_window_size_ <= faster_step && slower_step >= ola_window_size_) {

102 const int frames_to_copy =	124 const int frames_to_copy =

103 std::min(audio_buffer_.frames(), requested_frames);	125 std::min(audio_buffer_.frames(), requested_frames);

104 const int frames_read = audio_buffer_.ReadFrames(frames_to_copy, 0, dest);	126 const int frames_read = audio_buffer_.ReadFrames(frames_to_copy, 0, dest);

105 DCHECK_EQ(frames_read, frames_to_copy);	127 DCHECK_EQ(frames_read, frames_to_copy);

106 return frames_read;	128 return frames_read;

107 }	129 }

108	130

109 int total_frames_rendered = 0;	131 int total_frames_rendered = WsolaOutput(requested_frames, dest);

110 while (total_frames_rendered < requested_frames) {

111 if (index_into_window_ >= window_size_)

112 ResetWindow();

113

114 int rendered_frames = 0;

115 if (window_size_ > faster_step) {

116 rendered_frames =

117 OutputFasterPlayback(dest,

118 total_frames_rendered,

119 requested_frames - total_frames_rendered,

120 window_size_,

121 faster_step);

122 } else if (slower_step < window_size_) {

123 rendered_frames =

124 OutputSlowerPlayback(dest,

125 total_frames_rendered,

126 requested_frames - total_frames_rendered,

127 slower_step,

128 window_size_);

129 } else {

130 NOTREACHED();

131 }

132

133 if (rendered_frames == 0)

134 break;

135

136 total_frames_rendered += rendered_frames;

137 }

138 return total_frames_rendered;	132 return total_frames_rendered;

139 }	133 }

140	134

141 void AudioRendererAlgorithm::ResetWindow() {

142 DCHECK_LE(index_into_window_, window_size_);

143 index_into_window_ = 0;

144 crossfade_frame_number_ = 0;

145 }

146

147 int AudioRendererAlgorithm::OutputFasterPlayback(AudioBus* dest,

148 int dest_offset,

149 int requested_frames,

150 int input_step,

151 int output_step) {

152 // Ensure we don't run into OOB read/write situation.

153 CHECK_GT(input_step, output_step);

154 DCHECK_LT(index_into_window_, window_size_);

155 DCHECK_GT(playback_rate_, 1.0);

156 DCHECK(!muted_);

157

158 if (audio_buffer_.frames() < 1)

159 return 0;

160

161 // The audio data is output in a series of windows. For sped-up playback,

162 // the window is comprised of the following phases:

163 //

164 // a) Output raw data.

165 // b) Save bytes for crossfade in \|crossfade_buffer_\|.

166 // c) Drop data.

167 // d) Output crossfaded audio leading up to the next window.

168 //

169 // The duration of each phase is computed below based on the \|window_size_\|

170 // and \|playback_rate_\|.

171 DCHECK_LE(frames_in_crossfade_, output_step);

172

173 // This is the index of the end of phase a, beginning of phase b.

174 int outtro_crossfade_begin = output_step - frames_in_crossfade_;

175

176 // This is the index of the end of phase b, beginning of phase c.

177 int outtro_crossfade_end = output_step;

178

179 // This is the index of the end of phase c, beginning of phase d.

180 // This phase continues until \|index_into_window_\| reaches \|window_size_\|, at

181 // which point the window restarts.

182 int intro_crossfade_begin = input_step - frames_in_crossfade_;

183

184 // a) Output raw frames if we haven't reached the crossfade section.

185 if (index_into_window_ < outtro_crossfade_begin) {

186 // Read as many frames as we can and return the count. If it's not enough,

187 // we will get called again.

188 const int frames_to_copy =

189 std::min(requested_frames, outtro_crossfade_begin - index_into_window_);

190 int copied = audio_buffer_.ReadFrames(frames_to_copy, dest_offset, dest);

191 index_into_window_ += copied;

192 return copied;

193 }

194

195 // b) Save outtro crossfade frames into intermediate buffer, but do not output

196 // anything to \|dest\|.

197 if (index_into_window_ < outtro_crossfade_end) {

198 // This phase only applies if there are bytes to crossfade.

199 DCHECK_GT(frames_in_crossfade_, 0);

200 int crossfade_start = index_into_window_ - outtro_crossfade_begin;

201 int crossfade_count = outtro_crossfade_end - index_into_window_;

202 int copied = audio_buffer_.ReadFrames(

203 crossfade_count, crossfade_start, crossfade_buffer_.get());

204 index_into_window_ += copied;

205

206 // Did we get all the frames we need? If not, return and let subsequent

207 // calls try to get the rest.

208 if (copied != crossfade_count)

209 return 0;

210 }

211

212 // c) Drop frames until we reach the intro crossfade section.

213 if (index_into_window_ < intro_crossfade_begin) {

214 // Check if there is enough data to skip all the frames needed. If not,

215 // return 0 and let subsequent calls try to skip it all.

216 int seek_frames = intro_crossfade_begin - index_into_window_;

217 if (audio_buffer_.frames() < seek_frames)

218 return 0;

219 audio_buffer_.SeekFrames(seek_frames);

220

221 // We've dropped all the frames that need to be dropped.

222 index_into_window_ += seek_frames;

223 }

224

225 // d) Crossfade and output a frame, as long as we have data.

226 if (audio_buffer_.frames() < 1)

227 return 0;

228 DCHECK_GT(frames_in_crossfade_, 0);

229 DCHECK_LT(index_into_window_, window_size_);

230

231 int offset_into_buffer = index_into_window_ - intro_crossfade_begin;

232 int copied = audio_buffer_.ReadFrames(1, dest_offset, dest);

233 DCHECK_EQ(copied, 1);

234 CrossfadeFrame(crossfade_buffer_.get(),

235 offset_into_buffer,

236 dest,

237 dest_offset,

238 offset_into_buffer);

239 index_into_window_ += copied;

240 return copied;

241 }

242

243 int AudioRendererAlgorithm::OutputSlowerPlayback(AudioBus* dest,

244 int dest_offset,

245 int requested_frames,

246 int input_step,

247 int output_step) {

248 // Ensure we don't run into OOB read/write situation.

249 CHECK_LT(input_step, output_step);

250 DCHECK_LT(index_into_window_, window_size_);

251 DCHECK_LT(playback_rate_, 1.0);

252 DCHECK_NE(playback_rate_, 0);

253 DCHECK(!muted_);

254

255 if (audio_buffer_.frames() < 1)

256 return 0;

257

258 // The audio data is output in a series of windows. For slowed down playback,

259 // the window is comprised of the following phases:

260 //

261 // a) Output raw data.

262 // b) Output and save bytes for crossfade in \|crossfade_buffer_\|.

263 // c) Output* raw data.

264 // d) Output* crossfaded audio leading up to the next window.

265 //

266 // * Phases c) and d) do not progress \|audio_buffer_\|'s cursor so that the

267 // \|audio_buffer_\|'s cursor is in the correct place for the next window.

268 //

269 // The duration of each phase is computed below based on the \|window_size_\|

270 // and \|playback_rate_\|.

271 DCHECK_LE(frames_in_crossfade_, input_step);

272

273 // This is the index of the end of phase a, beginning of phase b.

274 int intro_crossfade_begin = input_step - frames_in_crossfade_;

275

276 // This is the index of the end of phase b, beginning of phase c.

277 int intro_crossfade_end = input_step;

278

279 // This is the index of the end of phase c, beginning of phase d.

280 // This phase continues until \|index_into_window_\| reaches \|window_size_\|, at

281 // which point the window restarts.

282 int outtro_crossfade_begin = output_step - frames_in_crossfade_;

283

284 // a) Output raw frames.

285 if (index_into_window_ < intro_crossfade_begin) {

286 // Read as many frames as we can and return the count. If it's not enough,

287 // we will get called again.

288 const int frames_to_copy =

289 std::min(requested_frames, intro_crossfade_begin - index_into_window_);

290 int copied = audio_buffer_.ReadFrames(frames_to_copy, dest_offset, dest);

291 index_into_window_ += copied;

292 return copied;

293 }

294

295 // b) Save the raw frames for the intro crossfade section, then copy the

296 // same frames to \|dest\|.

297 if (index_into_window_ < intro_crossfade_end) {

298 const int frames_to_copy =

299 std::min(requested_frames, intro_crossfade_end - index_into_window_);

300 int offset = index_into_window_ - intro_crossfade_begin;

301 int copied = audio_buffer_.ReadFrames(

302 frames_to_copy, offset, crossfade_buffer_.get());

303 crossfade_buffer_->CopyPartialFramesTo(offset, copied, dest_offset, dest);

304 index_into_window_ += copied;

305 return copied;

306 }

307

308 // c) Output a raw frame into \|dest\| without advancing the \|audio_buffer_\|

309 // cursor.

310 int audio_buffer_offset = index_into_window_ - intro_crossfade_end;

311 DCHECK_GE(audio_buffer_offset, 0);

312 if (audio_buffer_.frames() <= audio_buffer_offset)

313 return 0;

314 int copied =

315 audio_buffer_.PeekFrames(1, audio_buffer_offset, dest_offset, dest);

316 DCHECK_EQ(1, copied);

317

318 // d) Crossfade the next frame of \|crossfade_buffer_\| into \|dest\| if we've

319 // reached the outtro crossfade section of the window.

320 if (index_into_window_ >= outtro_crossfade_begin) {

321 int offset_into_crossfade_buffer =

322 index_into_window_ - outtro_crossfade_begin;

323 CrossfadeFrame(dest,

324 dest_offset,

325 crossfade_buffer_.get(),

326 offset_into_crossfade_buffer,

327 offset_into_crossfade_buffer);

328 }

329

330 index_into_window_ += copied;

331 return copied;

332 }

333

334 void AudioRendererAlgorithm::CrossfadeFrame(AudioBus* intro,

335 int intro_offset,

336 AudioBus* outtro,

337 int outtro_offset,

338 int fade_offset) {

339 float crossfade_ratio =

340 static_cast<float>(fade_offset) / frames_in_crossfade_;

341 for (int channel = 0; channel < channels_; ++channel) {

342 outtro->channel(channel)[outtro_offset] =

343 (1.0f - crossfade_ratio) * intro->channel(channel)[intro_offset] +

344 (crossfade_ratio) * outtro->channel(channel)[outtro_offset];

345 }

346 }

347

348 void AudioRendererAlgorithm::SetPlaybackRate(float new_rate) {	135 void AudioRendererAlgorithm::SetPlaybackRate(float new_rate) {

349 DCHECK_GE(new_rate, 0);	136 DCHECK_GE(new_rate, 0);

350 playback_rate_ = new_rate;	137 playback_rate_ = new_rate;

	138 // Round it to two decimal digits.

	139 playback_rate_ = floor(playback_rate_ * 100.f + 0.5f) / 100;
	ajm 2013/07/23 18:03:28: Just use new_rate here directly. Why do you have to truncate this?
	turaj 2013/07/29 22:09:57: Truncation is needed when it comes to removing frames (samples) from the input. If |P| samples are removed from the input, then |P| / |playback_rate| samples should be removed from the output (reduce |output_index| by that amount). |P| and |P| / |playback_rate| should both be integers for the time scaling to be accurate. Otherwise, input and output slowly drift. I haven't investigated what the result might be, but I guess we might lose lip-sync. So if we can simply avoid it, why not? By confining |playback_rate| to two decimals, which is sufficient for any application, |P| can be 100 * |playback_rate|. Otherwise we have to search for an integer that satisfies the above, and I didn't want to go down that path. Another solution would be to specify |playback_rate| as the ratio of two integers |p| and |q|, but I didn't want to touch clients of AudioRendererAlgorithm. On 2013/07/23 18:03:28, ajm wrote: > Just use new_rate here directly. Why do you have to truncate this?
351 muted_ =	140 muted_ =

352 playback_rate_ < kMinPlaybackRate || playback_rate_ > kMaxPlaybackRate;	141 playback_rate_ < kMinPlaybackRate || playback_rate_ > kMaxPlaybackRate;

353

354 ResetWindow();

355 }	142 }

356	143

357 void AudioRendererAlgorithm::FlushBuffers() {	144 void AudioRendererAlgorithm::FlushBuffers() {

358 ResetWindow();

359

360 // Clear the queue of decoded packets (releasing the buffers).	145 // Clear the queue of decoded packets (releasing the buffers).

361 audio_buffer_.Clear();	146 audio_buffer_.Clear();

	147 output_index_ = 0;

	148 target_window_index_ = 0;

	149 wsola_output_->Zero();

	150 num_complete_frames_ = 0;

362 }	151 }

363	152

364 base::TimeDelta AudioRendererAlgorithm::GetTime() {	153 base::TimeDelta AudioRendererAlgorithm::GetTime() {

365 return audio_buffer_.current_time();	154 return audio_buffer_.current_time();

366 }	155 }

367	156

368 void AudioRendererAlgorithm::EnqueueBuffer(	157 void AudioRendererAlgorithm::EnqueueBuffer(

369 const scoped_refptr<AudioBuffer>& buffer_in) {	158 const scoped_refptr<AudioBuffer>& buffer_in) {

370 DCHECK(!buffer_in->end_of_stream());	159 DCHECK(!buffer_in->end_of_stream());

371 audio_buffer_.Append(buffer_in);	160 audio_buffer_.Append(buffer_in);

372 }	161 }

373	162

374 bool AudioRendererAlgorithm::IsQueueFull() {	163 bool AudioRendererAlgorithm::IsQueueFull() {

375 return audio_buffer_.frames() >= capacity_;	164 return audio_buffer_.frames() >= capacity_;

376 }	165 }

377	166

378 void AudioRendererAlgorithm::IncreaseQueueCapacity() {	167 void AudioRendererAlgorithm::IncreaseQueueCapacity() {

379 capacity_ = std::min(2 * capacity_, kMaxBufferSizeInFrames);	168 capacity_ = std::min(2 * capacity_, kMaxBufferSizeInFrames);

380 }	169 }

381	170

	171 bool AudioRendererAlgorithm::CanPerformWsola() const {

	172 const int search_region_size = num_candid_frames_ + (ola_window_size_ - 1);

	173 const int frames = audio_buffer_.frames();

	174 if (target_window_index_ + ola_window_size_ <= frames &&

	175 GetSearchRegionIndex() + search_region_size <= frames) {

	176 return true;

	177 }

	178 return false;

	179 }

	180

	181 int AudioRendererAlgorithm::WsolaOutput(int requested_frames, AudioBus* dest) {

	182 DCHECK(channels_ == dest->channels());

	183

	184 int rendered_frames = ReadWsolaOutput(requested_frames, 0, dest);

	185 while (rendered_frames < requested_frames && CanPerformWsola()) {

	186 Wsola();

	187 rendered_frames += ReadWsolaOutput(requested_frames - rendered_frames,
	ajm 2013/07/23 18:03:28: Do you need to break these functions up?
	turaj 2013/07/29 22:09:57: I can define "int Wsola(requested_frames, int output_offset, AudioBus* dest)" and call ReadWsolaOutput() at the end of Wsola(). On 2013/07/23 18:03:28, ajm wrote: > Do you need to break these functions up?
	188 rendered_frames, dest);

	189 }

	190 return rendered_frames;

	191 }

	192

	193 void AudioRendererAlgorithm::Wsola() {

	194 // Holds the optimal Frame.

	195 scoped_ptr<AudioBus> optimal_frame = AudioBus::Create(
	ajm 2013/07/23 18:03:28: I'm not sure how AudioBus works, but do you want to be creating one with every iteration?
	turaj 2013/07/29 22:09:57: It is not very expensive: basically one malloc, plus some calculation to get the correct size for correct alignment. I can make it a member to avoid dynamic memory allocation. Dale, what do you think? On 2013/07/23 18:03:28, ajm wrote: > I'm not sure how AudioBus works, but do you want to be creating one with every iteration?
	DaleCurtis 2013/07/29 23:48:32: malloc is very expensive relative to the rest of the costs here; please use a member variable. On 2013/07/29 22:09:57, turaj wrote: > It is not very expensive basically one malloc with some calculation to get the correct size to have correct alignment. I can define it member to avoid dynamic memory allocation, Dale what do you think?
	196 channels_, ola_window_size_);

	197 GetOptimalBlock(optimal_frame.get());

	198

	199 // Overlap-and-add.

	200 for(int k = 0; k < channels_; ++k) {

	201 float* ch_opt_frame = optimal_frame->channel(k);

	202 float* ch_output = wsola_output_->channel(k) + num_complete_frames_;

	203 for (int n = 0; n < ola_hop_size_; ++n) {

	204 ch_output[n] = ch_output[n] * ola_window_[ola_hop_size_ + n] +

	205 ch_opt_frame[n] * ola_window_[n];

	206 }

	207

	208 // Copy the second half to the output.

	209 memcpy(&ch_output[ola_hop_size_], &ch_opt_frame[ola_hop_size_],

	210 sizeof(*ch_opt_frame) * ola_hop_size_);

	211 }

	212

	213 num_complete_frames_ += ola_hop_size_;

	214 output_index_ += ola_hop_size_;

	215

	216 RemoveOldInputFrames();

	217 }

	218

	219 int AudioRendererAlgorithm::GetSearchRegionIndex() const {

	220 // Center of the search region, in frames.

	221 const int search_region_center_index = static_cast<int>(floor(

	222 output_index_ * playback_rate_ + 0.5));

	223

	224 // Index of the beginning of the search region, in frames.

	225 return search_region_center_index - search_region_center_offset_;

	226 }

	227

	228 void AudioRendererAlgorithm::RemoveOldInputFrames() {

	229 const int earliest_used_index = std::min(target_window_index_,

	230 GetSearchRegionIndex());

	231

	232 if (earliest_used_index < 0)

	233 return; // Nothing to remove

	234

	235 // Assuming \|playback_rate_\| * 100 == floor(\|playback_rate_\| * 100)

	236 // that is \|playback_rate_\| is represented by 2 decimal digits, only.

	237 // We eliminate blocks of size 100 * \|playback_rate_\| from input.

	238 const int kOutputFramesPerBlock = 100;

	239 const int input_frames_per_block =

	240 static_cast<int>(floor(playback_rate_ * kOutputFramesPerBlock + 0.5f));

	241 const int blocks_to_remove = earliest_used_index / input_frames_per_block;

	242 const int input_frames_to_remove = input_frames_per_block * blocks_to_remove;

	243

	244 // Remove frames from input and adjust indices accordingly.

	245 audio_buffer_.SeekFrames(input_frames_to_remove);

	246 target_window_index_ -= input_frames_to_remove;

	247

	248 // Adjust output index.

	249 output_index_ -= kOutputFramesPerBlock * blocks_to_remove;

	250 DCHECK(output_index_ >= 0);

	251 }

	252

	253 int AudioRendererAlgorithm::ReadWsolaOutput(

	254 int requested_frames, int output_offset, AudioBus* dest) {

	255 int rendered_frames = std::min(num_complete_frames_, requested_frames);

	256

	257 if (rendered_frames == 0)

	258 return 0; // There is nothing to read from \|wsola_output_\|, return.

	259

	260 wsola_output_->CopyPartialFramesTo(0, rendered_frames, output_offset, dest);

	261

	262 // Remove the frames which are read.

	263 int frames_to_move = wsola_output_->frames() - rendered_frames;

	264 for (int k = 0; k < channels_; ++k) {

	265 float* ch = wsola_output_->channel(k);

	266 memmove(ch, &ch[rendered_frames], sizeof(*ch) * frames_to_move);

	267 }

	268 num_complete_frames_ -= rendered_frames;

	269 return rendered_frames;

	270 }

	271

	272 bool AudioRendererAlgorithm::TargetIsWithinSearchRegion() const {

	273 const int search_region_index = GetSearchRegionIndex();

	274 const int search_region_size = num_candid_frames_ + (ola_window_size_ - 1);

	275

	276 if (target_window_index_ >= search_region_index &&

	277 target_window_index_ + ola_window_size_ <=

	278 search_region_index + search_region_size) {

	279 return true;

	280 }

	281 return false;

	282 }

	283

	284 void AudioRendererAlgorithm::GetOptimalBlock(AudioBus* optimal_block) {

	285 int optimal_index = 0;

	286 if (TargetIsWithinSearchRegion()) {

	287 optimal_index = target_window_index_;

	288 // Get the optimal window.

	289 PeekAudioWithZerroAppend(optimal_index, optimal_block);

	290 } else {

	291 // Holds the target window.

	292 scoped_ptr<AudioBus> target_window = AudioBus::Create(

	293 channels_, ola_window_size_);

	294 PeekAudioWithZerroAppend(target_window_index_, target_window.get());

	295

	296 const int search_region_index = GetSearchRegionIndex();

	297

	298 // Holds a segment of the signal on which the similarity measure operates.

	299 scoped_ptr<AudioBus> search_segment = AudioBus::Create(

	300 channels_, num_candid_frames_ + (ola_window_size_ - 1));

	301 PeekAudioWithZerroAppend(search_region_index, search_segment.get());

	302

	303 int last_optimal = target_window_index_ - ola_hop_size_ -

	304 search_region_index;

	305 interval exclude_iterval = std::make_pair(last_optimal - 80,

	306 last_optimal + 80);

	307

	308 // \|optimal_index\| is in frames and it is relative to the beginning

	309 // of the \|search_segment\|.

	310 optimal_index = OptimalIndex(search_segment.get(), target_window.get(),

	311 exclude_iterval);

	312

	313 // Translate |optimal_index| so it is relative to the beginning of |audio_buffer_|.

	314 optimal_index += search_region_index;

	315

	316 // Get the optimal window.

	317 PeekAudioWithZerroAppend(optimal_index, optimal_block);
	ajm 2013/07/23 18:03:28: Zerro -> Zero
	turaj 2013/07/29 22:09:57: Done. On 2013/07/23 18:03:28, ajm wrote: > Zerro -> Zero
	318

	319 // Make a transition from target window to the optimal window if different.

	320 // Target window has the best continuation to the current output.

	321 // Optimal block is the most similar block to the target, however, it might

	322 // introduce some discontinuity when over-lap-added. Therefore, we combine

	323 // them for a smoother transition.

	324 for (int k = 0; k < channels_; ++k) {

	325 float* ch_opt = optimal_block->channel(k);

	326 float* ch_target = target_window->channel(k);

	327 for (int n = 0; n < ola_window_size_; ++n) {

	328 ch_opt[n] = ch_opt[n] * transition_window_[n] + ch_target[n] *

	329 transition_window_[ola_window_size_ + n];

	330 }

	331 }

	332 }

	333

	334 // Next target is one hop ahead of the current optimal.

	335 target_window_index_ = optimal_index + ola_hop_size_;

	336 }

	337

	338 bool AudioRendererAlgorithm::PeekAudioWithZerroAppend(
	ajm 2013/07/23 18:03:28: You don't check the return value of this anywhere. Better to make it void and DCHECK?
	turaj 2013/07/29 22:09:57: I decided to check it and propagate the result. On 2013/07/23 18:03:28, ajm wrote: > You don't check the return value of this anywhere. Better to make it void and DCHECK?
	339 int read_offset_frames, AudioBus* dest) {

	340 int num_frames = dest->frames();

	341 if (read_offset_frames + num_frames > audio_buffer_.frames())

	342 return false;

	343

	344 int write_offset = 0;

	345 int num_frames_to_read = dest->frames();

	346 if (read_offset_frames < 0) {

	347 int num_zero_frames_appended = std::min(-read_offset_frames,

	348 num_frames_to_read);

	349 read_offset_frames = 0;

	350 num_frames_to_read -= num_zero_frames_appended;

	351 write_offset = num_zero_frames_appended;

	352 dest->ZeroFrames(num_zero_frames_appended);

	353 }

	354 audio_buffer_.PeekFrames(num_frames_to_read, read_offset_frames,

	355 write_offset, dest);

	356 return true;

	357 }

	358

382 } // namespace media	359 } // namespace media

OLD	NEW