media/filters/audio_renderer_algorithm.cc - Issue 19111004: Upgrade AudioRendererAlgorithm to use WSOLA,

Side by Side Diff: media/filters/audio_renderer_algorithm.cc

Issue 19111004: Upgrade AudioRendererAlgorithm to use WSOLA, (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Avoid malloc in every iteration by defining some member variables. Created 7 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "media/filters/audio_renderer_algorithm.h"	5 #include "media/filters/audio_renderer_algorithm.h"

6	6

7 #include <algorithm>	7 #include <algorithm>

8 #include <cmath>	8 #include <cmath>

9	9

10 #include "base/logging.h"	10 #include "base/logging.h"

11 #include "base/memory/scoped_ptr.h"	11 #include "base/memory/scoped_ptr.h"

12 #include "media/audio/audio_util.h"	12 #include "media/audio/audio_util.h"

13 #include "media/base/audio_buffer.h"	13 #include "media/base/audio_buffer.h"

14 #include "media/base/audio_bus.h"	14 #include "media/base/audio_bus.h"

	15 #include "media/filters/wsola_internals.h"

15	16

16 namespace media {	17 namespace media {

17	18

18 // The starting size in frames for \|audio_buffer_\|. Previous usage maintained a

19 // queue of 16 AudioBuffers, each of 512 frames. This worked well, so we

20 // maintain this number of frames.

21 static const int kStartingBufferSizeInFrames = 16 * 512;

22

23 // The maximum size in frames for the \|audio_buffer_\|. Arbitrarily determined.	19 // The maximum size in frames for the \|audio_buffer_\|. Arbitrarily determined.

24 // This number represents 3 seconds of 96kHz/16 bit 7.1 surround sound.	20 // This number represents 3 seconds of 96kHz/16 bit 7.1 surround sound.

25 static const int kMaxBufferSizeInFrames = 3 * 96000;	21 static const int kMaxBufferSizeInFrames = 3 * 96000;

26	22

27 // Duration of audio segments used for crossfading (in seconds).

28 static const double kWindowDuration = 0.08;

29

30 // Duration of crossfade between audio segments (in seconds).

31 static const double kCrossfadeDuration = 0.008;

32

33 // Max/min supported playback rates for fast/slow audio. Audio outside of these	23 // Max/min supported playback rates for fast/slow audio. Audio outside of these

34 // ranges are muted.	24 // ranges are muted.

35 // Audio at these speeds would sound better under a frequency domain algorithm.	25 // Audio at these speeds would sound better under a frequency domain algorithm.

36 static const float kMinPlaybackRate = 0.5f;	26 static const float kMinPlaybackRate = 0.5f;

37 static const float kMaxPlaybackRate = 4.0f;	27 static const float kMaxPlaybackRate = 4.0f;

38	28

	29 // Overlap-and-add window size in milliseconds.

	30 static const int kOlaWindowSizeMs = 20;

	31

	32 // Size of search interval in milliseconds. The search interval is

	33 // [-delta delta] around \|output_index_\| * \|playback_rate_\|. So the search

	34 // interval is 2 * delta.

	35 static const int kWsolaSearchIntervalMs = 30;

	36

	37 // The starting size in frames for \|audio_buffer_\|. Previous usage maintained a

	38 // queue of 16 AudioBuffers, each of 512 frames. This worked well, so we

	39 // maintain this number of frames.

	40 static const int kStartingBufferSizeInFrames = 16 * 512;

	41

39 AudioRendererAlgorithm::AudioRendererAlgorithm()	42 AudioRendererAlgorithm::AudioRendererAlgorithm()

40 : channels_(0),	43 : channels_(0),

41 samples_per_second_(0),	44 samples_per_second_(0),

42 playback_rate_(0),	45 playback_rate_(0),

43 frames_in_crossfade_(0),

44 index_into_window_(0),

45 crossfade_frame_number_(0),

46 muted_(false),	46 muted_(false),

47 muted_partial_frame_(0),	47 muted_partial_frame_(0),

48 window_size_(0),	48 capacity_(kStartingBufferSizeInFrames),

49 capacity_(kStartingBufferSizeInFrames) {	49 output_index_(0),

	50 search_block_center_offset_(0),

	51 num_candidate_frames_(0),

	52 target_block_index_(0),

	53 ola_window_size_(0),

	54 ola_hop_size_(0),

	55 num_complete_frames_(0) {

50 }	56 }

51	57

52 AudioRendererAlgorithm::~AudioRendererAlgorithm() {}	58 AudioRendererAlgorithm::~AudioRendererAlgorithm() {}

53	59

54 void AudioRendererAlgorithm::Initialize(float initial_playback_rate,	60 void AudioRendererAlgorithm::Initialize(float initial_playback_rate,

55 const AudioParameters& params) {	61 const AudioParameters& params) {

56 CHECK(params.IsValid());	62 CHECK(params.IsValid());

57	63

58 channels_ = params.channels();	64 channels_ = params.channels();

59 samples_per_second_ = params.sample_rate();	65 samples_per_second_ = params.sample_rate();

60 SetPlaybackRate(initial_playback_rate);	66 SetPlaybackRate(initial_playback_rate);

61	67

62 window_size_ = samples_per_second_ * kWindowDuration;	68 num_candidate_frames_ =

63 frames_in_crossfade_ = samples_per_second_ * kCrossfadeDuration;	69 (kWsolaSearchIntervalMs * samples_per_second_) / 1000 + 1;

64 crossfade_buffer_ = AudioBus::Create(channels_, frames_in_crossfade_);	70

	71 // Make sure window size is an even number.

	72 ola_window_size_ = (kOlaWindowSizeMs * samples_per_second_ / 1000 / 2) * 2;

	73

	74 ola_hop_size_ = ola_window_size_ / 2;

	75

	76 search_block_center_offset_ = (num_candidate_frames_ - 1) / 2 + (

	77 ola_window_size_ / 2 - 1);
	marpan 2013/08/02 17:56:54 The second term is because of offset to first fram The second term is because of offset to first frame? turaj 2013/08/02 23:45:59 If we have L+1 candidates, it means we have to che If we have L+1 candidates, it means we have to check L/2 candidate blocks to the left of the search-block-center. The first sample of the left most candidate block is L/2 + (\|ola_window_size\|/2 - 1) to the left of the search-block-center. In another word, we are computing cross correlation of target-block (of size \|ola_window_size\|) with search-block for lags -L/2 to L/2 around the search-block-center. Therefore the left most sample needed for this computation is -(L/2 + (\|ola_window_size\|/2 - 1)). On 2013/08/02 17:56:54, marpan wrote: Show quoted text > The second term is because of offset to first frame?
	78

	79 ola_window_.reset(new float[ola_window_size_]);

	80 internal::GetSymmetricHanningWindow(ola_window_size_, ola_window_.get());

	81

	82 transition_window_.reset(new float[ola_window_size_ * 2]);

	83 internal::GetSymmetricHanningWindow(2 * ola_window_size_,

	84 transition_window_.get());

	85

	86 wsola_output_ = AudioBus::Create(channels_, ola_window_size_ + ola_hop_size_);

	87

	88 // Auxiliary containers.

	89 optimal_block_ = AudioBus::Create(channels_, ola_window_size_);

	90 search_block_ = AudioBus::Create(

	91 channels_, num_candidate_frames_ + (ola_window_size_ - 1));

	92 target_block_ = AudioBus::Create(channels_, ola_window_size_);

65 }	93 }

66	94

67 int AudioRendererAlgorithm::FillBuffer(AudioBus* dest, int requested_frames) {	95 int AudioRendererAlgorithm::FillBuffer(AudioBus* dest, int requested_frames) {

68 if (playback_rate_ == 0)	96 if (playback_rate_ == 0)

69 return 0;	97 return 0;

70	98

71 // Optimize the \|muted_\| case to issue a single clear instead of performing	99 // Optimize the \|muted_\| case to issue a single clear instead of performing

72 // the full crossfade and clearing each crossfaded frame.	100 // the full crossfade and clearing each crossfaded frame.

73 if (muted_) {	101 if (muted_) {

74 int frames_to_render =	102 int frames_to_render =

(...skipping 11 matching lines...) Expand all Loading...
86	114

87 // Determine the partial frame that remains to be skipped for next call. If	115 // Determine the partial frame that remains to be skipped for next call. If

88 // the user switches back to playing, it may be off time by this partial	116 // the user switches back to playing, it may be off time by this partial

89 // frame, which would be undetectable. If they subsequently switch to	117 // frame, which would be undetectable. If they subsequently switch to

90 // another playback rate that mutes, the code will attempt to line up the	118 // another playback rate that mutes, the code will attempt to line up the

91 // frames again.	119 // frames again.

92 muted_partial_frame_ -= seek_frames;	120 muted_partial_frame_ -= seek_frames;

93 return frames_to_render;	121 return frames_to_render;

94 }	122 }

95	123

96 int slower_step = ceil(window_size_ * playback_rate_);	124 int slower_step = ceil(ola_window_size_ * playback_rate_);

97 int faster_step = ceil(window_size_ / playback_rate_);	125 int faster_step = ceil(ola_window_size_ / playback_rate_);

98	126

99 // Optimize the most common \|playback_rate_\| ~= 1 case to use a single copy	127 // Optimize the most common \|playback_rate_\| ~= 1 case to use a single copy

100 // instead of copying frame by frame.	128 // instead of copying frame by frame.

101 if (window_size_ <= faster_step && slower_step >= window_size_) {	129 if (ola_window_size_ <= faster_step && slower_step >= ola_window_size_) {

102 const int frames_to_copy =	130 const int frames_to_copy =

103 std::min(audio_buffer_.frames(), requested_frames);	131 std::min(audio_buffer_.frames(), requested_frames);

104 const int frames_read = audio_buffer_.ReadFrames(frames_to_copy, 0, dest);	132 const int frames_read = audio_buffer_.ReadFrames(frames_to_copy, 0, dest);

105 DCHECK_EQ(frames_read, frames_to_copy);	133 DCHECK_EQ(frames_read, frames_to_copy);

106 return frames_read;	134 return frames_read;

107 }	135 }

108	136

109 int total_frames_rendered = 0;	137 int total_frames_rendered = WsolaOutput(requested_frames, dest);

110 while (total_frames_rendered < requested_frames) {

111 if (index_into_window_ >= window_size_)

112 ResetWindow();

113

114 int rendered_frames = 0;

115 if (window_size_ > faster_step) {

116 rendered_frames =

117 OutputFasterPlayback(dest,

118 total_frames_rendered,

119 requested_frames - total_frames_rendered,

120 window_size_,

121 faster_step);

122 } else if (slower_step < window_size_) {

123 rendered_frames =

124 OutputSlowerPlayback(dest,

125 total_frames_rendered,

126 requested_frames - total_frames_rendered,

127 slower_step,

128 window_size_);

129 } else {

130 NOTREACHED();

131 }

132

133 if (rendered_frames == 0)

134 break;

135

136 total_frames_rendered += rendered_frames;

137 }

138 return total_frames_rendered;	138 return total_frames_rendered;

139 }	139 }

140	140

141 void AudioRendererAlgorithm::ResetWindow() {

142 DCHECK_LE(index_into_window_, window_size_);

143 index_into_window_ = 0;

144 crossfade_frame_number_ = 0;

145 }

146

147 int AudioRendererAlgorithm::OutputFasterPlayback(AudioBus* dest,

148 int dest_offset,

149 int requested_frames,

150 int input_step,

151 int output_step) {

152 // Ensure we don't run into OOB read/write situation.

153 CHECK_GT(input_step, output_step);

154 DCHECK_LT(index_into_window_, window_size_);

155 DCHECK_GT(playback_rate_, 1.0);

156 DCHECK(!muted_);

157

158 if (audio_buffer_.frames() < 1)

159 return 0;

160

161 // The audio data is output in a series of windows. For sped-up playback,

162 // the window is comprised of the following phases:

163 //

164 // a) Output raw data.

165 // b) Save bytes for crossfade in \|crossfade_buffer_\|.

166 // c) Drop data.

167 // d) Output crossfaded audio leading up to the next window.

168 //

169 // The duration of each phase is computed below based on the \|window_size_\|

170 // and \|playback_rate_\|.

171 DCHECK_LE(frames_in_crossfade_, output_step);

172

173 // This is the index of the end of phase a, beginning of phase b.

174 int outtro_crossfade_begin = output_step - frames_in_crossfade_;

175

176 // This is the index of the end of phase b, beginning of phase c.

177 int outtro_crossfade_end = output_step;

178

179 // This is the index of the end of phase c, beginning of phase d.

180 // This phase continues until \|index_into_window_\| reaches \|window_size_\|, at

181 // which point the window restarts.

182 int intro_crossfade_begin = input_step - frames_in_crossfade_;

183

184 // a) Output raw frames if we haven't reached the crossfade section.

185 if (index_into_window_ < outtro_crossfade_begin) {

186 // Read as many frames as we can and return the count. If it's not enough,

187 // we will get called again.

188 const int frames_to_copy =

189 std::min(requested_frames, outtro_crossfade_begin - index_into_window_);

190 int copied = audio_buffer_.ReadFrames(frames_to_copy, dest_offset, dest);

191 index_into_window_ += copied;

192 return copied;

193 }

194

195 // b) Save outtro crossfade frames into intermediate buffer, but do not output

196 // anything to \|dest\|.

197 if (index_into_window_ < outtro_crossfade_end) {

198 // This phase only applies if there are bytes to crossfade.

199 DCHECK_GT(frames_in_crossfade_, 0);

200 int crossfade_start = index_into_window_ - outtro_crossfade_begin;

201 int crossfade_count = outtro_crossfade_end - index_into_window_;

202 int copied = audio_buffer_.ReadFrames(

203 crossfade_count, crossfade_start, crossfade_buffer_.get());

204 index_into_window_ += copied;

205

206 // Did we get all the frames we need? If not, return and let subsequent

207 // calls try to get the rest.

208 if (copied != crossfade_count)

209 return 0;

210 }

211

212 // c) Drop frames until we reach the intro crossfade section.

213 if (index_into_window_ < intro_crossfade_begin) {

214 // Check if there is enough data to skip all the frames needed. If not,

215 // return 0 and let subsequent calls try to skip it all.

216 int seek_frames = intro_crossfade_begin - index_into_window_;

217 if (audio_buffer_.frames() < seek_frames)

218 return 0;

219 audio_buffer_.SeekFrames(seek_frames);

220

221 // We've dropped all the frames that need to be dropped.

222 index_into_window_ += seek_frames;

223 }

224

225 // d) Crossfade and output a frame, as long as we have data.

226 if (audio_buffer_.frames() < 1)

227 return 0;

228 DCHECK_GT(frames_in_crossfade_, 0);

229 DCHECK_LT(index_into_window_, window_size_);

230

231 int offset_into_buffer = index_into_window_ - intro_crossfade_begin;

232 int copied = audio_buffer_.ReadFrames(1, dest_offset, dest);

233 DCHECK_EQ(copied, 1);

234 CrossfadeFrame(crossfade_buffer_.get(),

235 offset_into_buffer,

236 dest,

237 dest_offset,

238 offset_into_buffer);

239 index_into_window_ += copied;

240 return copied;

241 }

242

243 int AudioRendererAlgorithm::OutputSlowerPlayback(AudioBus* dest,

244 int dest_offset,

245 int requested_frames,

246 int input_step,

247 int output_step) {

248 // Ensure we don't run into OOB read/write situation.

249 CHECK_LT(input_step, output_step);

250 DCHECK_LT(index_into_window_, window_size_);

251 DCHECK_LT(playback_rate_, 1.0);

252 DCHECK_NE(playback_rate_, 0);

253 DCHECK(!muted_);

254

255 if (audio_buffer_.frames() < 1)

256 return 0;

257

258 // The audio data is output in a series of windows. For slowed down playback,

259 // the window is comprised of the following phases:

260 //

261 // a) Output raw data.

262 // b) Output and save bytes for crossfade in \|crossfade_buffer_\|.

263 // c) Output* raw data.

264 // d) Output* crossfaded audio leading up to the next window.

265 //

266 // * Phases c) and d) do not progress \|audio_buffer_\|'s cursor so that the

267 // \|audio_buffer_\|'s cursor is in the correct place for the next window.

268 //

269 // The duration of each phase is computed below based on the \|window_size_\|

270 // and \|playback_rate_\|.

271 DCHECK_LE(frames_in_crossfade_, input_step);

272

273 // This is the index of the end of phase a, beginning of phase b.

274 int intro_crossfade_begin = input_step - frames_in_crossfade_;

275

276 // This is the index of the end of phase b, beginning of phase c.

277 int intro_crossfade_end = input_step;

278

279 // This is the index of the end of phase c, beginning of phase d.

280 // This phase continues until \|index_into_window_\| reaches \|window_size_\|, at

281 // which point the window restarts.

282 int outtro_crossfade_begin = output_step - frames_in_crossfade_;

283

284 // a) Output raw frames.

285 if (index_into_window_ < intro_crossfade_begin) {

286 // Read as many frames as we can and return the count. If it's not enough,

287 // we will get called again.

288 const int frames_to_copy =

289 std::min(requested_frames, intro_crossfade_begin - index_into_window_);

290 int copied = audio_buffer_.ReadFrames(frames_to_copy, dest_offset, dest);

291 index_into_window_ += copied;

292 return copied;

293 }

294

295 // b) Save the raw frames for the intro crossfade section, then copy the

296 // same frames to \|dest\|.

297 if (index_into_window_ < intro_crossfade_end) {

298 const int frames_to_copy =

299 std::min(requested_frames, intro_crossfade_end - index_into_window_);

300 int offset = index_into_window_ - intro_crossfade_begin;

301 int copied = audio_buffer_.ReadFrames(

302 frames_to_copy, offset, crossfade_buffer_.get());

303 crossfade_buffer_->CopyPartialFramesTo(offset, copied, dest_offset, dest);

304 index_into_window_ += copied;

305 return copied;

306 }

307

308 // c) Output a raw frame into \|dest\| without advancing the \|audio_buffer_\|

309 // cursor.

310 int audio_buffer_offset = index_into_window_ - intro_crossfade_end;

311 DCHECK_GE(audio_buffer_offset, 0);

312 if (audio_buffer_.frames() <= audio_buffer_offset)

313 return 0;

314 int copied =

315 audio_buffer_.PeekFrames(1, audio_buffer_offset, dest_offset, dest);

316 DCHECK_EQ(1, copied);

317

318 // d) Crossfade the next frame of \|crossfade_buffer_\| into \|dest\| if we've

319 // reached the outtro crossfade section of the window.

320 if (index_into_window_ >= outtro_crossfade_begin) {

321 int offset_into_crossfade_buffer =

322 index_into_window_ - outtro_crossfade_begin;

323 CrossfadeFrame(dest,

324 dest_offset,

325 crossfade_buffer_.get(),

326 offset_into_crossfade_buffer,

327 offset_into_crossfade_buffer);

328 }

329

330 index_into_window_ += copied;

331 return copied;

332 }

333

334 void AudioRendererAlgorithm::CrossfadeFrame(AudioBus* intro,

335 int intro_offset,

336 AudioBus* outtro,

337 int outtro_offset,

338 int fade_offset) {

339 float crossfade_ratio =

340 static_cast<float>(fade_offset) / frames_in_crossfade_;

341 for (int channel = 0; channel < channels_; ++channel) {

342 outtro->channel(channel)[outtro_offset] =

343 (1.0f - crossfade_ratio) * intro->channel(channel)[intro_offset] +

344 (crossfade_ratio) * outtro->channel(channel)[outtro_offset];

345 }

346 }

347

348 void AudioRendererAlgorithm::SetPlaybackRate(float new_rate) {	141 void AudioRendererAlgorithm::SetPlaybackRate(float new_rate) {

349 DCHECK_GE(new_rate, 0);	142 DCHECK_GE(new_rate, 0);

350 playback_rate_ = new_rate;	143 // Round it to two decimal digits.

	144 playback_rate_ = floor(new_rate * 100.f + 0.5f) / 100;

351 muted_ =	145 muted_ =

352 playback_rate_ < kMinPlaybackRate \|\| playback_rate_ > kMaxPlaybackRate;	146 playback_rate_ < kMinPlaybackRate \|\| playback_rate_ > kMaxPlaybackRate;

353

354 ResetWindow();

355 }	147 }

356	148

357 void AudioRendererAlgorithm::FlushBuffers() {	149 void AudioRendererAlgorithm::FlushBuffers() {

358 ResetWindow();

359

360 // Clear the queue of decoded packets (releasing the buffers).	150 // Clear the queue of decoded packets (releasing the buffers).

361 audio_buffer_.Clear();	151 audio_buffer_.Clear();

	152 output_index_ = 0;

	153 target_block_index_ = 0;

	154 wsola_output_->Zero();

	155 num_complete_frames_ = 0;

362 }	156 }

363	157

364 base::TimeDelta AudioRendererAlgorithm::GetTime() {	158 base::TimeDelta AudioRendererAlgorithm::GetTime() {

365 return audio_buffer_.current_time();	159 return audio_buffer_.current_time();

366 }	160 }

367	161

368 void AudioRendererAlgorithm::EnqueueBuffer(	162 void AudioRendererAlgorithm::EnqueueBuffer(

369 const scoped_refptr<AudioBuffer>& buffer_in) {	163 const scoped_refptr<AudioBuffer>& buffer_in) {

370 DCHECK(!buffer_in->end_of_stream());	164 DCHECK(!buffer_in->end_of_stream());

371 audio_buffer_.Append(buffer_in);	165 audio_buffer_.Append(buffer_in);

372 }	166 }

373	167

374 bool AudioRendererAlgorithm::IsQueueFull() {	168 bool AudioRendererAlgorithm::IsQueueFull() {

375 return audio_buffer_.frames() >= capacity_;	169 return audio_buffer_.frames() >= capacity_;

376 }	170 }

377	171

378 void AudioRendererAlgorithm::IncreaseQueueCapacity() {	172 void AudioRendererAlgorithm::IncreaseQueueCapacity() {

379 capacity_ = std::min(2 * capacity_, kMaxBufferSizeInFrames);	173 capacity_ = std::min(2 * capacity_, kMaxBufferSizeInFrames);

380 }	174 }

381	175

	176 bool AudioRendererAlgorithm::CanPerformWsola() const {

	177 const int search_block_size = num_candidate_frames_ + (ola_window_size_ - 1);

	178 const int frames = audio_buffer_.frames();

	179 if (target_block_index_ + ola_window_size_ <= frames &&

	180 GetSearchRegionIndex() + search_block_size <= frames) {

	181 return true;

	182 }

	183 return false;

	184 }

	185

	186 int AudioRendererAlgorithm::WsolaOutput(int requested_frames, AudioBus* dest) {

	187 DCHECK_EQ(channels_, dest->channels());

	188

	189 // First read the frames which are ready.

	190 int rendered_frames = ReadWsolaOutput(requested_frames, 0, dest);

	191 while (rendered_frames < requested_frames && CanPerformWsola()) {

	192 rendered_frames += Wsola(requested_frames - rendered_frames,

	193 rendered_frames, dest);

	194 }

	195 return rendered_frames;

	196 }

	197

	198 int AudioRendererAlgorithm::Wsola(

	199 int requested_frames, int dest_offset, AudioBus* dest) {

	200 if (!GetOptimalBlock())

	201 return 0; // We cannot continue as \|optimal_block\| is not found.

	202 // There was not enough data.

	203

	204 // Overlap-and-add.

	205 for (int k = 0; k < channels_; ++k) {

	206 float* ch_opt_frame = optimal_block_->channel(k);

	207 float* ch_output = wsola_output_->channel(k) + num_complete_frames_;

	208 for (int n = 0; n < ola_hop_size_; ++n) {

	209 ch_output[n] = ch_output[n] * ola_window_[ola_hop_size_ + n] +

	210 ch_opt_frame[n] * ola_window_[n];

	211 }

	212

	213 // Copy the second half to the output.

	214 memcpy(&ch_output[ola_hop_size_], &ch_opt_frame[ola_hop_size_],

	215 sizeof(*ch_opt_frame) * ola_hop_size_);

	216 }

	217

	218 num_complete_frames_ += ola_hop_size_;

	219 output_index_ += ola_hop_size_;

	220

	221 RemoveOldInputFrames();

	222 return ReadWsolaOutput(requested_frames, dest_offset, dest);

	223 }

	224

	225 int AudioRendererAlgorithm::GetSearchRegionIndex() const {

	226 // Center of the search region, in frames.

	227 const int search_block_center_index = static_cast<int>(floor(

	228 output_index_ * playback_rate_ + 0.5));

	229

	230 // Index of the beginning of the search region, in frames.

	231 return search_block_center_index - search_block_center_offset_;

	232 }

	233

	234 void AudioRendererAlgorithm::RemoveOldInputFrames() {

	235 const int earliest_used_index = std::min(target_block_index_,

	236 GetSearchRegionIndex());

	237

	238 if (earliest_used_index < 0)

	239 return; // Nothing to remove

	240

	241 // Assuming \|playback_rate_\| * 100 == floor(\|playback_rate_\| * 100)

	242 // that is \|playback_rate_\| is represented by 2 decimal digits, only.

	243 // We eliminate blocks of size 100 * \|playback_rate_\| from input.

	244 const int kOutputFramesPerBlock = 100;

	245 const int input_frames_per_block =

	246 static_cast<int>(floor(playback_rate_ * kOutputFramesPerBlock + 0.5f));

	247 const int blocks_to_remove = earliest_used_index / input_frames_per_block;

	248 const int input_frames_to_remove = input_frames_per_block * blocks_to_remove;

	249

	250 // Remove frames from input and adjust indices accordingly.

	251 audio_buffer_.SeekFrames(input_frames_to_remove);

	252 target_block_index_ -= input_frames_to_remove;

	253

	254 // Adjust output index.

	255 output_index_ -= kOutputFramesPerBlock * blocks_to_remove;

	256 DCHECK_GE(output_index_, 0);

	257 }

	258

	259 int AudioRendererAlgorithm::ReadWsolaOutput(

	260 int requested_frames, int dest_offset, AudioBus* dest) {

	261 int rendered_frames = std::min(num_complete_frames_, requested_frames);

	262

	263 if (rendered_frames == 0)

	264 return 0; // There is nothing to read from \|wsola_output_\|, return.

	265

	266 wsola_output_->CopyPartialFramesTo(0, rendered_frames, dest_offset, dest);

	267

	268 // Remove the frames which are read.

	269 int frames_to_move = wsola_output_->frames() - rendered_frames;

	270 for (int k = 0; k < channels_; ++k) {

	271 float* ch = wsola_output_->channel(k);

	272 memmove(ch, &ch[rendered_frames], sizeof(*ch) * frames_to_move);

	273 }

	274 num_complete_frames_ -= rendered_frames;

	275 return rendered_frames;

	276 }

	277

	278 bool AudioRendererAlgorithm::TargetIsWithinSearchRegion() const {

	279 const int search_block_index = GetSearchRegionIndex();

	280 const int search_block_size = num_candidate_frames_ + (ola_window_size_ - 1);

	281

	282 if (target_block_index_ >= search_block_index &&

	283 target_block_index_ + ola_window_size_ <=

	284 search_block_index + search_block_size) {

	285 return true;

	286 }

	287 return false;

	288 }

	289

	290 bool AudioRendererAlgorithm::GetOptimalBlock() {

	291 int optimal_index = 0;

	292 if (TargetIsWithinSearchRegion()) {

	293 optimal_index = target_block_index_;

	294 // Get the optimal window.

	295 if (!PeekAudioWithZeroAppend(optimal_index, optimal_block_.get()))

	296 return false;

	297 } else {

	298 if (!PeekAudioWithZeroAppend(target_block_index_, target_block_.get()))

	299 return false;

	300 const int search_region_index = GetSearchRegionIndex();
	marpan 2013/08/02 17:56:54 You use "search_block_index" for this above, bette You use "search_block_index" for this above, better to use same name. turaj 2013/08/02 23:45:59 Absolutely. On 2013/08/02 17:56:54, marpan wrote: Absolutely. On 2013/08/02 17:56:54, marpan wrote: Show quoted text > You use "search_block_index" for this above, better to use same name.
	301

	302 if (!PeekAudioWithZeroAppend(search_region_index, search_block_.get()))

	303 return false;

	304

	305 int last_optimal = target_block_index_ - ola_hop_size_ -

	306 search_region_index;

	307 internal::Interval exclude_iterval = std::make_pair(last_optimal - 80,

	308 last_optimal + 80);

	309 // \|optimal_index\| is in frames and it is relative to the beginning

	310 // of the \|search_block_\|.

	311 optimal_index = internal::OptimalIndex(

	312 search_block_.get(), target_block_.get(), exclude_iterval);

	313

	314 // Translate \|index\| w.r.t. the beginning of \|audio_buffer_\|.

	315 optimal_index += search_region_index;

	316

	317 // Get the optimal window.

	318 PeekAudioWithZeroAppend(optimal_index, optimal_block_.get());

	319

	320 // Make a transition from target window to the optimal window if different.
	marpan 2013/08/02 17:56:54 you mean...from target block to the optimal block. you mean...from target block to the optimal block..? turaj 2013/08/02 23:45:59 You right, my mistake. On 2013/08/02 17:56:54, ma You right, my mistake. On 2013/08/02 17:56:54, marpan wrote: Show quoted text > you mean...from target block to the optimal block..?
	321 // Target window has the best continuation to the current current output.
	marpan 2013/08/02 17:56:54 Target block instead of "window" Target block instead of "window" marpan 2013/08/02 17:56:54 Remove one of the "current" Remove one of the "current" turaj 2013/08/02 23:45:59 Done. Show quoted text On 2013/08/02 17:56:54, marpan wrote: > Target block instead of "window" Done. turaj 2013/08/02 23:45:59 Done. Show quoted text On 2013/08/02 17:56:54, marpan wrote: > Remove one of the "current" Done.
	322 // Optimal block is the most similar block to the target, however, it might

	323 // introduce some discontinuity when over-lap-added. Therefore, we combine

	324 // them for a smoother transition.

	325 for (int k = 0; k < channels_; ++k) {

	326 float* ch_opt = optimal_block_->channel(k);

	327 float* ch_target = target_block_->channel(k);

	328 for (int n = 0; n < ola_window_size_; ++n) {

	329 ch_opt[n] = ch_opt[n] * transition_window_[n] + ch_target[n] *
	marpan 2013/08/02 17:56:54 May want to comment about transition_window. Is it May want to comment about transition_window. Is it of size 2L to get varying weight such that weighting term favors target near n=0 and optimal near L? turaj 2013/08/02 23:45:59 Done. Show quoted text On 2013/08/02 17:56:54, marpan wrote: > May want to comment about transition_window. Is it of size 2L to get varying > weight such that weighting term favors target near n=0 and optimal near L? > Done.
	330 transition_window_[ola_window_size_ + n];
	marpan 2013/08/02 17:56:54 No change needed for this comment. Just wondering No change needed for this comment. Just wondering if some measure can be used in OptimalIndex() selection that incorporates both similarity and discontinuity/smoothing constraint, so to avoid this extra smoothing step (with preset weighting term) after OptimalIndex(). turaj 2013/08/02 23:45:59 I guess one can do something along the lines you s I guess one can do something along the lines you suggest. It you accept my notation the optimal_block at the end of this function is optimal_block = RampOutWindow * target_block + RampInWindow * most_similar; So this suggest that instead of maximizing DotProduct(target_block, candidate_block) to find most_similar, one might maximize DotProduct(target_block, RampInWindow * candidate_block). To save complexity one can evaluate DotProduct(target_block * candidate_block, RampInWindow). I can test this, if you think it makes sense. On 2013/08/02 17:56:54, marpan wrote: Show quoted text > No change needed for this comment. Just wondering if some measure can be used in > OptimalIndex() selection that incorporates both similarity and > discontinuity/smoothing constraint, so to avoid this extra smoothing step (with > preset weighting term) after OptimalIndex(). marpan 2013/08/06 17:14:10 No need to make any change for this comment. Show quoted text On 2013/08/02 23:45:59, turaj wrote: > I guess one can do something along the lines you suggest. It you accept my > notation the optimal_block at the end of this function is > > optimal_block = RampOutWindow * target_block + RampInWindow * most_similar; > > So this suggest that instead of maximizing DotProduct(target_block, > candidate_block) to find most_similar, one might maximize > DotProduct(target_block, RampInWindow * candidate_block). To save complexity one > can evaluate DotProduct(target_block * candidate_block, RampInWindow). > > I can test this, if you think it makes sense. 
> > > > > On 2013/08/02 17:56:54, marpan wrote: > > No change needed for this comment. Just wondering if some measure can be used > in > > OptimalIndex() selection that incorporates both similarity and > > discontinuity/smoothing constraint, so to avoid this extra smoothing step > (with > > preset weighting term) after OptimalIndex(). > No need to make any change for this comment.
	331 }

	332 }

	333 }

	334

	335 // Next target is one hop ahead of the current optimal.

	336 target_block_index_ = optimal_index + ola_hop_size_;

	337 return true;

	338 }

	339

	340 bool AudioRendererAlgorithm::PeekAudioWithZeroAppend(

	341 int read_offset_frames, AudioBus* dest) {

	342 int num_frames = dest->frames();

	343 if (read_offset_frames + num_frames > audio_buffer_.frames())

	344 return false;

	345

	346 int write_offset = 0;

	347 int num_frames_to_read = dest->frames();

	348 if (read_offset_frames < 0) {

	349 int num_zero_frames_appended = std::min(-read_offset_frames,

	350 num_frames_to_read);

	351 read_offset_frames = 0;

	352 num_frames_to_read -= num_zero_frames_appended;

	353 write_offset = num_zero_frames_appended;

	354 dest->ZeroFrames(num_zero_frames_appended);

	355 }

	356 audio_buffer_.PeekFrames(num_frames_to_read, read_offset_frames,

	357 write_offset, dest);

	358 return true;

	359 }

	360

382 } // namespace media	361 } // namespace media

OLD	NEW