OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "media/filters/audio_renderer_algorithm.h" | 5 #include "media/filters/audio_renderer_algorithm.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <cmath> | 8 #include <cmath> |
9 | 9 |
10 #include "base/logging.h" | 10 #include "base/logging.h" |
11 #include "base/memory/scoped_ptr.h" | 11 #include "base/memory/scoped_ptr.h" |
12 #include "media/audio/audio_util.h" | 12 #include "media/audio/audio_util.h" |
13 #include "media/base/audio_buffer.h" | 13 #include "media/base/audio_buffer.h" |
14 #include "media/base/audio_bus.h" | 14 #include "media/base/audio_bus.h" |
| 15 #include "media/filters/wsola_internals.h" |
15 | 16 |
16 namespace media { | 17 namespace media { |
17 | 18 |
18 // The starting size in frames for |audio_buffer_|. Previous usage maintained a | 19 |
19 // queue of 16 AudioBuffers, each of 512 frames. This worked well, so we | 20 // Waveform Similarity Overlap-and-add (WSOLA). |
20 // maintain this number of frames. | 21 // |
21 static const int kStartingBufferSizeInFrames = 16 * 512; | 22 // One WSOLA iteration |
| 23 // |
| 24 // 1) Extract |target_block_| as input frames at indices |
| 25 // [|target_block_index_|, |target_block_index_| + |ola_window_size_|). |
| 26 // Note that |target_block_| is the "natural" continuation of the output. |
| 27 // |
| 28 // 2) Extract |search_block_| as input frames at indices |
| 29 // [|search_block_index_|, |
| 30 // |search_block_index_| + |num_candidate_blocks_| + |ola_window_size_| - 1). |
| 31 // |
| 32 // 3) Find a block within the |search_block_| that is most similar |
| 33 // to |target_block_|. Let |optimal_index| be the index of such block and |
| 34 // write it to |optimal_block_|. |
| 35 // |
| 36 // 4) Update: |
| 37 // |optimal_block_| = |transition_window_| * |target_block_| + |
| 38 // (1 - |transition_window_|) * |optimal_block_|. |
| 39 // |
| 40 // 5) Overlap-and-add |optimal_block_| to the |wsola_output_|. |
| 41 // |
| 42 // 6) Update: |
| 43 // |target_block_index_| = |optimal_index| + |ola_window_size_| / 2, |
| 44 // |output_time_| = |output_time_| + |ola_window_size_| / 2, |
| 45 // |search_block_center_index| = |output_time_| * |playback_rate_|, and |
| 46 // |search_block_index_| = |search_block_center_index| - |
| 47 // |search_block_center_offset_|. |
22 | 48 |
23 // The maximum size in frames for the |audio_buffer_|. Arbitrarily determined. | 49 // The maximum size in frames for the |audio_buffer_|. Arbitrarily determined. |
24 // This number represents 3 seconds of 96kHz/16 bit 7.1 surround sound. | 50 // This number represents 3 seconds of 96kHz/16 bit 7.1 surround sound. |
25 static const int kMaxBufferSizeInFrames = 3 * 96000; | 51 static const int kMaxBufferSizeInFrames = 3 * 96000; |
26 | 52 |
27 // Duration of audio segments used for crossfading (in seconds). | |
28 static const double kWindowDuration = 0.08; | |
29 | |
30 // Duration of crossfade between audio segments (in seconds). | |
31 static const double kCrossfadeDuration = 0.008; | |
32 | |
33 // Max/min supported playback rates for fast/slow audio. Audio outside of these | 53 // Max/min supported playback rates for fast/slow audio. Audio outside of these |
34 // ranges are muted. | 54 // ranges are muted. |
35 // Audio at these speeds would sound better under a frequency domain algorithm. | 55 // Audio at these speeds would sound better under a frequency domain algorithm. |
36 static const float kMinPlaybackRate = 0.5f; | 56 static const float kMinPlaybackRate = 0.5f; |
37 static const float kMaxPlaybackRate = 4.0f; | 57 static const float kMaxPlaybackRate = 4.0f; |
38 | 58 |
| 59 // Overlap-and-add window size in milliseconds. |
| 60 static const int kOlaWindowSizeMs = 20; |
| 61 |
| 62 // Size of search interval in milliseconds. The search interval is |
| 63 // [-delta, delta] around |output_time_| * |playback_rate_|. So the search |
| 64 // interval is 2 * delta. |
| 65 static const int kWsolaSearchIntervalMs = 30; |
| 66 |
| 67 // The starting size in frames for |audio_buffer_|. Previous usage maintained a |
| 68 // queue of 16 AudioBuffers, each of 512 frames. This worked well, so we |
| 69 // maintain this number of frames. |
| 70 static const int kStartingBufferSizeInFrames = 16 * 512; |
| 71 |
39 AudioRendererAlgorithm::AudioRendererAlgorithm() | 72 AudioRendererAlgorithm::AudioRendererAlgorithm() |
40 : channels_(0), | 73 : channels_(0), |
41 samples_per_second_(0), | 74 samples_per_second_(0), |
42 playback_rate_(0), | 75 playback_rate_(0), |
43 frames_in_crossfade_(0), | |
44 index_into_window_(0), | |
45 crossfade_frame_number_(0), | |
46 muted_(false), | 76 muted_(false), |
47 muted_partial_frame_(0), | 77 muted_partial_frame_(0), |
48 window_size_(0), | 78 capacity_(kStartingBufferSizeInFrames), |
49 capacity_(kStartingBufferSizeInFrames) { | 79 output_time_(0.0), |
| 80 search_block_center_offset_(0), |
| 81 search_block_index_(0), |
| 82 num_candidate_blocks_(0), |
| 83 target_block_index_(0), |
| 84 ola_window_size_(0), |
| 85 ola_hop_size_(0), |
| 86 num_complete_frames_(0) { |
50 } | 87 } |
51 | 88 |
52 AudioRendererAlgorithm::~AudioRendererAlgorithm() {} | 89 AudioRendererAlgorithm::~AudioRendererAlgorithm() {} |
53 | 90 |
54 void AudioRendererAlgorithm::Initialize(float initial_playback_rate, | 91 void AudioRendererAlgorithm::Initialize(float initial_playback_rate, |
55 const AudioParameters& params) { | 92 const AudioParameters& params) { |
56 CHECK(params.IsValid()); | 93 CHECK(params.IsValid()); |
57 | 94 |
58 channels_ = params.channels(); | 95 channels_ = params.channels(); |
59 samples_per_second_ = params.sample_rate(); | 96 samples_per_second_ = params.sample_rate(); |
60 SetPlaybackRate(initial_playback_rate); | 97 SetPlaybackRate(initial_playback_rate); |
| 98 num_candidate_blocks_ = (kWsolaSearchIntervalMs * samples_per_second_) / 1000; |
| 99 ola_window_size_ = kOlaWindowSizeMs * samples_per_second_ / 1000; |
61 | 100 |
62 window_size_ = samples_per_second_ * kWindowDuration; | 101 // Make sure window size is an even number. |
63 frames_in_crossfade_ = samples_per_second_ * kCrossfadeDuration; | 102 ola_window_size_ += ola_window_size_ & 1; |
64 crossfade_buffer_ = AudioBus::Create(channels_, frames_in_crossfade_); | 103 ola_hop_size_ = ola_window_size_ / 2; |
| 104 |
| 105 // |num_candidate_blocks_| / 2 is the offset of the center of the search |
| 106 // block to the center of the first (left most) candidate block. The offset |
| 107 // of the center of a candidate block to its left most point is |
| 108 // |ola_window_size_| / 2 - 1. Note that |ola_window_size_| is even and in |
| 109 // our convention the center belongs to the left half, so we need to subtract |
| 110 // one frame to get the correct offset. |
| 111 // |
| 112 // Search Block |
| 113 // <-------------------------------------------> |
| 114 // |
| 115 // |ola_window_size_| / 2 - 1 |
| 116 // <---- |
| 117 // |
| 118 // |num_candidate_blocks_| / 2 |
| 119 // <---------------- |
| 120 // center |
| 121 // X----X----------------X---------------X-----X |
| 122 // <----------> <----------> |
| 123 // Candidate ... Candidate |
| 124 // 1, ... |num_candidate_blocks_| |
| 125 search_block_center_offset_ = num_candidate_blocks_ / 2 + |
| 126 (ola_window_size_ / 2 - 1); |
| 127 |
| 128 ola_window_.reset(new float[ola_window_size_]); |
| 129 internal::GetSymmetricHanningWindow(ola_window_size_, ola_window_.get()); |
| 130 |
| 131 transition_window_.reset(new float[ola_window_size_ * 2]); |
| 132 internal::GetSymmetricHanningWindow(2 * ola_window_size_, |
| 133 transition_window_.get()); |
| 134 |
| 135 wsola_output_ = AudioBus::Create(channels_, ola_window_size_ + ola_hop_size_); |
| 136 wsola_output_->Zero(); // Initialize for overlap-and-add of the first block. |
| 137 |
| 138 // Auxiliary containers. |
| 139 optimal_block_ = AudioBus::Create(channels_, ola_window_size_); |
| 140 search_block_ = AudioBus::Create( |
| 141 channels_, num_candidate_blocks_ + (ola_window_size_ - 1)); |
| 142 target_block_ = AudioBus::Create(channels_, ola_window_size_); |
65 } | 143 } |
66 | 144 |
67 int AudioRendererAlgorithm::FillBuffer(AudioBus* dest, int requested_frames) { | 145 int AudioRendererAlgorithm::FillBuffer(AudioBus* dest, int requested_frames) { |
68 if (playback_rate_ == 0) | 146 if (playback_rate_ == 0) |
69 return 0; | 147 return 0; |
70 | 148 |
| 149 DCHECK_EQ(channels_, dest->channels()); |
| 150 |
71 // Optimize the |muted_| case to issue a single clear instead of performing | 151 // Optimize the |muted_| case to issue a single clear instead of performing |
72 // the full crossfade and clearing each crossfaded frame. | 152 // the full crossfade and clearing each crossfaded frame. |
73 if (muted_) { | 153 if (muted_) { |
74 int frames_to_render = | 154 int frames_to_render = |
75 std::min(static_cast<int>(audio_buffer_.frames() / playback_rate_), | 155 std::min(static_cast<int>(audio_buffer_.frames() / playback_rate_), |
76 requested_frames); | 156 requested_frames); |
77 | 157 |
78 // Compute accurate number of frames to actually skip in the source data. | 158 // Compute accurate number of frames to actually skip in the source data. |
79 // Includes the leftover partial frame from last request. However, we can | 159 // Includes the leftover partial frame from last request. However, we can |
80 // only skip over complete frames, so a partial frame may remain for next | 160 // only skip over complete frames, so a partial frame may remain for next |
81 // time. | 161 // time. |
82 muted_partial_frame_ += frames_to_render * playback_rate_; | 162 muted_partial_frame_ += frames_to_render * playback_rate_; |
83 int seek_frames = static_cast<int>(muted_partial_frame_); | 163 int seek_frames = static_cast<int>(muted_partial_frame_); |
84 dest->ZeroFrames(frames_to_render); | 164 dest->ZeroFrames(frames_to_render); |
85 audio_buffer_.SeekFrames(seek_frames); | 165 audio_buffer_.SeekFrames(seek_frames); |
86 | 166 |
87 // Determine the partial frame that remains to be skipped for next call. If | 167 // Determine the partial frame that remains to be skipped for next call. If |
88 // the user switches back to playing, it may be off time by this partial | 168 // the user switches back to playing, it may be off time by this partial |
89 // frame, which would be undetectable. If they subsequently switch to | 169 // frame, which would be undetectable. If they subsequently switch to |
90 // another playback rate that mutes, the code will attempt to line up the | 170 // another playback rate that mutes, the code will attempt to line up the |
91 // frames again. | 171 // frames again. |
92 muted_partial_frame_ -= seek_frames; | 172 muted_partial_frame_ -= seek_frames; |
93 return frames_to_render; | 173 return frames_to_render; |
94 } | 174 } |
95 | 175 |
96 int slower_step = ceil(window_size_ * playback_rate_); | 176 int slower_step = ceil(ola_window_size_ * playback_rate_); |
97 int faster_step = ceil(window_size_ / playback_rate_); | 177 int faster_step = ceil(ola_window_size_ / playback_rate_); |
98 | 178 |
99 // Optimize the most common |playback_rate_| ~= 1 case to use a single copy | 179 // Optimize the most common |playback_rate_| ~= 1 case to use a single copy |
100 // instead of copying frame by frame. | 180 // instead of copying frame by frame. |
101 if (window_size_ <= faster_step && slower_step >= window_size_) { | 181 if (ola_window_size_ <= faster_step && slower_step >= ola_window_size_) { |
102 const int frames_to_copy = | 182 const int frames_to_copy = |
103 std::min(audio_buffer_.frames(), requested_frames); | 183 std::min(audio_buffer_.frames(), requested_frames); |
104 const int frames_read = audio_buffer_.ReadFrames(frames_to_copy, 0, dest); | 184 const int frames_read = audio_buffer_.ReadFrames(frames_to_copy, 0, dest); |
105 DCHECK_EQ(frames_read, frames_to_copy); | 185 DCHECK_EQ(frames_read, frames_to_copy); |
106 return frames_read; | 186 return frames_read; |
107 } | 187 } |
108 | 188 |
109 int total_frames_rendered = 0; | 189 int rendered_frames = 0; |
110 while (total_frames_rendered < requested_frames) { | 190 do { |
111 if (index_into_window_ >= window_size_) | 191 rendered_frames += WriteCompletedFramesTo( |
112 ResetWindow(); | 192 requested_frames - rendered_frames, rendered_frames, dest); |
113 | 193 } while (rendered_frames < requested_frames && RunOneWsolaIteration()); |
114 int rendered_frames = 0; | 194 return rendered_frames; |
115 if (window_size_ > faster_step) { | |
116 rendered_frames = | |
117 OutputFasterPlayback(dest, | |
118 total_frames_rendered, | |
119 requested_frames - total_frames_rendered, | |
120 window_size_, | |
121 faster_step); | |
122 } else if (slower_step < window_size_) { | |
123 rendered_frames = | |
124 OutputSlowerPlayback(dest, | |
125 total_frames_rendered, | |
126 requested_frames - total_frames_rendered, | |
127 slower_step, | |
128 window_size_); | |
129 } else { | |
130 NOTREACHED(); | |
131 } | |
132 | |
133 if (rendered_frames == 0) | |
134 break; | |
135 | |
136 total_frames_rendered += rendered_frames; | |
137 } | |
138 return total_frames_rendered; | |
139 } | |
140 | |
141 void AudioRendererAlgorithm::ResetWindow() { | |
142 DCHECK_LE(index_into_window_, window_size_); | |
143 index_into_window_ = 0; | |
144 crossfade_frame_number_ = 0; | |
145 } | |
146 | |
147 int AudioRendererAlgorithm::OutputFasterPlayback(AudioBus* dest, | |
148 int dest_offset, | |
149 int requested_frames, | |
150 int input_step, | |
151 int output_step) { | |
152 // Ensure we don't run into OOB read/write situation. | |
153 CHECK_GT(input_step, output_step); | |
154 DCHECK_LT(index_into_window_, window_size_); | |
155 DCHECK_GT(playback_rate_, 1.0); | |
156 DCHECK(!muted_); | |
157 | |
158 if (audio_buffer_.frames() < 1) | |
159 return 0; | |
160 | |
161 // The audio data is output in a series of windows. For sped-up playback, | |
162 // the window is comprised of the following phases: | |
163 // | |
164 // a) Output raw data. | |
165 // b) Save bytes for crossfade in |crossfade_buffer_|. | |
166 // c) Drop data. | |
167 // d) Output crossfaded audio leading up to the next window. | |
168 // | |
169 // The duration of each phase is computed below based on the |window_size_| | |
170 // and |playback_rate_|. | |
171 DCHECK_LE(frames_in_crossfade_, output_step); | |
172 | |
173 // This is the index of the end of phase a, beginning of phase b. | |
174 int outtro_crossfade_begin = output_step - frames_in_crossfade_; | |
175 | |
176 // This is the index of the end of phase b, beginning of phase c. | |
177 int outtro_crossfade_end = output_step; | |
178 | |
179 // This is the index of the end of phase c, beginning of phase d. | |
180 // This phase continues until |index_into_window_| reaches |window_size_|, at | |
181 // which point the window restarts. | |
182 int intro_crossfade_begin = input_step - frames_in_crossfade_; | |
183 | |
184 // a) Output raw frames if we haven't reached the crossfade section. | |
185 if (index_into_window_ < outtro_crossfade_begin) { | |
186 // Read as many frames as we can and return the count. If it's not enough, | |
187 // we will get called again. | |
188 const int frames_to_copy = | |
189 std::min(requested_frames, outtro_crossfade_begin - index_into_window_); | |
190 int copied = audio_buffer_.ReadFrames(frames_to_copy, dest_offset, dest); | |
191 index_into_window_ += copied; | |
192 return copied; | |
193 } | |
194 | |
195 // b) Save outtro crossfade frames into intermediate buffer, but do not output | |
196 // anything to |dest|. | |
197 if (index_into_window_ < outtro_crossfade_end) { | |
198 // This phase only applies if there are bytes to crossfade. | |
199 DCHECK_GT(frames_in_crossfade_, 0); | |
200 int crossfade_start = index_into_window_ - outtro_crossfade_begin; | |
201 int crossfade_count = outtro_crossfade_end - index_into_window_; | |
202 int copied = audio_buffer_.ReadFrames( | |
203 crossfade_count, crossfade_start, crossfade_buffer_.get()); | |
204 index_into_window_ += copied; | |
205 | |
206 // Did we get all the frames we need? If not, return and let subsequent | |
207 // calls try to get the rest. | |
208 if (copied != crossfade_count) | |
209 return 0; | |
210 } | |
211 | |
212 // c) Drop frames until we reach the intro crossfade section. | |
213 if (index_into_window_ < intro_crossfade_begin) { | |
214 // Check if there is enough data to skip all the frames needed. If not, | |
215 // return 0 and let subsequent calls try to skip it all. | |
216 int seek_frames = intro_crossfade_begin - index_into_window_; | |
217 if (audio_buffer_.frames() < seek_frames) | |
218 return 0; | |
219 audio_buffer_.SeekFrames(seek_frames); | |
220 | |
221 // We've dropped all the frames that need to be dropped. | |
222 index_into_window_ += seek_frames; | |
223 } | |
224 | |
225 // d) Crossfade and output a frame, as long as we have data. | |
226 if (audio_buffer_.frames() < 1) | |
227 return 0; | |
228 DCHECK_GT(frames_in_crossfade_, 0); | |
229 DCHECK_LT(index_into_window_, window_size_); | |
230 | |
231 int offset_into_buffer = index_into_window_ - intro_crossfade_begin; | |
232 int copied = audio_buffer_.ReadFrames(1, dest_offset, dest); | |
233 DCHECK_EQ(copied, 1); | |
234 CrossfadeFrame(crossfade_buffer_.get(), | |
235 offset_into_buffer, | |
236 dest, | |
237 dest_offset, | |
238 offset_into_buffer); | |
239 index_into_window_ += copied; | |
240 return copied; | |
241 } | |
242 | |
243 int AudioRendererAlgorithm::OutputSlowerPlayback(AudioBus* dest, | |
244 int dest_offset, | |
245 int requested_frames, | |
246 int input_step, | |
247 int output_step) { | |
248 // Ensure we don't run into OOB read/write situation. | |
249 CHECK_LT(input_step, output_step); | |
250 DCHECK_LT(index_into_window_, window_size_); | |
251 DCHECK_LT(playback_rate_, 1.0); | |
252 DCHECK_NE(playback_rate_, 0); | |
253 DCHECK(!muted_); | |
254 | |
255 if (audio_buffer_.frames() < 1) | |
256 return 0; | |
257 | |
258 // The audio data is output in a series of windows. For slowed down playback, | |
259 // the window is comprised of the following phases: | |
260 // | |
261 // a) Output raw data. | |
262 // b) Output and save bytes for crossfade in |crossfade_buffer_|. | |
263 // c) Output* raw data. | |
264 // d) Output* crossfaded audio leading up to the next window. | |
265 // | |
266 // * Phases c) and d) do not progress |audio_buffer_|'s cursor so that the | |
267 // |audio_buffer_|'s cursor is in the correct place for the next window. | |
268 // | |
269 // The duration of each phase is computed below based on the |window_size_| | |
270 // and |playback_rate_|. | |
271 DCHECK_LE(frames_in_crossfade_, input_step); | |
272 | |
273 // This is the index of the end of phase a, beginning of phase b. | |
274 int intro_crossfade_begin = input_step - frames_in_crossfade_; | |
275 | |
276 // This is the index of the end of phase b, beginning of phase c. | |
277 int intro_crossfade_end = input_step; | |
278 | |
279 // This is the index of the end of phase c, beginning of phase d. | |
280 // This phase continues until |index_into_window_| reaches |window_size_|, at | |
281 // which point the window restarts. | |
282 int outtro_crossfade_begin = output_step - frames_in_crossfade_; | |
283 | |
284 // a) Output raw frames. | |
285 if (index_into_window_ < intro_crossfade_begin) { | |
286 // Read as many frames as we can and return the count. If it's not enough, | |
287 // we will get called again. | |
288 const int frames_to_copy = | |
289 std::min(requested_frames, intro_crossfade_begin - index_into_window_); | |
290 int copied = audio_buffer_.ReadFrames(frames_to_copy, dest_offset, dest); | |
291 index_into_window_ += copied; | |
292 return copied; | |
293 } | |
294 | |
295 // b) Save the raw frames for the intro crossfade section, then copy the | |
296 // same frames to |dest|. | |
297 if (index_into_window_ < intro_crossfade_end) { | |
298 const int frames_to_copy = | |
299 std::min(requested_frames, intro_crossfade_end - index_into_window_); | |
300 int offset = index_into_window_ - intro_crossfade_begin; | |
301 int copied = audio_buffer_.ReadFrames( | |
302 frames_to_copy, offset, crossfade_buffer_.get()); | |
303 crossfade_buffer_->CopyPartialFramesTo(offset, copied, dest_offset, dest); | |
304 index_into_window_ += copied; | |
305 return copied; | |
306 } | |
307 | |
308 // c) Output a raw frame into |dest| without advancing the |audio_buffer_| | |
309 // cursor. | |
310 int audio_buffer_offset = index_into_window_ - intro_crossfade_end; | |
311 DCHECK_GE(audio_buffer_offset, 0); | |
312 if (audio_buffer_.frames() <= audio_buffer_offset) | |
313 return 0; | |
314 int copied = | |
315 audio_buffer_.PeekFrames(1, audio_buffer_offset, dest_offset, dest); | |
316 DCHECK_EQ(1, copied); | |
317 | |
318 // d) Crossfade the next frame of |crossfade_buffer_| into |dest| if we've | |
319 // reached the outtro crossfade section of the window. | |
320 if (index_into_window_ >= outtro_crossfade_begin) { | |
321 int offset_into_crossfade_buffer = | |
322 index_into_window_ - outtro_crossfade_begin; | |
323 CrossfadeFrame(dest, | |
324 dest_offset, | |
325 crossfade_buffer_.get(), | |
326 offset_into_crossfade_buffer, | |
327 offset_into_crossfade_buffer); | |
328 } | |
329 | |
330 index_into_window_ += copied; | |
331 return copied; | |
332 } | |
333 | |
334 void AudioRendererAlgorithm::CrossfadeFrame(AudioBus* intro, | |
335 int intro_offset, | |
336 AudioBus* outtro, | |
337 int outtro_offset, | |
338 int fade_offset) { | |
339 float crossfade_ratio = | |
340 static_cast<float>(fade_offset) / frames_in_crossfade_; | |
341 for (int channel = 0; channel < channels_; ++channel) { | |
342 outtro->channel(channel)[outtro_offset] = | |
343 (1.0f - crossfade_ratio) * intro->channel(channel)[intro_offset] + | |
344 (crossfade_ratio) * outtro->channel(channel)[outtro_offset]; | |
345 } | |
346 } | 195 } |
347 | 196 |
348 void AudioRendererAlgorithm::SetPlaybackRate(float new_rate) { | 197 void AudioRendererAlgorithm::SetPlaybackRate(float new_rate) { |
349 DCHECK_GE(new_rate, 0); | 198 DCHECK_GE(new_rate, 0); |
350 playback_rate_ = new_rate; | 199 playback_rate_ = new_rate; |
351 muted_ = | 200 muted_ = |
352 playback_rate_ < kMinPlaybackRate || playback_rate_ > kMaxPlaybackRate; | 201 playback_rate_ < kMinPlaybackRate || playback_rate_ > kMaxPlaybackRate; |
353 | |
354 ResetWindow(); | |
355 } | 202 } |
356 | 203 |
357 void AudioRendererAlgorithm::FlushBuffers() { | 204 void AudioRendererAlgorithm::FlushBuffers() { |
358 ResetWindow(); | |
359 | |
360 // Clear the queue of decoded packets (releasing the buffers). | 205 // Clear the queue of decoded packets (releasing the buffers). |
361 audio_buffer_.Clear(); | 206 audio_buffer_.Clear(); |
| 207 output_time_ = 0.0; |
| 208 search_block_index_ = 0; |
| 209 target_block_index_ = 0; |
| 210 wsola_output_->Zero(); |
| 211 num_complete_frames_ = 0; |
362 } | 212 } |
363 | 213 |
364 base::TimeDelta AudioRendererAlgorithm::GetTime() { | 214 base::TimeDelta AudioRendererAlgorithm::GetTime() { |
365 return audio_buffer_.current_time(); | 215 return audio_buffer_.current_time(); |
366 } | 216 } |
367 | 217 |
368 void AudioRendererAlgorithm::EnqueueBuffer( | 218 void AudioRendererAlgorithm::EnqueueBuffer( |
369 const scoped_refptr<AudioBuffer>& buffer_in) { | 219 const scoped_refptr<AudioBuffer>& buffer_in) { |
370 DCHECK(!buffer_in->end_of_stream()); | 220 DCHECK(!buffer_in->end_of_stream()); |
371 audio_buffer_.Append(buffer_in); | 221 audio_buffer_.Append(buffer_in); |
372 } | 222 } |
373 | 223 |
374 bool AudioRendererAlgorithm::IsQueueFull() { | 224 bool AudioRendererAlgorithm::IsQueueFull() { |
375 return audio_buffer_.frames() >= capacity_; | 225 return audio_buffer_.frames() >= capacity_; |
376 } | 226 } |
377 | 227 |
378 void AudioRendererAlgorithm::IncreaseQueueCapacity() { | 228 void AudioRendererAlgorithm::IncreaseQueueCapacity() { |
379 capacity_ = std::min(2 * capacity_, kMaxBufferSizeInFrames); | 229 capacity_ = std::min(2 * capacity_, kMaxBufferSizeInFrames); |
380 } | 230 } |
381 | 231 |
| 232 bool AudioRendererAlgorithm::CanPerformWsola() const { |
| 233 const int search_block_size = num_candidate_blocks_ + (ola_window_size_ - 1); |
| 234 const int frames = audio_buffer_.frames(); |
| 235 return target_block_index_ + ola_window_size_ <= frames && |
| 236 search_block_index_ + search_block_size <= frames; |
| 237 } |
| 238 |
| 239 bool AudioRendererAlgorithm::RunOneWsolaIteration() { |
| 240 if (!CanPerformWsola()) |
| 241 return false; |
| 242 |
| 243 GetOptimalBlock(); |
| 244 |
| 245 // Overlap-and-add. |
| 246 for (int k = 0; k < channels_; ++k) { |
| 247 const float* const ch_opt_frame = optimal_block_->channel(k); |
| 248 float* ch_output = wsola_output_->channel(k) + num_complete_frames_; |
| 249 for (int n = 0; n < ola_hop_size_; ++n) { |
| 250 ch_output[n] = ch_output[n] * ola_window_[ola_hop_size_ + n] + |
| 251 ch_opt_frame[n] * ola_window_[n]; |
| 252 } |
| 253 |
| 254 // Copy the second half to the output. |
| 255 memcpy(&ch_output[ola_hop_size_], &ch_opt_frame[ola_hop_size_], |
| 256 sizeof(*ch_opt_frame) * ola_hop_size_); |
| 257 } |
| 258 |
| 259 num_complete_frames_ += ola_hop_size_; |
| 260 UpdateOutputTime(ola_hop_size_); |
| 261 RemoveOldInputFrames(); |
| 262 return true; |
| 263 } |
| 264 |
| 265 void AudioRendererAlgorithm::UpdateOutputTime(double time_change) { |
| 266 output_time_ += time_change; |
| 267 // Center of the search region, in frames. |
| 268 const int search_block_center_index = static_cast<int>( |
| 269 output_time_ * playback_rate_ + 0.5); |
| 270 search_block_index_ = search_block_center_index - search_block_center_offset_; |
| 271 } |
| 272 |
| 273 void AudioRendererAlgorithm::RemoveOldInputFrames() { |
| 274 const int earliest_used_index = std::min(target_block_index_, |
| 275 search_block_index_); |
| 276 if (earliest_used_index <= 0) |
| 277 return; // Nothing to remove. |
| 278 |
| 279 // Remove frames from input and adjust indices accordingly. |
| 280 audio_buffer_.SeekFrames(earliest_used_index); |
| 281 target_block_index_ -= earliest_used_index; |
| 282 |
| 283 // Adjust output index. |
| 284 double output_time_change = static_cast<double>(earliest_used_index) / |
| 285 playback_rate_; |
| 286 CHECK_GE(output_time_, output_time_change); |
| 287 UpdateOutputTime(-output_time_change); |
| 288 } |
| 289 |
| 290 int AudioRendererAlgorithm::WriteCompletedFramesTo( |
| 291 int requested_frames, int dest_offset, AudioBus* dest) { |
| 292 int rendered_frames = std::min(num_complete_frames_, requested_frames); |
| 293 |
| 294 if (rendered_frames == 0) |
| 295 return 0; // There is nothing to read from |wsola_output_|, return. |
| 296 |
| 297 wsola_output_->CopyPartialFramesTo(0, rendered_frames, dest_offset, dest); |
| 298 |
| 299 // Remove the frames which are read. |
| 300 int frames_to_move = wsola_output_->frames() - rendered_frames; |
| 301 for (int k = 0; k < channels_; ++k) { |
| 302 float* ch = wsola_output_->channel(k); |
| 303 memmove(ch, &ch[rendered_frames], sizeof(*ch) * frames_to_move); |
| 304 } |
| 305 num_complete_frames_ -= rendered_frames; |
| 306 return rendered_frames; |
| 307 } |
| 308 |
| 309 bool AudioRendererAlgorithm::TargetIsWithinSearchRegion() const { |
| 310 const int search_block_size = num_candidate_blocks_ + (ola_window_size_ - 1); |
| 311 |
| 312 return target_block_index_ >= search_block_index_ && |
| 313 target_block_index_ + ola_window_size_ <= |
| 314 search_block_index_ + search_block_size; |
| 315 } |
| 316 |
| 317 void AudioRendererAlgorithm::GetOptimalBlock() { |
| 318 int optimal_index = 0; |
| 319 |
| 320 // An interval around last optimal block which is excluded from the search. |
| 321 // This is to reduce the buzzy sound. The number 160 is rather arbitrary and |
| 322 // derived heuristically. |
| 323 const int kExcludeIntervalLengthFrames = 160; |
| 324 if (TargetIsWithinSearchRegion()) { |
| 325 optimal_index = target_block_index_; |
| 326 PeekAudioWithZeroPrepend(optimal_index, optimal_block_.get()); |
| 327 } else { |
| 328 PeekAudioWithZeroPrepend(target_block_index_, target_block_.get()); |
| 329 PeekAudioWithZeroPrepend(search_block_index_, search_block_.get()); |
| 330 int last_optimal = target_block_index_ - ola_hop_size_ - |
| 331 search_block_index_; |
| 332 internal::Interval exclude_iterval = std::make_pair( |
| 333 last_optimal - kExcludeIntervalLengthFrames / 2, |
| 334 last_optimal + kExcludeIntervalLengthFrames / 2); |
| 335 |
| 336 // |optimal_index| is in frames and it is relative to the beginning of the |
| 337 // |search_block_|. |
| 338 optimal_index = internal::OptimalIndex( |
| 339 search_block_.get(), target_block_.get(), exclude_iterval); |
| 340 |
| 341 // Translate |optimal_index| w.r.t. the beginning of |audio_buffer_| and |
| 342 // extract the optimal block. |
| 343 optimal_index += search_block_index_; |
| 344 PeekAudioWithZeroPrepend(optimal_index, optimal_block_.get()); |
| 345 |
| 346 // Make a transition from target block to the optimal block if different. |
| 347 // Target block has the best continuation to the current output. |
| 348 // Optimal block is the most similar block to the target; however, it might |
| 349 // introduce some discontinuity when overlap-added. Therefore, we combine |
| 350 // them for a smoother transition. The length of the transition window is |
| 351 // twice that of the optimal block, which makes it act like a weighting |
| 352 // function where the target block has higher weight close to index zero |
| 353 // (weight of 1 at index 0) and lower weight close to the end. |
| 354 for (int k = 0; k < channels_; ++k) { |
| 355 float* ch_opt = optimal_block_->channel(k); |
| 356 const float* const ch_target = target_block_->channel(k); |
| 357 for (int n = 0; n < ola_window_size_; ++n) { |
| 358 ch_opt[n] = ch_opt[n] * transition_window_[n] + ch_target[n] * |
| 359 transition_window_[ola_window_size_ + n]; |
| 360 } |
| 361 } |
| 362 } |
| 363 |
| 364 // Next target is one hop ahead of the current optimal. |
| 365 target_block_index_ = optimal_index + ola_hop_size_; |
| 366 } |
| 367 |
| 368 void AudioRendererAlgorithm::PeekAudioWithZeroPrepend( |
| 369 int read_offset_frames, AudioBus* dest) { |
| 370 CHECK_LE(read_offset_frames + dest->frames(), audio_buffer_.frames()); |
| 371 |
| 372 int write_offset = 0; |
| 373 int num_frames_to_read = dest->frames(); |
| 374 if (read_offset_frames < 0) { |
| 375 int num_zero_frames_appended = std::min(-read_offset_frames, |
| 376 num_frames_to_read); |
| 377 read_offset_frames = 0; |
| 378 num_frames_to_read -= num_zero_frames_appended; |
| 379 write_offset = num_zero_frames_appended; |
| 380 dest->ZeroFrames(num_zero_frames_appended); |
| 381 } |
| 382 audio_buffer_.PeekFrames(num_frames_to_read, read_offset_frames, |
| 383 write_offset, dest); |
| 384 } |
| 385 |
382 } // namespace media | 386 } // namespace media |
OLD | NEW |