OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // AudioRendererAlgorithm buffers and transforms audio data. The owner of | 5 // AudioRendererAlgorithm buffers and transforms audio data. The owner of |
6 // this object provides audio data to the object through EnqueueBuffer() and | 6 // this object provides audio data to the object through EnqueueBuffer() and |
7 // requests data from the buffer via FillBuffer(). The owner also sets the | 7 // requests data from the buffer via FillBuffer(). The owner also sets the |
8 // playback rate, and the AudioRendererAlgorithm will stretch or compress the | 8 // playback rate, and the AudioRendererAlgorithm will stretch or compress the |
9 // buffered audio as necessary to match the playback rate when fulfilling | 9 // buffered audio as necessary to match the playback rate when fulfilling |
10 // FillBuffer() requests. | 10 // FillBuffer() requests. |
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
77 // than |audio_buffer_| was intending to hold. | 77 // than |audio_buffer_| was intending to hold. |
78 int frames_buffered() { return audio_buffer_.frames(); } | 78 int frames_buffered() { return audio_buffer_.frames(); } |
79 | 79 |
80 // Returns the samples per second for this audio stream. | 80 // Returns the samples per second for this audio stream. |
81 int samples_per_second() { return samples_per_second_; } | 81 int samples_per_second() { return samples_per_second_; } |
82 | 82 |
83 // Is the sound currently muted? | 83 // Is the sound currently muted? |
84 bool is_muted() { return muted_; } | 84 bool is_muted() { return muted_; } |
85 | 85 |
86 private: | 86 private: |
87 // Fills |dest| with up to |requested_frames| frames of audio data at faster | 87 // Run WSOLA on the input to obtain |requested_frames| output frames and |
88 // than normal speed. Returns the number of frames inserted into |dest|. If | 88 // write them to |dest|. |
89 // not enough data available, returns 0. | 89 int WsolaOutput(int requested_frames, AudioBus* dest); |
DaleCurtis
2013/08/06 18:04:55
It's confusing to have Wsola() and WsolaOutput() c
turaj
2013/08/06 23:29:27
I guess it is better with new names.
On 2013/08/
| |
90 // | |
91 // When the audio playback is > 1.0, we use a variant of Overlap-Add to squish | |
92 // audio output while preserving pitch. Essentially, we play a bit of audio | |
93 // data at normal speed, then we "fast forward" by dropping the next bit of | |
94 // audio data, and then we stich the pieces together by crossfading from one | |
95 // audio chunk to the next. | |
96 int OutputFasterPlayback(AudioBus* dest, | |
97 int dest_offset, | |
98 int requested_frames, | |
99 int input_step, | |
100 int output_step); | |
101 | 90 |
102 // Fills |dest| with up to |requested_frames| frames of audio data at slower | 91 // Within the search region, find the block of data that is most similar to |
103 // than normal speed. Returns the number of frames inserted into |dest|. If | 92 // target block, and write it in |optimal_block_|. Returns false if there is |
104 // not enough data available, returns 0. | 93 // not enough data to perform search. This is the case if either |
105 // | 94 // |target_block_| or |search_block_| extend into the future (need future samples |
DaleCurtis
2013/08/06 18:04:55
s/to/into the/
s/(need future samples to be comple
turaj
2013/08/06 23:29:27
Done.
| |
106 // When the audio playback is < 1.0, we use a variant of Overlap-Add to | 95 // to be completed). Otherwise true is returned. |
107 // stretch audio output while preserving pitch. This works by outputting a | 96 bool GetOptimalBlock(); |
108 // segment of audio data at normal speed. The next audio segment then starts | |
109 // by repeating some of the audio data from the previous audio segment. | |
110 // Segments are stiched together by crossfading from one audio chunk to the | |
111 // next. | |
112 int OutputSlowerPlayback(AudioBus* dest, | |
113 int dest_offset, | |
114 int requested_frames, | |
115 int input_step, | |
116 int output_step); | |
117 | 97 |
118 // Resets the window state to the start of a new window. | 98 // Read a maximum of |requested_frames| frames from |wsola_output_|. Returns |
119 void ResetWindow(); | 99 // number of frames actually read. |
100 int ReadWsolaOutput(int requested_frames, int output_offset, AudioBus* dest); | |
120 | 101 |
121 // Does a linear crossfade from |intro| into |outtro| for one frame. | 102 // Fill |dest| with frames from |audio_buffer_| starting from frame |
122 void CrossfadeFrame(AudioBus* intro, | 103 // |read_offset_frames|. |dest| is expected to have the same number of |
123 int intro_offset, | 104 // channels as |audio_buffer_|. Negative offsets, i.e. |
DaleCurtis
2013/08/06 18:04:55
In what cases will a negative offset be specified?
turaj
2013/08/06 23:29:27
I added some comment.
On 2013/08/06 18:04:55, Dal
| |
124 AudioBus* outtro, | 105 // |read_offset_frames| < 0, are accepted assuming that |audio_buffer_| is zero |
125 int outtro_offset, | 106 // for negative indices. False will be returned if it is required to read |
126 int fade_offset); | 107 // beyond the last frame of |audio_buffer_|, otherwise true is returned. |
108 bool PeekAudioWithZeroAppend(int read_offset_frames, AudioBus* dest); | |
109 | |
110 // Run one iteration of WSOLA, if there are sufficient frames. This will | |
111 // extend the output by |ola_hop_size_|, written to |wsola_output_|. Then, | |
DaleCurtis
2013/08/06 18:04:55
ola_hop_size_
turaj
2013/08/06 23:29:27
Done.
| |
112 // at most |requested_frames| frames are read and written to |dest|, starting | |
113 // at |output_offset| frame. The number of frames | |
114 // which is actually written to |dest| is returned. | |
115 int Wsola(int requested_frames, int output_offset, AudioBus* dest); | |
116 | |
117 // Seek |audio_buffer_| forward to remove frames from input that are not used | |
DaleCurtis
2013/08/06 18:04:55
s/is not/are not/
turaj
2013/08/06 23:29:27
Done.
| |
118 // any more. State of the WSOLA will be updated accordingly. | |
DaleCurtis
2013/08/06 18:04:55
s/should/will be/ ?
turaj
2013/08/06 23:29:27
Done.
| |
119 void RemoveOldInputFrames(); | |
120 | |
121 // Return the index to the first frame of the search region. | |
122 int GetSearchRegionIndex() const; | |
123 | |
124 // Is the target block fully within search region? If so, we don't need to | |
125 // perform the search. | |
126 bool TargetIsWithinSearchRegion() const; | |
127 | |
128 // Do we have enough data to perform one round of WSOLA? | |
129 bool CanPerformWsola() const; | |
127 | 130 |
128 // Number of channels in audio stream. | 131 // Number of channels in audio stream. |
129 int channels_; | 132 int channels_; |
130 | 133 |
131 // Sample rate of audio stream. | 134 // Sample rate of audio stream. |
132 int samples_per_second_; | 135 int samples_per_second_; |
133 | 136 |
134 // Used by algorithm to scale output. | 137 // Used by algorithm to scale output. |
135 float playback_rate_; | 138 float playback_rate_; |
136 | 139 |
137 // Buffered audio data. | 140 // Buffered audio data. |
138 AudioBufferQueue audio_buffer_; | 141 AudioBufferQueue audio_buffer_; |
139 | 142 |
140 // Length for crossfade in frames. | |
141 int frames_in_crossfade_; | |
142 | |
143 // The current location in the audio window, between 0 and |window_size_|. | |
144 // When |index_into_window_| reaches |window_size_|, the window resets. | |
145 // Indexed by frame. | |
146 int index_into_window_; | |
147 | |
148 // The frame number in the crossfade. | |
149 int crossfade_frame_number_; | |
150 | |
151 // True if the audio should be muted. | 143 // True if the audio should be muted. |
152 bool muted_; | 144 bool muted_; |
153 | 145 |
154 // If muted, keep track of partial frames that should have been skipped over. | 146 // If muted, keep track of partial frames that should have been skipped over. |
155 double muted_partial_frame_; | 147 double muted_partial_frame_; |
156 | 148 |
157 // Temporary buffer to hold crossfade data. | |
158 scoped_ptr<AudioBus> crossfade_buffer_; | |
159 | |
160 // Window size, in frames (calculated from audio properties). | |
161 int window_size_; | |
162 | |
163 // How many frames to have in the queue before we report the queue is full. | 149 // How many frames to have in the queue before we report the queue is full. |
164 int capacity_; | 150 int capacity_; |
165 | 151 |
152 // Waveform Similarity Overlap-and-add (WSOLA) variables. | |
153 // | |
154 // This is how WSOLA with 50% overlap-add works: | |
155 // | |
156 // Notation: | |
157 // | |
158 // |W| overlap-and-add (OLA) window. | |
159 // |L| size of |W| in samples. | |
160 // |alpha| playback-rate, where values less than 1 indicate a slowed-down | |
161 // playout (output is longer than input). | |
162 // |ts_out| current timestamp of output. | |
163 // |target| target-block, we search the input to find a block that is most | |
164 // similar to |target|. Similarity is measured by the correlation | |
165 // between two given blocks. | |
166 // |tau| a parameter defining the search interval. The search interval for | |
167 // the best match to |target| is | |
168 // [|ts_out|*|alpha|-|tau|, |ts_out|*|alpha|+|tau|]. | |
169 // |U| Transition Window. See the step 5) for the usage of this window. | |
170 // | |
171 // Assume we start at time 0, i.e. beginning of both input | |
172 // and output streams. | |
173 // | |
174 // 1) Initialize the output with the faded-out version of the first |L/2| | |
175 // samples of the input. The faded-out version is constructed by | |
176 // multiplying |L/2| input samples with the second half of OLA window, |W|. | |
177 // | |
178 // 2) Set the timestamp of output, |ts_out|, to |L/2|. | |
179 // | |
180 // 3) |target| is samples [0, L) of the input. This is the "natural" | |
181 // continuation to the output (given 50% overlap-and-add). | |
182 // | |
183 // 4) Search interval of input is then centered at |ts_out| * |alpha| with | |
184 // the width of 2 * |tau|, i.e. |ts_out| * |alpha| + [-|tau|, |tau|]. | |
185 // | |
186 // 5) Find a frame which is centered within the search interval and is most | |
187 // similar to |target|. Let |Q| be the most similar block to |target| | |
188 // centered at |ts_in_opt|. | |
189 // We compute the optimal block as |opt| = |U| * |target| + | |
190 // (1 - |U|) * |Q|. | |
191 // | |
192 // 6) Overlap-and-add |opt| to the output. That is to add |opt| * |W| to the | |
193 // output with |L/2| samples overlap. | |
194 // | |
195 // 7) |ts_out| = |ts_out| + |L/2| | |
196 // Let |target| be the frame of the input centered at |ts_in_opt| + |L/2|. | |
197 // Note that now |target| is the natural continuation to the current | |
198 // output (the frame that follows |opt| in overlap-and-add sense). | |
199 // Continue from step 4. | |
200 // | |
201 | |
202 // Book keeping of the current index of generated audio, in frames. This | |
203 // should be appropriately updated when output samples are generated, regardless | |
204 // of whether we push samples out when FillBuffer() is called or we store | |
205 // audio in |wsola_output_| for the subsequent calls to FillBuffer(). | |
206 // Furthermore, if samples from input |audio_buffer_| are evicted then this | |
207 // variable should be updated accordingly, based on |playback_rate_|. | |
208 int output_index_; | |
209 | |
210 // The offset of the search center frame w.r.t. the first frame. | |
211 int search_block_center_offset_; | |
212 | |
213 // Number of frames to search to find the most similar one to the target | |
214 // frame. | |
215 int num_candidate_frames_; | |
216 | |
217 // Index of the beginning of the target block, counted in frames. | |
218 int target_block_index_; | |
219 | |
220 // Overlap-and-add window size in frames, denoted as |L| in the WSOLA description. | |
221 int ola_window_size_; | |
222 | |
223 // The hop size of overlap-and-add in frames (|L/2|). This implementation | |
224 // assumes 50% overlap-and-add. | |
225 int ola_hop_size_; | |
226 | |
227 // Number of frames in |wsola_output_| for which overlap-and-add is complete | |
228 // and which can be copied to output if FillBuffer() is called. It also | |
229 // specifies the index where the next WSOLA window has to overlap-and-add. | |
230 int num_complete_frames_; | |
231 | |
232 // This stores a part of the output that is created but couldn't be rendered. | |
233 // Output is generated frame-by-frame which at some point might exceed the | |
234 // number of requested samples. Furthermore, due to overlap-and-add, | |
235 // the last half-window of the output is incomplete, which is stored in this | |
236 // buffer. | |
237 scoped_ptr<AudioBus> wsola_output_; | |
238 | |
239 // Overlap-and-add window, denoted as |W| in the above (see step 6). | |
240 scoped_ptr<float[]> ola_window_; | |
241 | |
242 // Transition window, denoted as |U| in the above (see step 5). | |
243 scoped_ptr<float[]> transition_window_; | |
244 | |
245 // Auxiliary variables to avoid allocation in every iteration. | |
246 | |
247 // Stores the optimal block in every iteration. This is the most | |
248 // similar block to |target_block_| within |search_block_| and it is | |
249 // overlap-and-added to |wsola_output_|. | |
250 scoped_ptr<AudioBus> optimal_block_; | |
251 | |
252 // A block of data that search is performed over to find the |optimal_block_|. | |
253 scoped_ptr<AudioBus> search_block_; | |
254 | |
255 // Stores the target block, denoted as |target| above. |search_block_| is | |
256 // searched for a block (|optimal_block_|) that is most similar to | |
257 // |target_block_|. | |
258 scoped_ptr<AudioBus> target_block_; | |
259 | |
166 DISALLOW_COPY_AND_ASSIGN(AudioRendererAlgorithm); | 260 DISALLOW_COPY_AND_ASSIGN(AudioRendererAlgorithm); |
167 }; | 261 }; |
168 | 262 |
169 } // namespace media | 263 } // namespace media |
170 | 264 |
171 #endif // MEDIA_FILTERS_AUDIO_RENDERER_ALGORITHM_H_ | 265 #endif // MEDIA_FILTERS_AUDIO_RENDERER_ALGORITHM_H_ |
OLD | NEW |