OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // AudioRendererAlgorithm buffers and transforms audio data. The owner of | 5 // AudioRendererAlgorithm buffers and transforms audio data. The owner of |
6 // this object provides audio data to the object through EnqueueBuffer() and | 6 // this object provides audio data to the object through EnqueueBuffer() and |
7 // requests data from the buffer via FillBuffer(). The owner also sets the | 7 // requests data from the buffer via FillBuffer(). The owner also sets the |
8 // playback rate, and the AudioRendererAlgorithm will stretch or compress the | 8 // playback rate, and the AudioRendererAlgorithm will stretch or compress the |
9 // buffered audio as necessary to match the playback rate when fulfilling | 9 // buffered audio as necessary to match the playback rate when fulfilling |
10 // FillBuffer() requests. | 10 // FillBuffer() requests. |
11 // | 11 // |
12 // This class is *not* thread-safe. Calls to enqueue and retrieve data must be | 12 // This class is *not* thread-safe. Calls to enqueue and retrieve data must be |
13 // locked if called from multiple threads. | 13 // locked if called from multiple threads. |
14 // | 14 // |
15 // AudioRendererAlgorithm uses a simple pitch-preservation algorithm to | 15 // AudioRendererAlgorithm uses the Waveform Similarity Overlap and Add (WSOLA) |
16 // stretch and compress audio data to meet playback speeds less than and | 16 // algorithm to stretch or compress audio data to meet playback speeds less than |
17 // greater than the natural playback of the audio stream. | 17 // or greater than the natural playback of the audio stream. The algorithm |
| 18 // preserves local properties of the audio, therefore, pitch and harmonics are |
| 19 // preserved. See audio_renderer_algorithm.cc for a more elaborate |
| 20 // description of the algorithm. |
18 // | 21 // |
19 // Audio at very low or very high playback rates are muted to preserve quality. | 22 // Audio at very low or very high playback rates are muted to preserve quality. |
| 23 // |
20 | 24 |
21 #ifndef MEDIA_FILTERS_AUDIO_RENDERER_ALGORITHM_H_ | 25 #ifndef MEDIA_FILTERS_AUDIO_RENDERER_ALGORITHM_H_ |
22 #define MEDIA_FILTERS_AUDIO_RENDERER_ALGORITHM_H_ | 26 #define MEDIA_FILTERS_AUDIO_RENDERER_ALGORITHM_H_ |
23 | 27 |
24 #include "base/memory/ref_counted.h" | 28 #include "base/memory/ref_counted.h" |
25 #include "base/memory/scoped_ptr.h" | 29 #include "base/memory/scoped_ptr.h" |
26 #include "media/audio/audio_parameters.h" | 30 #include "media/audio/audio_parameters.h" |
27 #include "media/base/audio_buffer.h" | 31 #include "media/base/audio_buffer.h" |
28 #include "media/base/audio_buffer_queue.h" | 32 #include "media/base/audio_buffer_queue.h" |
29 | 33 |
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
77 // than |audio_buffer_| was intending to hold. | 81 // than |audio_buffer_| was intending to hold. |
78 int frames_buffered() { return audio_buffer_.frames(); } | 82 int frames_buffered() { return audio_buffer_.frames(); } |
79 | 83 |
80 // Returns the samples per second for this audio stream. | 84 // Returns the samples per second for this audio stream. |
81 int samples_per_second() { return samples_per_second_; } | 85 int samples_per_second() { return samples_per_second_; } |
82 | 86 |
83 // Is the sound currently muted? | 87 // Is the sound currently muted? |
84 bool is_muted() { return muted_; } | 88 bool is_muted() { return muted_; } |
85 | 89 |
86 private: | 90 private: |
87 // Fills |dest| with up to |requested_frames| frames of audio data at faster | 91 // Within |search_block_|, find the block of data that is most similar to |
88 // than normal speed. Returns the number of frames inserted into |dest|. If | 92 // |target_block_|, and write it in |optimal_block_|. This method assumes that |
89 // not enough data available, returns 0. | 93 // there is enough data to perform a search, i.e. |search_block_| and |
90 // | 94 // |target_block_| can be extracted from the available frames. |
91 // When the audio playback is > 1.0, we use a variant of Overlap-Add to squish | 95 void GetOptimalBlock(); |
92 // audio output while preserving pitch. Essentially, we play a bit of audio | |
93 // data at normal speed, then we "fast forward" by dropping the next bit of | |
94 // audio data, and then we stich the pieces together by crossfading from one | |
95 // audio chunk to the next. | |
96 int OutputFasterPlayback(AudioBus* dest, | |
97 int dest_offset, | |
98 int requested_frames, | |
99 int input_step, | |
100 int output_step); | |
101 | 96 |
102 // Fills |dest| with up to |requested_frames| frames of audio data at slower | 97 // Read a maximum of |requested_frames| frames from |wsola_output_|. Returns |
103 // than normal speed. Returns the number of frames inserted into |dest|. If | 98 // number of frames actually read. |
104 // not enough data available, returns 0. | 99 int WriteCompletedFramesTo( |
105 // | 100 int requested_frames, int output_offset, AudioBus* dest); |
106 // When the audio playback is < 1.0, we use a variant of Overlap-Add to | |
107 // stretch audio output while preserving pitch. This works by outputting a | |
108 // segment of audio data at normal speed. The next audio segment then starts | |
109 // by repeating some of the audio data from the previous audio segment. | |
110 // Segments are stiched together by crossfading from one audio chunk to the | |
111 // next. | |
112 int OutputSlowerPlayback(AudioBus* dest, | |
113 int dest_offset, | |
114 int requested_frames, | |
115 int input_step, | |
116 int output_step); | |
117 | 101 |
118 // Resets the window state to the start of a new window. | 102 // Fill |dest| with frames from |audio_buffer_| starting from frame |
119 void ResetWindow(); | 103 // |read_offset_frames|. |dest| is expected to have the same number of |
| 104 // channels as |audio_buffer_|. A negative offset, i.e. |
| 105 // |read_offset_frames| < 0, is accepted assuming that |audio_buffer_| is zero |
| 106 // for negative indices. This might happen for the first few frames. This method |
| 107 // assumes there are enough frames to fill |dest|, i.e. |read_offset_frames| + |
| 108 // |dest->frames()| does not extend into the future. |
| 109 void PeekAudioWithZeroPrepend(int read_offset_frames, AudioBus* dest); |
120 | 110 |
121 // Does a linear crossfade from |intro| into |outtro| for one frame. | 111 // Run one iteration of WSOLA, if there are sufficient frames. This will |
122 void CrossfadeFrame(AudioBus* intro, | 112 // overlap-and-add one block to |wsola_output_|, hence, |num_complete_frames_| |
123 int intro_offset, | 113 // is incremented by |ola_hop_size_|. |
124 AudioBus* outtro, | 114 bool RunOneWsolaIteration(); |
125 int outtro_offset, | 115 |
126 int fade_offset); | 116 // Seek |audio_buffer_| forward to remove frames from input that are not used |
| 117 // any more. State of the WSOLA will be updated accordingly. |
| 118 void RemoveOldInputFrames(); |
| 119 |
| 120 // Update |output_time_| by |time_change|. In turn |search_block_index_| is |
| 121 // updated. |
| 122 void UpdateOutputTime(float time_change); |
| 123 |
| 124 // Is |target_block_| fully within |search_block_|? If so, we don't need to |
| 125 // perform the search. |
| 126 bool TargetIsWithinSearchRegion() const; |
| 127 |
| 128 // Do we have enough data to perform one round of WSOLA? |
| 129 bool CanPerformWsola() const; |
127 | 130 |
128 // Number of channels in audio stream. | 131 // Number of channels in audio stream. |
129 int channels_; | 132 int channels_; |
130 | 133 |
131 // Sample rate of audio stream. | 134 // Sample rate of audio stream. |
132 int samples_per_second_; | 135 int samples_per_second_; |
133 | 136 |
134 // Used by algorithm to scale output. | 137 // Used by algorithm to scale output. |
135 float playback_rate_; | 138 float playback_rate_; |
136 | 139 |
137 // Buffered audio data. | 140 // Buffered audio data. |
138 AudioBufferQueue audio_buffer_; | 141 AudioBufferQueue audio_buffer_; |
139 | 142 |
140 // Length for crossfade in frames. | |
141 int frames_in_crossfade_; | |
142 | |
143 // The current location in the audio window, between 0 and |window_size_|. | |
144 // When |index_into_window_| reaches |window_size_|, the window resets. | |
145 // Indexed by frame. | |
146 int index_into_window_; | |
147 | |
148 // The frame number in the crossfade. | |
149 int crossfade_frame_number_; | |
150 | |
151 // True if the audio should be muted. | 143 // True if the audio should be muted. |
152 bool muted_; | 144 bool muted_; |
153 | 145 |
154 // If muted, keep track of partial frames that should have been skipped over. | 146 // If muted, keep track of partial frames that should have been skipped over. |
155 double muted_partial_frame_; | 147 double muted_partial_frame_; |
156 | 148 |
157 // Temporary buffer to hold crossfade data. | |
158 scoped_ptr<AudioBus> crossfade_buffer_; | |
159 | |
160 // Window size, in frames (calculated from audio properties). | |
161 int window_size_; | |
162 | |
163 // How many frames to have in the queue before we report the queue is full. | 149 // How many frames to have in the queue before we report the queue is full. |
164 int capacity_; | 150 int capacity_; |
165 | 151 |
| 152 // Book keeping of the current time of generated audio, in frames. This |
| 153 // should be appropriately updated when output samples are generated, regardless |
| 154 // of whether we push samples out when FillBuffer() is called or we store |
| 155 // audio in |wsola_output_| for the subsequent calls to FillBuffer(). |
| 156 // Furthermore, if samples from |audio_buffer_| are evicted then this |
| 157 // member variable should be updated based on |playback_rate_|. |
| 158 // Note that this member should be updated ONLY by calling UpdateOutputTime(), |
| 159 // so that |search_block_index_| is updated accordingly. |
| 160 float output_time_; |
| 161 |
| 162 // The offset of the center frame of |search_block_| w.r.t. its first frame. |
| 163 int search_block_center_offset_; |
| 164 |
| 165 // Index of the beginning of the |search_block_|, in frames. |
| 166 int search_block_index_; |
| 167 |
| 168 // Number of blocks to search to find the most similar one to the target |
| 169 // block. |
| 170 int num_candidate_blocks_; |
| 171 |
| 172 // Index of the beginning of the target block, counted in frames. |
| 173 int target_block_index_; |
| 174 |
| 175 // Overlap-and-add window size in frames. |
| 176 int ola_window_size_; |
| 177 |
| 178 // The hop size of overlap-and-add in frames. This implementation assumes 50% |
| 179 // overlap-and-add. |
| 180 int ola_hop_size_; |
| 181 |
| 182 // Number of frames in |wsola_output_| for which overlap-and-add is completed |
| 183 // and which can be copied to output if FillBuffer() is called. It also |
| 184 // specifies the index where the next WSOLA window has to overlap-and-add. |
| 185 int num_complete_frames_; |
| 186 |
| 187 // This stores a part of the output that is created but couldn't be rendered. |
| 188 // Output is generated frame-by-frame which at some point might exceed the |
| 189 // number of requested samples. Furthermore, due to overlap-and-add, |
| 190 // the last half-window of the output is incomplete, which is stored in this |
| 191 // buffer. |
| 192 scoped_ptr<AudioBus> wsola_output_; |
| 193 |
| 194 // Overlap-and-add window. |
| 195 scoped_ptr<float[]> ola_window_; |
| 196 |
| 197 // Transition window, used to update |optimal_block_| by a weighted sum of |
| 198 // |optimal_block_| and |target_block_|. |
| 199 scoped_ptr<float[]> transition_window_; |
| 200 |
| 201 // Auxiliary variables to avoid allocation in every iteration. |
| 202 |
| 203 // Stores the optimal block in every iteration. This is the most |
| 204 // similar block to |target_block_| within |search_block_| and it is |
| 205 // overlap-and-added to |wsola_output_|. |
| 206 scoped_ptr<AudioBus> optimal_block_; |
| 207 |
| 208 // A block of data that search is performed over to find the |optimal_block_|. |
| 209 scoped_ptr<AudioBus> search_block_; |
| 210 |
| 211 // Stores the target block, denoted as |target| above. |search_block_| is |
| 212 // searched for a block (|optimal_block_|) that is most similar to |
| 213 // |target_block_|. |
| 214 scoped_ptr<AudioBus> target_block_; |
| 215 |
166 DISALLOW_COPY_AND_ASSIGN(AudioRendererAlgorithm); | 216 DISALLOW_COPY_AND_ASSIGN(AudioRendererAlgorithm); |
167 }; | 217 }; |
168 | 218 |
169 } // namespace media | 219 } // namespace media |
170 | 220 |
171 #endif // MEDIA_FILTERS_AUDIO_RENDERER_ALGORITHM_H_ | 221 #endif // MEDIA_FILTERS_AUDIO_RENDERER_ALGORITHM_H_ |
OLD | NEW |