Chromium Code Reviews| Index: media/filters/audio_renderer_algorithm.h |
| diff --git a/media/filters/audio_renderer_algorithm.h b/media/filters/audio_renderer_algorithm.h |
| index 26790b996ac360782e4e7eb16d6e082da91c5777..4561bb5369d01ececf51ea33ee3795649109fd95 100644 |
| --- a/media/filters/audio_renderer_algorithm.h |
| +++ b/media/filters/audio_renderer_algorithm.h |
| @@ -84,46 +84,47 @@ class MEDIA_EXPORT AudioRendererAlgorithm { |
| bool is_muted() { return muted_; } |
| private: |
| - // Fills |dest| with up to |requested_frames| frames of audio data at faster |
| - // than normal speed. Returns the number of frames inserted into |dest|. If |
| - // not enough data available, returns 0. |
| - // |
| - // When the audio playback is > 1.0, we use a variant of Overlap-Add to squish |
| - // audio output while preserving pitch. Essentially, we play a bit of audio |
| - // data at normal speed, then we "fast forward" by dropping the next bit of |
| - // audio data, and then we stich the pieces together by crossfading from one |
| - // audio chunk to the next. |
| - int OutputFasterPlayback(AudioBus* dest, |
| - int dest_offset, |
| - int requested_frames, |
| - int input_step, |
| - int output_step); |
| - |
| - // Fills |dest| with up to |requested_frames| frames of audio data at slower |
| - // than normal speed. Returns the number of frames inserted into |dest|. If |
| - // not enough data available, returns 0. |
| - // |
| - // When the audio playback is < 1.0, we use a variant of Overlap-Add to |
| - // stretch audio output while preserving pitch. This works by outputting a |
| - // segment of audio data at normal speed. The next audio segment then starts |
| - // by repeating some of the audio data from the previous audio segment. |
| - // Segments are stiched together by crossfading from one audio chunk to the |
| - // next. |
| - int OutputSlowerPlayback(AudioBus* dest, |
| - int dest_offset, |
| - int requested_frames, |
| - int input_step, |
| - int output_step); |
| - |
| - // Resets the window state to the start of a new window. |
| - void ResetWindow(); |
| - |
| - // Does a linear crossfade from |intro| into |outtro| for one frame. |
| - void CrossfadeFrame(AudioBus* intro, |
| - int intro_offset, |
| - AudioBus* outtro, |
| - int outtro_offset, |
| - int fade_offset); |
| + // Within the search region, find the block of data that is most similar to |
| + // |target_block_|, and write it in |optimal_block_|. Returns false if there is |
|
DaleCurtis
2013/08/13 21:11:04
s/target block/|target_block_|/
turaj
2013/08/16 22:13:56
comment rephrased.
On 2013/08/13 21:11:04, DaleCu
|
| + // not enough data to perform search. This is the case if either |
| + // |target_block_| or |search_block_| extend into the future, i.e. more input |
| + // is required. Otherwise true is returned. |
| + bool GetOptimalBlock(); |
| + |
| + // Read a maximum of |requested_frames| frames from |wsola_output_|. Returns |
| + // number of frames actually read. |
| + int WriteCompletedFramesTo( |
| + int requested_frames, int output_offset, AudioBus* dest); |
| + |
| + // Fill |dest| with frames from |audio_buffer_| starting from frame |
| + // |read_offset_frames|. |dest| is expected to have the same number of |
| + // channels as |audio_buffer_|. A negative offset, i.e. |
|
DaleCurtis
2013/08/13 21:11:04
s/Negative/negative/
turaj
2013/08/16 22:13:56
Done.
|
| + // |read_offset_frames| < 0, is accepted assuming that |audio_buffer_| is zero |
| + // for negative indices. This might happen for the first few frames. False will |
| + // be returned if it is required to read beyond the last frame of |
| + // |audio_buffer_|, otherwise true is returned. |
| + bool PeekAudioWithZeroAppend(int read_offset_frames, AudioBus* dest); |
| + |
| + // Run one iteration of WSOLA, if there are sufficient frames. This will |
| + // extend the output by |ola_hop_size_|, written to |wsola_output_|. |
| + // NOTE(review): this declaration takes no |requested_frames|, |dest| or |
| + // |dest_offset| argument and returns bool rather than a frame count, so the |
| + // return value presumably reports whether an iteration was performed -- |
| + // confirm against the implementation. |
| + bool WsolaIteration(); |
|
DaleCurtis
2013/08/13 21:11:04
RunOneWsolaIteration() ?
turaj
2013/08/16 22:13:56
Done.
|
| + |
| + // Seek |audio_buffer_| forward to remove frames from input that are not used |
| + // any more. State of the WSOLA will be updated accordingly. |
| + void RemoveOldInputFrames(); |
| + |
| + // Return the index to the first frame of the search region. |
| + int GetSearchRegionIndex() const; |
| + |
| + // Is |target_block_| fully within |search_block_|? If so, we don't need to |
|
DaleCurtis
2013/08/13 21:11:04
Use explicit |target_block_| and |search_block_|
turaj
2013/08/16 22:13:56
Done.
|
| + // perform the search. |
| + bool TargetIsWithinSearchRegion() const; |
| + |
| + // Do we have enough data to perform one round of WSOLA? |
| + bool CanPerformWsola() const; |
| // Number of channels in audio stream. |
| int channels_; |
| @@ -137,32 +138,123 @@ class MEDIA_EXPORT AudioRendererAlgorithm { |
| // Buffered audio data. |
| AudioBufferQueue audio_buffer_; |
| - // Length for crossfade in frames. |
| - int frames_in_crossfade_; |
| - |
| - // The current location in the audio window, between 0 and |window_size_|. |
| - // When |index_into_window_| reaches |window_size_|, the window resets. |
| - // Indexed by frame. |
| - int index_into_window_; |
| - |
| - // The frame number in the crossfade. |
| - int crossfade_frame_number_; |
| - |
| // True if the audio should be muted. |
| bool muted_; |
| // If muted, keep track of partial frames that should have been skipped over. |
| double muted_partial_frame_; |
| - // Temporary buffer to hold crossfade data. |
| - scoped_ptr<AudioBus> crossfade_buffer_; |
| - |
| - // Window size, in frames (calculated from audio properties). |
| - int window_size_; |
| - |
| // How many frames to have in the queue before we report the queue is full. |
| int capacity_; |
| + // Waveform Similarity Overlap-and-add (WSOLA) variables. |
|
DaleCurtis
2013/08/13 21:11:04
This is more of an algorithm description and shoul
turaj
2013/08/16 22:13:56
I thought a description of algorithm helps underst
DaleCurtis
2013/08/19 22:15:23
Feel free to add a comment telling readers to look
turaj
2013/08/21 01:01:19
Done. If you feel appropriate I can give reference
|
| + // |
| + // This is how WSOLA with 50% overlap-add works: |
| + // |
| + // Notation: |
| + // |
| + // |W| overlap-and-add (OLA) window. |
| + // |L| size of |W| in samples. |
| + // |alpha| playback-rate, where values less than 1 indicate a slowed-down |
| + // playout (output is longer than input). |
| + // |ts_out| current timestamp of output. |
| + // |target| target-block, we search the input to find a block that is most |
| + // similar to |target|. Similarity is measured by the correlation |
| + // between two given blocks. |
| + // |tau| a parameter defining the search interval. The search interval for |
| + // the best match to |target| is |
| + // [|ts_out|*|alpha|-|tau|, |ts_out|*|alpha|+|tau|]. |
| + // |U| Transition Window. See step 5) for the usage of this window. |
| + // |
| + // Assume we start at time 0, i.e. beginning of both input |
| + // and output streams. |
| + // |
| + // 1) Initialize the output with the faded-out version of the first |L/2| |
| + // samples of the input. The faded-out version is constructed by |
| + // multiplying |L/2| input samples with the second half of OLA window, |W|. |
| + // |
| + // 2) Set the timestamp of output, |ts_out|, to |L/2|. |
| + // |
| + // 3) |target| is samples [0, L) of the input. This is the "natural" |
| + // continuation to the output (given 50% overlap-and-add). |
| + // |
| + // 4) Search interval of input is then centered at |ts_out| * |alpha| with |
| + // the width of 2 * |tau|, i.e. |ts_out| * |alpha| + [-|tau|, |tau|]. |
| + // |
| + // 5) Find a frame which is centered within the search interval and is most |
| + // similar to |target|. Let |Q| be the most similar block to |target| |
| + // centered at |ts_in_opt|. |
| + // We compute the optimal block as |opt| = |U| * |target| + |
| + // (1 - |U|) * |Q|. |
| + // |
| + // 6) Overlap-and-add |opt| to the output. That is to add |opt| * |W| to the |
| + // output with |L/2| samples overlap. |
| + // |
| + // 7) |ts_out| = |ts_out| + |L/2| |
| + // Let |target| be the frame of the input centered at |ts_in_opt| + |L/2|. |
| + // Note that now |target| is the natural continuation to the current |
| + // output (the frame that follows |opt| in overlap-and-add sense). |
| + // Continue from step 4. |
| + // |
| + |
| + // Book keeping of the current index of generated audio, in frames. This |
| + // should be appropriately updated when output samples are generated, regardless |
| + // of whether we push samples out when FillBuffer() is called or we store |
| + // audio in |wsola_output_| for the subsequent calls to FillBuffer(). |
| + // Furthermore, if samples from input |audio_buffer_| are evicted then this |
| + // variable should be updated accordingly, based on |playback_rate_|. |
| + int output_index_; |
| + |
| + // The offset of the search center frame w.r.t. the first frame. |
| + int search_block_center_offset_; |
| + |
| + // Number of blocks to search to find the most similar one to the target |
| + // frame. |
| + int num_candidate_blocks_; |
| + |
| + // Index of the beginning of the target block, counted in frames. |
| + int target_block_index_; |
| + |
| + // Overlap-and-add window size in frames, denoted as |L| in WSOLA description. |
| + int ola_window_size_; |
| + |
| + // The hop size of overlap-and-add in frames (|L/2|). This implementation |
| + // assumes 50% overlap-and-add. |
| + int ola_hop_size_; |
| + |
| + // Number of frames in |wsola_output_| for which overlap-and-add is completed |
| + // and which can be copied to output if FillBuffer() is called. It also |
| + // specifies the index where the next WSOLA window has to overlap-and-add. |
| + int num_complete_frames_; |
| + |
| + // This stores a part of the output that is created but couldn't be rendered. |
| + // Output is generated frame-by-frame which at some point might exceed the |
| + // number of requested samples. Furthermore, due to overlap-and-add, |
| + // the last half-window of the output is incomplete, which is stored in this |
| + // buffer. |
| + scoped_ptr<AudioBus> wsola_output_; |
| + |
| + // Overlap-and-add window, denoted as |W| in the above (see step 6). |
| + scoped_ptr<float[]> ola_window_; |
| + |
| + // Transition window, denoted as |U| in the above (see step 5). |
| + scoped_ptr<float[]> transition_window_; |
| + |
| + // Auxiliary variables to avoid allocation in every iteration. |
| + |
| + // Stores the optimal block in every iteration. This is the most |
| + // similar block to |target_block_| within |search_block_| and it is |
| + // overlap-and-added to |wsola_output_|. |
| + scoped_ptr<AudioBus> optimal_block_; |
| + |
| + // A block of data that search is performed over to find the |optimal_block_|. |
| + scoped_ptr<AudioBus> search_block_; |
| + |
| + // Stores the target block, denoted as |target| above. |search_block_| is |
| + // searched for a block (|optimal_block_|) that is most similar to |
| + // |target_block_|. |
| + scoped_ptr<AudioBus> target_block_; |
| + |
| DISALLOW_COPY_AND_ASSIGN(AudioRendererAlgorithm); |
| }; |