Index: media/filters/audio_renderer_algorithm.h
diff --git a/media/filters/audio_renderer_algorithm.h b/media/filters/audio_renderer_algorithm.h
index 26790b996ac360782e4e7eb16d6e082da91c5777..b940ad2fac439db31ab3fdf8b7a9495b60760abc 100644
--- a/media/filters/audio_renderer_algorithm.h
+++ b/media/filters/audio_renderer_algorithm.h
@@ -84,46 +84,42 @@ class MEDIA_EXPORT AudioRendererAlgorithm {
   bool is_muted() { return muted_; }
  private:
-  // Fills |dest| with up to |requested_frames| frames of audio data at faster
-  // than normal speed. Returns the number of frames inserted into |dest|. If
-  // not enough data available, returns 0.
-  //
-  // When the audio playback is > 1.0, we use a variant of Overlap-Add to squish
-  // audio output while preserving pitch. Essentially, we play a bit of audio
-  // data at normal speed, then we "fast forward" by dropping the next bit of
-  // audio data, and then we stich the pieces together by crossfading from one
-  // audio chunk to the next.
-  int OutputFasterPlayback(AudioBus* dest,
-                           int dest_offset,
-                           int requested_frames,
-                           int input_step,
-                           int output_step);
-
-  // Fills |dest| with up to |requested_frames| frames of audio data at slower
-  // than normal speed. Returns the number of frames inserted into |dest|. If
-  // not enough data available, returns 0.
-  //
-  // When the audio playback is < 1.0, we use a variant of Overlap-Add to
-  // stretch audio output while preserving pitch. This works by outputting a
-  // segment of audio data at normal speed. The next audio segment then starts
-  // by repeating some of the audio data from the previous audio segment.
-  // Segments are stiched together by crossfading from one audio chunk to the
-  // next.
-  int OutputSlowerPlayback(AudioBus* dest,
-                           int dest_offset,
-                           int requested_frames,
-                           int input_step,
-                           int output_step);
-
-  // Resets the window state to the start of a new window.
-  void ResetWindow();
-
-  // Does a linear crossfade from |intro| into |outtro| for one frame.
-  void CrossfadeFrame(AudioBus* intro,
-                      int intro_offset,
-                      AudioBus* outtro,
-                      int outtro_offset,
-                      int fade_offset);
+  // Run WSOLA on the input to obtain |requested_frames| output frames and
+  // write them to |dest|.
+  int WsolaOutput(int requested_frames, AudioBus* dest);
+
+  // Within the search region, find the block of data that is most similar to
+  // the target window, and write it to |optimal_block|.
+  void GetOptimalBlock(AudioBus* optimal_block);
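As an illustration (not part of this change): GetOptimalBlock() operates on multi-channel AudioBus data, and the header does not show how per-channel similarity is combined. One plausible measure, sketched with std::vector in place of AudioBus and all names hypothetical, is to sum a plain correlation over channels:

    #include <cstddef>
    #include <vector>

    // Hypothetical similarity score between two equally sized blocks indexed
    // as [channel][frame]: the sum over channels of per-channel correlations.
    float BlockSimilarity(const std::vector<std::vector<float>>& target,
                          const std::vector<std::vector<float>>& candidate) {
      float similarity = 0.0f;
      for (std::size_t ch = 0; ch < target.size(); ++ch) {
        for (std::size_t i = 0; i < target[ch].size(); ++i)
          similarity += target[ch][i] * candidate[ch][i];
      }
      return similarity;
    }

Under such a measure, the candidate block with the highest score would be the one written to |optimal_block|.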
+
+  // Read at most |requested_frames| frames from |wsola_output_|. Returns the
+  // number of frames actually read.
+  int ReadWsolaOutput(int requested_frames, int output_offset, AudioBus* dest);
+
+  // Fill |dest| with frames from |audio_buffer_| starting from frame
ajm 2013/07/23 18:03:28: form -> from
turaj 2013/07/29 22:09:57: Done.
+  // |read_offset_frames|. |dest| is expected to have the same number of channels
ajm 2013/07/23 18:03:28: the same
turaj 2013/07/29 22:09:57: Done.
+  // as |audio_buffer_|. Negative offsets, i.e. |read_offset_frames| < 0, are
+  // accepted, assuming that |audio_buffer_| is zero for negative indices.
+  // Returns false if it is required to read beyond the last frame of
+  // |audio_buffer_|; otherwise returns true.
+  bool PeekAudioWithZerroAppend(int read_offset_frames, AudioBus* dest);
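As a quick illustration of the zero-padded peek described above (not part of this change), here is a single-channel sketch with std::vector<float> standing in for AudioBus and |input| standing in for |audio_buffer_|; the names are hypothetical:

    #include <vector>

    // Reads dest->size() frames starting at |read_offset_frames|. Negative
    // source indices read as zero; reading past the end of |input| fails.
    bool PeekWithZeroPadding(const std::vector<float>& input,
                             int read_offset_frames,
                             std::vector<float>* dest) {
      const int frames = static_cast<int>(dest->size());
      if (read_offset_frames + frames > static_cast<int>(input.size()))
        return false;  // Would require data beyond the last buffered frame.
      for (int i = 0; i < frames; ++i) {
        const int src = read_offset_frames + i;
        (*dest)[i] = (src < 0) ? 0.0f : input[src];
      }
      return true;
    }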
+
+  // Run WSOLA to get one block of output.
+  void Wsola();
+
+  // Seek |audio_buffer_| forward to remove input frames that are no longer
+  // needed. The state of the WSOLA should be updated accordingly.
+  void RemoveOldInputFrames();
+
+  // Returns the index of the first frame of the search region.
+  int GetSearchRegionIndex() const;
+
+  // Is the target block within the search region? If so, we don't need to
+  // perform the search.
+  bool TargetIsWithinSearchRegion() const;
+
+  // Do we have enough data to perform one round of WSOLA?
+  bool CanPerformWsola() const;
   // Number of channels in audio stream.
   int channels_;
@@ -137,32 +133,101 @@ class MEDIA_EXPORT AudioRendererAlgorithm {
   // Buffered audio data.
   AudioBufferQueue audio_buffer_;
-  // Length for crossfade in frames.
-  int frames_in_crossfade_;
-
-  // The current location in the audio window, between 0 and |window_size_|.
-  // When |index_into_window_| reaches |window_size_|, the window resets.
-  // Indexed by frame.
-  int index_into_window_;
-
-  // The frame number in the crossfade.
-  int crossfade_frame_number_;
-
   // True if the audio should be muted.
   bool muted_;
   // If muted, keep track of partial frames that should have been skipped over.
   double muted_partial_frame_;
-  // Temporary buffer to hold crossfade data.
-  scoped_ptr<AudioBus> crossfade_buffer_;
-
-  // Window size, in frames (calculated from audio properties).
-  int window_size_;
-
   // How many frames to have in the queue before we report the queue is full.
   int capacity_;
+  // Waveform Similarity Overlap-and-Add (WSOLA) variables.
ajm 2013/07/23 18:03:28: Do you mention what this stands for anywhere?
turaj 2013/07/29 22:09:57: Done.
+  //
+  // This is how WSOLA with 50% overlap-add works:
+  //
+  // Notation:
+  //
+  // |W| overlap-and-add (OLA) window.
+  // |L| size of |W| in samples.
+  // |alpha| playback-rate, where values less than 1 indicate a slowed-down
+  //         playout (output is longer than input).
+  // |ts_out| current timestamp of output.
+  // |target| target-frame, we search the input to find a frame that is most
ajm 2013/07/23 18:03:28: In Chrome, "frame" is used to mean a single-channe
turaj 2013/07/29 22:09:57: The description was written before I realize the u
+  //          similar to |target|. Similarity is measured by the correlation
+  //          between two given frames.
+  // |tau| a parameter defining the search interval. The search interval for
+  //       the best match to |target| is
+  //       [|ts_out| * |alpha| - |tau|, |ts_out| * |alpha| + |tau|].
+  //
+  // Assume we start at time 0, i.e. beginning of both input
+  // and output streams.
+  //
+  // 1) Initialize the output with the faded-out version of the first |L/2|
+  //    samples of the input. The faded-out version is constructed by
+  //    multiplying |L/2| input samples with the second half of OLA window, |W|.
+  //
+  // 2) Set the timestamp of output, |ts_out|, to |L/2|.
+  //
+  // 3) |target| is samples [0, L) of the input. This is the "natural"
+  //    continuation to the output (given 50% overlap-and-add).
+  //
+  // 4) Search interval of input is then centered at |ts_out| * |alpha| with
ajm 2013/07/23 18:03:28: ts_out
turaj 2013/07/29 22:09:57: Done.
+  //    the width of 2 * |tau|, i.e. |ts_out| * |alpha| + [-|tau|, |tau|].
ajm 2013/07/23 18:03:28: Space around * to be consistent.
turaj 2013/07/29 22:09:57: Done.
+  //
+  // 5) Find a frame which is centered within the search interval and is most
+  //    similar to |target|.
+  //    Let the optimal frame be |opt| and its center be |ts_in_opt|.
ajm 2013/07/23 18:03:28: ts_in_opt? Or just change all ts -> t
turaj 2013/07/29 22:09:57: Done.
+  //
+  // 6) Overlap-and-add the frame |opt| * |W| to the output.
+  //
+  // 7) |ts_out| = |ts_out| + |L/2|
+  //    Let |target| be the frame of the input centered at |ts_in_opt| + |L/2|.
+  //    Note that now |target| is the natural continuation to the current
+  //    output (the frame that follows |opt| in the overlap-and-add sense).
+  //    Continue from step 4.
+  //
+
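To make the steps above concrete, here is a self-contained single-channel sketch of the 50% overlap-and-add WSOLA loop (not the Chromium implementation): it assumes a Hann window for |W|, uses a plain correlation as the similarity measure of step 5, and folds step 1's faded-out initialization into the first overlap-and-add. All names are hypothetical.

    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <vector>

    namespace {

    // Hann window of length |n|; 50%-overlapped copies of it sum to 1.
    std::vector<float> HannWindow(int n) {
      const double kPi = 3.14159265358979323846;
      std::vector<float> w(n);
      for (int i = 0; i < n; ++i)
        w[i] = 0.5f - 0.5f * static_cast<float>(std::cos(2.0 * kPi * i / n));
      return w;
    }

    // Correlation between |block| and the equally long block of |input|
    // starting at |offset| -- the similarity measure of step 5.
    float Correlation(const std::vector<float>& input, int offset,
                      const std::vector<float>& block) {
      float sum = 0.0f;
      for (std::size_t i = 0; i < block.size(); ++i)
        sum += input[offset + i] * block[i];
      return sum;
    }

    }  // namespace

    // Time-scales mono |input| by |alpha| (< 1 stretches, > 1 compresses) with
    // window length |L| frames and search half-width |tau| frames.
    std::vector<float> WsolaSketch(const std::vector<float>& input, double alpha,
                                   int L, int tau) {
      const int hop = L / 2;  // 50% overlap-and-add.
      const std::vector<float> window = HannWindow(L);
      std::vector<float> output;

      int target = 0;   // Step 3: the "natural continuation" of the output.
      int ts_out = 0;   // Output timestamp, in frames.
      for (;;) {
        // Step 4: search interval centered at |ts_out| * |alpha|.
        const int center = static_cast<int>(ts_out * alpha + 0.5);
        const int lo = std::max(0, center - tau);
        const int hi = center + tau;
        if (hi + L > static_cast<int>(input.size()) ||
            target + L > static_cast<int>(input.size())) {
          break;  // Not enough input left for another round.
        }

        // Step 5: find the candidate block most similar to |target|.
        const std::vector<float> target_block(input.begin() + target,
                                              input.begin() + target + L);
        int opt = lo;
        float best_score = Correlation(input, lo, target_block);
        for (int k = lo + 1; k <= hi; ++k) {
          const float score = Correlation(input, k, target_block);
          if (score > best_score) {
            best_score = score;
            opt = k;
          }
        }

        // Step 6: overlap-and-add the windowed optimal block at |ts_out|.
        output.resize(ts_out + L, 0.0f);
        for (int i = 0; i < L; ++i)
          output[ts_out + i] += input[opt + i] * window[i];

        // Step 7: advance, and make the block following |opt| the next target.
        target = opt + hop;
        ts_out += hop;
      }
      return output;
    }

Note that at any point in this loop the last half window of |output| has received only one window's worth of contribution; that incomplete tail corresponds to what the class keeps in |wsola_output_| between calls.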
+  // Bookkeeping of the current index of generated audio, in frames. This
+  // should be appropriately updated when output samples are generated, regardless
ajm 2013/07/23 18:03:28: updated
turaj 2013/07/29 22:09:57: Done.
+  // of whether we push samples out when FillBuffer() is called or we store
+  // audio in |wsola_output_| for the subsequent calls to FillBuffer().
+  // Furthermore, if samples from input |audio_buffer_| are evicted then this
ajm 2013/07/23 18:03:28: from
turaj 2013/07/29 22:09:57: Done.
+  // variable should be updated accordingly, based on |playback_rate_|.
+  int output_index_;
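Purely as an illustration of the bookkeeping above (not part of this change): |output_index_| plays the role of |ts_out| in the description, so the center of the input search region follows |ts_out| * |alpha|. A hypothetical helper, assuming |playback_rate_| stands in for |alpha|:

    // Hypothetical: input frame at the center of the search region for the
    // current output index, i.e. |ts_out| * |alpha| rounded to a frame.
    int SearchCenterInputFrame(int output_index, double playback_rate) {
      return static_cast<int>(output_index * playback_rate + 0.5);
    }

The real GetSearchRegionIndex() presumably also accounts for |search_region_center_offset_| and for frames already evicted from |audio_buffer_|; those details are not visible in this header.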
+
+  // The offset of the search center frame w.r.t. the first frame.
+  int search_region_center_offset_;
+
+  // Number of frames to search to find the most similar one to the target
+  // frame.
+  int num_candid_frames_;
ajm 2013/07/23 18:03:28: Is this short for candidate? Don't shorten it.
turaj 2013/07/29 22:09:57: Done.
+
+  // Index of the beginning of the target window, counted in frames.
+  int target_window_index_;
+
+  // Overlap-and-add window size in frames (|L| in the description above).
ajm 2013/07/23 18:03:28: Perhaps refer to your description above (|L|).
turaj 2013/07/29 22:09:57: Done.
+  int ola_window_size_;
+
+  // The hop size of overlap-and-add in frames (|L/2|). This implementation
ajm 2013/07/23 18:03:28: frames (|L/2|). This ...
turaj 2013/07/29 22:09:57: Done.
+  // assumes 50% overlap-and-add.
+  int ola_hop_size_;
+
+  int num_complete_frames_;
+
+  // This stores the part of the output that has been generated but not yet
+  // rendered. Output is generated frame-by-frame which at some point might
+  // exceed the number of requested samples. Furthermore, due to
+  // overlap-and-add, the last half-window of the output is always incomplete
+  // and is kept in this buffer.
+  scoped_ptr<AudioBus> wsola_output_;
+
+  // Overlap-and-add window.
ajm 2013/07/23 18:03:28: Explain more, or remove the comment.
turaj 2013/07/29 22:09:57: Done.
+  scoped_ptr<float[]> ola_window_;
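The header does not say which window |ola_window_| holds. A periodic Hann window, as in the HannWindow() helper of the sketch further above, is a common choice because copies of it shifted by half the window length sum to exactly 1, so 50% overlap-and-add introduces no amplitude ripple. A small self-contained check of that property, for illustration only:

    #include <cmath>
    #include <vector>

    // True if copies of |window| shifted by half its length sum to ~1 at every
    // sample, the property 50% overlap-and-add relies on to avoid ripple.
    bool SumsToOneAtHalfOverlap(const std::vector<float>& window) {
      const int hop = static_cast<int>(window.size()) / 2;
      for (int i = 0; i < hop; ++i) {
        if (std::fabs(window[i] + window[i + hop] - 1.0f) > 1e-4f)
          return false;
      }
      return true;
    }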
+
+  // Transition window.
ajm 2013/07/23 18:03:28: Explain more.
turaj 2013/07/29 22:09:57: Done.
+  scoped_ptr<float[]> transition_window_;
+
   DISALLOW_COPY_AND_ASSIGN(AudioRendererAlgorithm);
 };