Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(395)

Unified Diff: media/filters/audio_renderer_algorithm.h

Issue 19111004: Upgrade AudioRendererAlgorithm to use WSOLA, (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: "Dale's and Marco's comments are addressed." Created 7 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | media/filters/audio_renderer_algorithm.cc » ('j') | media/filters/audio_renderer_algorithm.cc » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: media/filters/audio_renderer_algorithm.h
diff --git a/media/filters/audio_renderer_algorithm.h b/media/filters/audio_renderer_algorithm.h
index 26790b996ac360782e4e7eb16d6e082da91c5777..4561bb5369d01ececf51ea33ee3795649109fd95 100644
--- a/media/filters/audio_renderer_algorithm.h
+++ b/media/filters/audio_renderer_algorithm.h
@@ -84,46 +84,47 @@ class MEDIA_EXPORT AudioRendererAlgorithm {
bool is_muted() { return muted_; }
private:
- // Fills |dest| with up to |requested_frames| frames of audio data at faster
- // than normal speed. Returns the number of frames inserted into |dest|. If
- // not enough data available, returns 0.
- //
- // When the audio playback is > 1.0, we use a variant of Overlap-Add to squish
- // audio output while preserving pitch. Essentially, we play a bit of audio
- // data at normal speed, then we "fast forward" by dropping the next bit of
- // audio data, and then we stich the pieces together by crossfading from one
- // audio chunk to the next.
- int OutputFasterPlayback(AudioBus* dest,
- int dest_offset,
- int requested_frames,
- int input_step,
- int output_step);
-
- // Fills |dest| with up to |requested_frames| frames of audio data at slower
- // than normal speed. Returns the number of frames inserted into |dest|. If
- // not enough data available, returns 0.
- //
- // When the audio playback is < 1.0, we use a variant of Overlap-Add to
- // stretch audio output while preserving pitch. This works by outputting a
- // segment of audio data at normal speed. The next audio segment then starts
- // by repeating some of the audio data from the previous audio segment.
- // Segments are stiched together by crossfading from one audio chunk to the
- // next.
- int OutputSlowerPlayback(AudioBus* dest,
- int dest_offset,
- int requested_frames,
- int input_step,
- int output_step);
-
- // Resets the window state to the start of a new window.
- void ResetWindow();
-
- // Does a linear crossfade from |intro| into |outtro| for one frame.
- void CrossfadeFrame(AudioBus* intro,
- int intro_offset,
- AudioBus* outtro,
- int outtro_offset,
- int fade_offset);
+ // Within the search region, find the block of data that is most similar to
+ // target block, and write it in |optimal_block_|. Returns false if there is
DaleCurtis 2013/08/13 21:11:04 s/target block/|target_block_|/
turaj 2013/08/16 22:13:56 comment rephrased. On 2013/08/13 21:11:04, DaleCu
+ // not enough data to perform search. This is the case if either
+ // |target_block_| or |search_block_| extend into the future, i.e. more input
+ // is required. Otherwise true is returned.
+ bool GetOptimalBlock();
+
+ // Read a maximum of |requested_frames| frames from |wsola_output_|. Returns
+ // number of frames actually read.
+ int WriteCompletedFramesTo(
+ int requested_frames, int output_offset, AudioBus* dest);
+
+ // Fill |dest| with frames from |audio_buffer_| starting from frame
+ // |read_offset_frames|. |dest| is expected to have the same number of
+ // channels as |audio_buffer_|. A Negative offset, i.e.
DaleCurtis 2013/08/13 21:11:04 s/Negative/negative/
turaj 2013/08/16 22:13:56 Done.
+ // |read_offset_frames| < 0, is accepted assuming that |audio_buffer_| is zero
+ // for negative indices. This might happen for the first few frames. False
+ // will be returned if it is required to read beyond the last frame of
+ // |audio_buffer_|, otherwise true is returned.
+ bool PeekAudioWithZeroAppend(int read_offset_frames, AudioBus* dest);
+
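+ // Run one iteration of WSOLA, if there are sufficient frames. This will
+ // extend the output by |ola_hop_size_|, written to |wsola_output_|.
+ // NOTE(review): this function takes no arguments and returns bool, so it
+ // cannot write to |dest| or report a frame count; copying completed frames
+ // out of |wsola_output_| is declared separately as WriteCompletedFramesTo().
+ // Presumably returns true if an iteration was run and false if there were
+ // not enough frames -- confirm against the implementation.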
+ bool WsolaIteration();
DaleCurtis 2013/08/13 21:11:04 RunOneWsolaIteration() ?
turaj 2013/08/16 22:13:56 Done.
+
+ // Seek |audio_buffer_| forward to remove frames from input that are not used
+ // any more. State of the WSOLA will be updated accordingly.
+ void RemoveOldInputFrames();
+
+ // Return the index to the first frame of the search region.
+ int GetSearchRegionIndex() const;
+
+ // Is the target block fully within search region? If so, we don't need to
DaleCurtis 2013/08/13 21:11:04 Use explicit |target_block_| and |search_block_|
turaj 2013/08/16 22:13:56 Done.
+ // perform the search.
+ bool TargetIsWithinSearchRegion() const;
+
+ // Do we have enough data to perform one round of WSOLA?
+ bool CanPerformWsola() const;
// Number of channels in audio stream.
int channels_;
@@ -137,32 +138,123 @@ class MEDIA_EXPORT AudioRendererAlgorithm {
// Buffered audio data.
AudioBufferQueue audio_buffer_;
- // Length for crossfade in frames.
- int frames_in_crossfade_;
-
- // The current location in the audio window, between 0 and |window_size_|.
- // When |index_into_window_| reaches |window_size_|, the window resets.
- // Indexed by frame.
- int index_into_window_;
-
- // The frame number in the crossfade.
- int crossfade_frame_number_;
-
// True if the audio should be muted.
bool muted_;
// If muted, keep track of partial frames that should have been skipped over.
double muted_partial_frame_;
- // Temporary buffer to hold crossfade data.
- scoped_ptr<AudioBus> crossfade_buffer_;
-
- // Window size, in frames (calculated from audio properties).
- int window_size_;
-
// How many frames to have in the queue before we report the queue is full.
int capacity_;
+ // Waveform Similarity Overlap-and-add (WSOLA) variables.
DaleCurtis 2013/08/13 21:11:04 This is more of an algorithm description and shoul
turaj 2013/08/16 22:13:56 I thought a description of algorithm helps underst
DaleCurtis 2013/08/19 22:15:23 Feel free to add a comment telling readers to look
turaj 2013/08/21 01:01:19 Done. If you feel appropriate I can give reference
+ //
+ // This is how WSOLA with 50% overlap-add works:
+ //
+ // Notation:
+ //
+ // |W| overlap-and-add (OLA) window.
+ // |L| size of |W| in samples.
+ // |alpha| playback-rate, where values less than 1 indicate a slowed-down
+ // playout (output is longer than input).
+ // |ts_out| current timestamp of output.
+ // |target| target-block, we search the input to find a block that is most
+ // similar to |target|. Similarity is measured by the correlation
+ // between two given blocks.
+ // |tau| a parameter defining the search interval. The search interval for
+ // the best match to |target| is
+ // [|ts_out|*|alpha|-|tau|, |ts_out|*|alpha|+|tau|].
+ // |U| Transition Window. See the step 5) for the usage of this window.
+ //
+ // Assume we start at time 0, i.e. beginning of both input
+ // and output streams.
+ //
+ // 1) Initialize the output with the faded-out version of the first |L/2|
+ // samples of the input. The faded-out version is constructed by
+ // multiplying |L/2| input samples with the second half of OLA window, |W|.
+ //
+ // 2) Set the timestamp of output, |ts_out|, to |L/2|.
+ //
+ // 3) |target| is samples [0, L) of the input. This is the "natural"
+ // continuation to the output (given 50% overlap-and-add).
+ //
+ // 4) Search interval of input is then centered at |ts_out| * |alpha| with
+ // the width of 2 * |tau|, i.e. |ts_out| * |alpha| + [-|tau|, |tau|].
+ //
+ // 5) Find a frame which is centered within the search interval and is most
+ // similar to |target|. Let |Q| be the most similar block to |target|
+ // centered at |ts_in_opt|.
+ // We compute the optimal block as |opt| = |U| * |target| +
+ // (1 - |U|) * |Q|.
+ //
+ // 6) Overlap-and-add |opt| to the output. That is to add |opt| * |W| to the
+ // output with |L/2| samples overlap.
+ //
+ // 7) |ts_out| = |ts_out| + |L/2|
+ // Let |target| be the frame of the input centered at |ts_in_opt| + |L/2|.
+ // Note that now |target| is the natural continuation to the current
+ // output (the frame that follows |opt| in overlap-and-add sense).
+ // Continue from step 4.
+ //
+
+ // Bookkeeping of the current index of generated audio, in frames. This
+ // should be appropriately updated when output samples are generated,
+ // regardless of whether we push samples out when FillBuffer() is called or
+ // we store audio in |wsola_output_| for the subsequent calls to
+ // FillBuffer(). Furthermore, if samples from input |audio_buffer_| are
+ // evicted then this variable should be updated accordingly, based on
+ // |playback_rate_|.
+ int output_index_;
+
+ // The offset of the search center frame w.r.t. the first frame.
+ int search_block_center_offset_;
+
+ // Number of blocks to search to find the most similar one to the target
+ // frame.
+ int num_candidate_blocks_;
+
+ // Index of the beginning of the target block, counted in frames.
+ int target_block_index_;
+
+ // Overlap-and-add window size in frames, denoted as |L| in WSOLA description.
+ int ola_window_size_;
+
+ // The hop size of overlap-and-add in frames (|L/2|). This implementation
+ // assumes 50% overlap-and-add.
+ int ola_hop_size_;
+
+ // Number of frames in |wsola_output_| for which overlap-and-add is complete
+ // and which can be copied to output if FillBuffer() is called. It also
+ // specifies the index where the next WSOLA window has to overlap-and-add.
+ int num_complete_frames_;
+
+ // This stores a part of the output that is created but couldn't be rendered.
+ // Output is generated frame-by-frame which at some point might exceed the
+ // number of requested samples. Furthermore, due to overlap-and-add,
+ // the last half-window of the output is incomplete, which is stored in this
+ // buffer.
+ scoped_ptr<AudioBus> wsola_output_;
+
+ // Overlap-and-add window, denoted as |W| in the above (see step 6).
+ scoped_ptr<float[]> ola_window_;
+
+ // Transition window, denoted as |U| in the above (see step 5).
+ scoped_ptr<float[]> transition_window_;
+
+ // Auxiliary variables to avoid allocation in every iteration.
+
+ // Stores the optimal block in every iteration. This is the most
+ // similar block to |target_block_| within |search_block_| and it is
+ // overlap-and-added to |wsola_output_|.
+ scoped_ptr<AudioBus> optimal_block_;
+
+ // A block of data that search is performed over to find the |optimal_block_|.
+ scoped_ptr<AudioBus> search_block_;
+
+ // Stores the target block, denoted as |target| above. |search_block_| is
+ // searched for a block (|optimal_block_|) that is most similar to
+ // |target_block_|.
+ scoped_ptr<AudioBus> target_block_;
+
DISALLOW_COPY_AND_ASSIGN(AudioRendererAlgorithm);
};
« no previous file with comments | « no previous file | media/filters/audio_renderer_algorithm.cc » ('j') | media/filters/audio_renderer_algorithm.cc » ('J')

Powered by Google App Engine
This is Rietveld 408576698