Index: media/filters/audio_renderer_algorithm.h
diff --git a/media/filters/audio_renderer_algorithm.h b/media/filters/audio_renderer_algorithm.h
index 26790b996ac360782e4e7eb16d6e082da91c5777..b940ad2fac439db31ab3fdf8b7a9495b60760abc 100644
--- a/media/filters/audio_renderer_algorithm.h
+++ b/media/filters/audio_renderer_algorithm.h
@@ -84,46 +84,42 @@ class MEDIA_EXPORT AudioRendererAlgorithm {
   bool is_muted() { return muted_; }
  private:
-  // Fills |dest| with up to |requested_frames| frames of audio data at faster
-  // than normal speed. Returns the number of frames inserted into |dest|. If
-  // not enough data available, returns 0.
-  //
-  // When the audio playback is > 1.0, we use a variant of Overlap-Add to squish
-  // audio output while preserving pitch. Essentially, we play a bit of audio
-  // data at normal speed, then we "fast forward" by dropping the next bit of
-  // audio data, and then we stich the pieces together by crossfading from one
-  // audio chunk to the next.
-  int OutputFasterPlayback(AudioBus* dest,
-                           int dest_offset,
-                           int requested_frames,
-                           int input_step,
-                           int output_step);
-
-  // Fills |dest| with up to |requested_frames| frames of audio data at slower
-  // than normal speed. Returns the number of frames inserted into |dest|. If
-  // not enough data available, returns 0.
-  //
-  // When the audio playback is < 1.0, we use a variant of Overlap-Add to
-  // stretch audio output while preserving pitch. This works by outputting a
-  // segment of audio data at normal speed. The next audio segment then starts
-  // by repeating some of the audio data from the previous audio segment.
-  // Segments are stiched together by crossfading from one audio chunk to the
-  // next.
-  int OutputSlowerPlayback(AudioBus* dest,
-                           int dest_offset,
-                           int requested_frames,
-                           int input_step,
-                           int output_step);
-
-  // Resets the window state to the start of a new window.
-  void ResetWindow();
-
-  // Does a linear crossfade from |intro| into |outtro| for one frame.
-  void CrossfadeFrame(AudioBus* intro,
-                      int intro_offset,
-                      AudioBus* outtro,
-                      int outtro_offset,
-                      int fade_offset);
+  // Run WSOLA on the input to obtain |requested_frames| output frames and
+  // write them to |dest|.
+  int WsolaOutput(int requested_frames, AudioBus* dest);
+
+  // Within the search region, find the block of data that is most similar to
+  // the target window, and write it to |optimal_block|.
+  void GetOptimalBlock(AudioBus* optimal_block);
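As an illustration (not part of this change): GetOptimalBlock() operates on multi-channel AudioBus data, and the header does not show how per-channel similarity is combined. One plausible measure, sketched with std::vector in place of AudioBus and all names hypothetical, is to sum a plain correlation over channels:

    #include <cstddef>
    #include <vector>

    // Hypothetical similarity score between two equally sized blocks indexed
    // as [channel][frame]: the sum over channels of per-channel correlations.
    float BlockSimilarity(const std::vector<std::vector<float>>& target,
                          const std::vector<std::vector<float>>& candidate) {
      float similarity = 0.0f;
      for (std::size_t ch = 0; ch < target.size(); ++ch) {
        for (std::size_t i = 0; i < target[ch].size(); ++i)
          similarity += target[ch][i] * candidate[ch][i];
      }
      return similarity;
    }

Under such a measure, the candidate block with the highest score would be the one written to |optimal_block|.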
+
+  // Read at most |requested_frames| frames from |wsola_output_|. Returns the
+  // number of frames actually read.
+  int ReadWsolaOutput(int requested_frames, int output_offset, AudioBus* dest);
+
+  // Fill |dest| with frames from |audio_buffer_| starting from frame
ajm 2013/07/23 18:03:28: form -> from
turaj 2013/07/29 22:09:57: Done.
+  // |read_offset_frames|. |dest| is expected to have the same number of channels
ajm 2013/07/23 18:03:28: the same
turaj 2013/07/29 22:09:57: Done.
+  // as |audio_buffer_|. Negative offsets, i.e. |read_offset_frames| < 0, are
+  // accepted, assuming that |audio_buffer_| is zero for negative indices.
+  // Returns false if it is required to read beyond the last frame of
+  // |audio_buffer_|; otherwise returns true.
+  bool PeekAudioWithZerroAppend(int read_offset_frames, AudioBus* dest);
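As a quick illustration of the zero-padded peek described above (not part of this change), here is a single-channel sketch with std::vector<float> standing in for AudioBus and |input| standing in for |audio_buffer_|; the names are hypothetical:

    #include <vector>

    // Reads dest->size() frames starting at |read_offset_frames|. Negative
    // source indices read as zero; reading past the end of |input| fails.
    bool PeekWithZeroPadding(const std::vector<float>& input,
                             int read_offset_frames,
                             std::vector<float>* dest) {
      const int frames = static_cast<int>(dest->size());
      if (read_offset_frames + frames > static_cast<int>(input.size()))
        return false;  // Would require data beyond the last buffered frame.
      for (int i = 0; i < frames; ++i) {
        const int src = read_offset_frames + i;
        (*dest)[i] = (src < 0) ? 0.0f : input[src];
      }
      return true;
    }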
+
+  // Run WSOLA to get one block of output.
+  void Wsola();
+
+  // Seek |audio_buffer_| forward to remove input frames that are no longer
+  // needed. The state of the WSOLA should be updated accordingly.
+  void RemoveOldInputFrames();
+
+  // Returns the index of the first frame of the search region.
+  int GetSearchRegionIndex() const;
+
+  // Is the target block within the search region? If so, we don't need to
+  // perform the search.
+  bool TargetIsWithinSearchRegion() const;
+
+  // Do we have enough data to perform one round of WSOLA?
+  bool CanPerformWsola() const;
   // Number of channels in audio stream.
   int channels_;
@@ -137,32 +133,101 @@ class MEDIA_EXPORT AudioRendererAlgorithm {
   // Buffered audio data.
   AudioBufferQueue audio_buffer_;
-  // Length for crossfade in frames.
-  int frames_in_crossfade_;
-
-  // The current location in the audio window, between 0 and |window_size_|.
-  // When |index_into_window_| reaches |window_size_|, the window resets.
-  // Indexed by frame.
-  int index_into_window_;
-
-  // The frame number in the crossfade.
-  int crossfade_frame_number_;
-
   // True if the audio should be muted.
   bool muted_;
   // If muted, keep track of partial frames that should have been skipped over.
   double muted_partial_frame_;
-  // Temporary buffer to hold crossfade data.
-  scoped_ptr<AudioBus> crossfade_buffer_;
-
-  // Window size, in frames (calculated from audio properties).
-  int window_size_;
-
   // How many frames to have in the queue before we report the queue is full.
   int capacity_;
+  // Waveform Similarity Overlap-and-Add (WSOLA) variables.
ajm 2013/07/23 18:03:28: Do you mention what this stands for anywhere?
turaj 2013/07/29 22:09:57: Done.
+  //
+  // This is how WSOLA with 50% overlap-add works:
+  //
+  // Notation:
+  //
+  // |W| overlap-and-add (OLA) window.
+  // |L| size of |W| in samples.
+  // |alpha| playback-rate, where values less than 1 indicate a slowed-down
+  //         playout (output is longer than input).
+  // |ts_out| current timestamp of output.
+  // |target| target-frame, we search the input to find a frame that is most
ajm 2013/07/23 18:03:28: In Chrome, "frame" is used to mean a single-channe
turaj 2013/07/29 22:09:57: The description was written before I realize the u
+  //          similar to |target|. Similarity is measured by the correlation
+  //          between two given frames.
+  // |tau| a parameter defining the search interval. The search interval for
+  //       the best match to |target| is
+  //       [|ts_out| * |alpha| - |tau|, |ts_out| * |alpha| + |tau|].
+  //
+  // Assume we start at time 0, i.e. beginning of both input
+  // and output streams.
+  //
+  // 1) Initialize the output with the faded-out version of the first |L/2|
+  //    samples of the input. The faded-out version is constructed by
+  //    multiplying |L/2| input samples with the second half of OLA window, |W|.
+  //
+  // 2) Set the timestamp of output, |ts_out|, to |L/2|.
+  //
+  // 3) |target| is samples [0, L) of the input. This is the "natural"
+  //    continuation to the output (given 50% overlap-and-add).
+  //
+  // 4) Search interval of input is then centered at |ts_out| * |alpha| with
ajm 2013/07/23 18:03:28: ts_out
turaj 2013/07/29 22:09:57: Done.
+  //    the width of 2 * |tau|, i.e. |ts_out| * |alpha| + [-|tau|, |tau|].
ajm 2013/07/23 18:03:28: Space around * to be consistent.
turaj 2013/07/29 22:09:57: Done.
+  //
+  // 5) Find a frame which is centered within the search interval and is most
+  //    similar to |target|.
+  //    Let the optimal frame be |opt| and its center be |ts_in_opt|.
ajm 2013/07/23 18:03:28: ts_in_opt? Or just change all ts -> t
turaj 2013/07/29 22:09:57: Done.
+  //
+  // 6) Overlap-and-add the frame |opt| * |W| to the output.
+  //
+  // 7) |ts_out| = |ts_out| + |L/2|
+  //    Let |target| be the frame of the input centered at |ts_in_opt| + |L/2|.
+  //    Note that now |target| is the natural continuation to the current
+  //    output (the frame that follows |opt| in the overlap-and-add sense).
+  //    Continue from step 4.
+  //
+
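To make the steps above concrete, here is a self-contained single-channel sketch of the 50% overlap-and-add WSOLA loop (not the Chromium implementation): it assumes a Hann window for |W|, uses a plain correlation as the similarity measure of step 5, and folds step 1's faded-out initialization into the first overlap-and-add. All names are hypothetical.

    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <vector>

    namespace {

    // Hann window of length |n|; 50%-overlapped copies of it sum to 1.
    std::vector<float> HannWindow(int n) {
      const double kPi = 3.14159265358979323846;
      std::vector<float> w(n);
      for (int i = 0; i < n; ++i)
        w[i] = 0.5f - 0.5f * static_cast<float>(std::cos(2.0 * kPi * i / n));
      return w;
    }

    // Correlation between |block| and the equally long block of |input|
    // starting at |offset| -- the similarity measure of step 5.
    float Correlation(const std::vector<float>& input, int offset,
                      const std::vector<float>& block) {
      float sum = 0.0f;
      for (std::size_t i = 0; i < block.size(); ++i)
        sum += input[offset + i] * block[i];
      return sum;
    }

    }  // namespace

    // Time-scales mono |input| by |alpha| (< 1 stretches, > 1 compresses) with
    // window length |L| frames and search half-width |tau| frames.
    std::vector<float> WsolaSketch(const std::vector<float>& input, double alpha,
                                   int L, int tau) {
      const int hop = L / 2;  // 50% overlap-and-add.
      const std::vector<float> window = HannWindow(L);
      std::vector<float> output;

      int target = 0;   // Step 3: the "natural continuation" of the output.
      int ts_out = 0;   // Output timestamp, in frames.
      for (;;) {
        // Step 4: search interval centered at |ts_out| * |alpha|.
        const int center = static_cast<int>(ts_out * alpha + 0.5);
        const int lo = std::max(0, center - tau);
        const int hi = center + tau;
        if (hi + L > static_cast<int>(input.size()) ||
            target + L > static_cast<int>(input.size())) {
          break;  // Not enough input left for another round.
        }

        // Step 5: find the candidate block most similar to |target|.
        const std::vector<float> target_block(input.begin() + target,
                                              input.begin() + target + L);
        int opt = lo;
        float best_score = Correlation(input, lo, target_block);
        for (int k = lo + 1; k <= hi; ++k) {
          const float score = Correlation(input, k, target_block);
          if (score > best_score) {
            best_score = score;
            opt = k;
          }
        }

        // Step 6: overlap-and-add the windowed optimal block at |ts_out|.
        output.resize(ts_out + L, 0.0f);
        for (int i = 0; i < L; ++i)
          output[ts_out + i] += input[opt + i] * window[i];

        // Step 7: advance, and make the block following |opt| the next target.
        target = opt + hop;
        ts_out += hop;
      }
      return output;
    }

Note that at any point in this loop the last half window of |output| has received only one window's worth of contribution; that incomplete tail corresponds to what the class keeps in |wsola_output_| between calls.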
+  // Bookkeeping of the current index of generated audio, in frames. This
+  // should be appropriately updated when output samples are generated, regardless
ajm 2013/07/23 18:03:28: updated
turaj 2013/07/29 22:09:57: Done.
+  // of whether we push samples out when FillBuffer() is called or we store
+  // audio in |wsola_output_| for the subsequent calls to FillBuffer().
+  // Furthermore, if samples from input |audio_buffer_| are evicted then this
ajm 2013/07/23 18:03:28: from
turaj 2013/07/29 22:09:57: Done.
+  // variable should be updated accordingly, based on |playback_rate_|.
+  int output_index_;
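Purely as an illustration of the bookkeeping above (not part of this change): |output_index_| plays the role of |ts_out| in the description, so the center of the input search region follows |ts_out| * |alpha|. A hypothetical helper, assuming |playback_rate_| stands in for |alpha|:

    // Hypothetical: input frame at the center of the search region for the
    // current output index, i.e. |ts_out| * |alpha| rounded to a frame.
    int SearchCenterInputFrame(int output_index, double playback_rate) {
      return static_cast<int>(output_index * playback_rate + 0.5);
    }

The real GetSearchRegionIndex() presumably also accounts for |search_region_center_offset_| and for frames already evicted from |audio_buffer_|; those details are not visible in this header.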
+
+  // The offset of the search center frame w.r.t. the first frame.
+  int search_region_center_offset_;
+
+  // Number of frames to search to find the most similar one to the target
+  // frame.
+  int num_candid_frames_;
ajm 2013/07/23 18:03:28: Is this short for candidate? Don't shorten it.
turaj 2013/07/29 22:09:57: Done.
+
+  // Index of the beginning of the target window, counted in frames.
+  int target_window_index_;
+
+  // Overlap-and-add window size in frames (|L| in the description above).
ajm 2013/07/23 18:03:28: Perhaps refer to your description above (|L|).
turaj 2013/07/29 22:09:57: Done.
+  int ola_window_size_;
+
+  // The hop size of overlap-and-add in frames (|L/2|). This implementation
ajm 2013/07/23 18:03:28: frames (|L/2|). This ...
turaj 2013/07/29 22:09:57: Done.
+  // assumes 50% overlap-and-add.
+  int ola_hop_size_;
+
+  int num_complete_frames_;
+
+  // This stores the part of the output that has been generated but not yet
+  // rendered. Output is generated frame-by-frame which at some point might
+  // exceed the number of requested samples. Furthermore, due to
+  // overlap-and-add, the last half-window of the output is always incomplete
+  // and is kept in this buffer.
+  scoped_ptr<AudioBus> wsola_output_;
+
+  // Overlap-and-add window.
ajm 2013/07/23 18:03:28: Explain more, or remove the comment.
turaj 2013/07/29 22:09:57: Done.
+  scoped_ptr<float[]> ola_window_;
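The header does not say which window |ola_window_| holds. A periodic Hann window, as in the HannWindow() helper of the sketch further above, is a common choice because copies of it shifted by half the window length sum to exactly 1, so 50% overlap-and-add introduces no amplitude ripple. A small self-contained check of that property, for illustration only:

    #include <cmath>
    #include <vector>

    // True if copies of |window| shifted by half its length sum to ~1 at every
    // sample, the property 50% overlap-and-add relies on to avoid ripple.
    bool SumsToOneAtHalfOverlap(const std::vector<float>& window) {
      const int hop = static_cast<int>(window.size()) / 2;
      for (int i = 0; i < hop; ++i) {
        if (std::fabs(window[i] + window[i + hop] - 1.0f) > 1e-4f)
          return false;
      }
      return true;
    }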
+
+  // Transition window.
ajm 2013/07/23 18:03:28: Explain more.
turaj 2013/07/29 22:09:57: Done.
+  scoped_ptr<float[]> transition_window_;
+
   DISALLOW_COPY_AND_ASSIGN(AudioRendererAlgorithm);
 };