media/filters/audio_renderer_algorithm.h - Issue 19111004: Upgrade AudioRendererAlgorithm to use WSOLA,

Side by Side Diff: media/filters/audio_renderer_algorithm.h

Issue 19111004: Upgrade AudioRendererAlgorithm to use WSOLA, (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Comments addressed, AudioRendererAlgorithmTest::WsolaTest modified. Created 7 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 // AudioRendererAlgorithm buffers and transforms audio data. The owner of	5 // AudioRendererAlgorithm buffers and transforms audio data. The owner of

6 // this object provides audio data to the object through EnqueueBuffer() and	6 // this object provides audio data to the object through EnqueueBuffer() and

7 // requests data from the buffer via FillBuffer(). The owner also sets the	7 // requests data from the buffer via FillBuffer(). The owner also sets the

8 // playback rate, and the AudioRendererAlgorithm will stretch or compress the	8 // playback rate, and the AudioRendererAlgorithm will stretch or compress the

9 // buffered audio as necessary to match the playback rate when fulfilling	9 // buffered audio as necessary to match the playback rate when fulfilling

10 // FillBuffer() requests.	10 // FillBuffer() requests.

11 //	11 //

12 // This class is not thread-safe. Calls to enqueue and retrieve data must be	12 // This class is not thread-safe. Calls to enqueue and retrieve data must be

13 // locked if called from multiple threads.	13 // locked if called from multiple threads.

14 //	14 //

15 // AudioRendererAlgorithm uses a simple pitch-preservation algorithm to	15 // AudioRendererAlgorithm uses the Waveform Similarity Overlap and Add (WSOLA)

16 // stretch and compress audio data to meet playback speeds less than and	16 // algorithm to stretch or compress audio data to meet playback speeds less than

17 // greater than the natural playback of the audio stream.	17 // or greater than the natural playback of the audio stream. The algorithm

	18 // preserves local properties of the audio, therefore, pitch and harmonics are

	19 // preserved. See audio_renderer_algorithm.cc for a more elaborate

	20 // description of the algorithm.

18 //	21 //

19 // Audio at very low or very high playback rates are muted to preserve quality.	22 // Audio at very low or very high playback rates are muted to preserve quality.

	23 //

20	24

21 #ifndef MEDIA_FILTERS_AUDIO_RENDERER_ALGORITHM_H_	25 #ifndef MEDIA_FILTERS_AUDIO_RENDERER_ALGORITHM_H_

22 #define MEDIA_FILTERS_AUDIO_RENDERER_ALGORITHM_H_	26 #define MEDIA_FILTERS_AUDIO_RENDERER_ALGORITHM_H_

23	27

24 #include "base/memory/ref_counted.h"	28 #include "base/memory/ref_counted.h"

25 #include "base/memory/scoped_ptr.h"	29 #include "base/memory/scoped_ptr.h"

26 #include "media/audio/audio_parameters.h"	30 #include "media/audio/audio_parameters.h"

27 #include "media/base/audio_buffer.h"	31 #include "media/base/audio_buffer.h"

28 #include "media/base/audio_buffer_queue.h"	32 #include "media/base/audio_buffer_queue.h"

29	33

(...skipping 47 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
77 // than \|audio_buffer_\| was intending to hold.	81 // than \|audio_buffer_\| was intending to hold.

78 int frames_buffered() { return audio_buffer_.frames(); }	82 int frames_buffered() { return audio_buffer_.frames(); }

79	83

80 // Returns the samples per second for this audio stream.	84 // Returns the samples per second for this audio stream.

81 int samples_per_second() { return samples_per_second_; }	85 int samples_per_second() { return samples_per_second_; }

82	86

83 // Is the sound currently muted?	87 // Is the sound currently muted?

84 bool is_muted() { return muted_; }	88 bool is_muted() { return muted_; }

85	89

86 private:	90 private:

87 // Fills \|dest\| with up to \|requested_frames\| frames of audio data at faster	91 // Within \|search_block_\|, find the block of data that is most similar to

88 // than normal speed. Returns the number of frames inserted into \|dest\|. If	92 // \|target_block_\|, and write it in \|optimal_block_\|. This method assumes that

89 // not enough data available, returns 0.	93 // there is enough data to perform a search, i.e. \|search_block_\| and

90 //	94 // \|target_block_\| can be extracted from the available frames.

91 // When the audio playback is > 1.0, we use a variant of Overlap-Add to squish	95 void GetOptimalBlock();

92 // audio output while preserving pitch. Essentially, we play a bit of audio

93 // data at normal speed, then we "fast forward" by dropping the next bit of

94 // audio data, and then we stich the pieces together by crossfading from one

95 // audio chunk to the next.

96 int OutputFasterPlayback(AudioBus* dest,

97 int dest_offset,

98 int requested_frames,

99 int input_step,

100 int output_step);

101	96

102 // Fills \|dest\| with up to \|requested_frames\| frames of audio data at slower	97 // Read a maximum of \|requested_frames\| frames from \|wsola_output_\|. Returns

103 // than normal speed. Returns the number of frames inserted into \|dest\|. If	98 // number of frames actually read.

104 // not enough data available, returns 0.	99 int WriteCompletedFramesTo(

105 //	100 int requested_frames, int output_offset, AudioBus* dest);

106 // When the audio playback is < 1.0, we use a variant of Overlap-Add to

107 // stretch audio output while preserving pitch. This works by outputting a

108 // segment of audio data at normal speed. The next audio segment then starts

109 // by repeating some of the audio data from the previous audio segment.

110 // Segments are stiched together by crossfading from one audio chunk to the

111 // next.

112 int OutputSlowerPlayback(AudioBus* dest,

113 int dest_offset,

114 int requested_frames,

115 int input_step,

116 int output_step);

117	101

118 // Resets the window state to the start of a new window.	102 // Fill \|dest\| with frames from \|audio_buffer_\| starting from frame

119 void ResetWindow();	103 // \|read_offset_frames\|. \|dest\| is expected to have the same number of

	104 // channels as \|audio_buffer_\|. A negative offset, i.e.

	105 // \|read_offset_frames\| < 0, is accepted assuming that \|audio_buffer_\| is zero

	106 // for negative indices. This might happen for the first few frames. This method

	107 // assumes there are enough frames to fill \|dest\|, i.e. \|read_offset_frames\| +

	108 // \|dest->frames()\| does not extend into the future.

	109 void PeekAudioWithZeroPrepend(int read_offset_frames, AudioBus* dest);

120	110

121 // Does a linear crossfade from \|intro\| into \|outtro\| for one frame.	111 // Run one iteration of WSOLA, if there are sufficient frames. This will

122 void CrossfadeFrame(AudioBus* intro,	112 // overlap-and-add one block to \|wsola_output_\|, hence, \|num_complete_frames_\|

123 int intro_offset,	113 // is incremented by \|ola_hop_size_\|.

124 AudioBus* outtro,	114 bool RunOneWsolaIteration();

125 int outtro_offset,	115

126 int fade_offset);	116 // Seek \|audio_buffer_\| forward to remove frames from input that are not used

	117 // any more. State of the WSOLA will be updated accordingly.

	118 void RemoveOldInputFrames();

	119

	120 // Update \|output_time_\| by \|time_change\|. In turn \|search_block_index_\| is

	121 // updated.

	122 void UpdateOutputTime(float time_change);

	123

	124 // Is \|target_block_\| fully within \|search_block_\|? If so, we don't need to

	125 // perform the search.

	126 bool TargetIsWithinSearchRegion() const;

	127

	128 // Do we have enough data to perform one round of WSOLA?

	129 bool CanPerformWsola() const;

127	130

128 // Number of channels in audio stream.	131 // Number of channels in audio stream.

129 int channels_;	132 int channels_;

130	133

131 // Sample rate of audio stream.	134 // Sample rate of audio stream.

132 int samples_per_second_;	135 int samples_per_second_;

133	136

134 // Used by algorithm to scale output.	137 // Used by algorithm to scale output.

135 float playback_rate_;	138 float playback_rate_;

136	139

137 // Buffered audio data.	140 // Buffered audio data.

138 AudioBufferQueue audio_buffer_;	141 AudioBufferQueue audio_buffer_;

139	142

140 // Length for crossfade in frames.

141 int frames_in_crossfade_;

142

143 // The current location in the audio window, between 0 and \|window_size_\|.

144 // When \|index_into_window_\| reaches \|window_size_\|, the window resets.

145 // Indexed by frame.

146 int index_into_window_;

147

148 // The frame number in the crossfade.

149 int crossfade_frame_number_;

150

151 // True if the audio should be muted.	143 // True if the audio should be muted.

152 bool muted_;	144 bool muted_;

153	145

154 // If muted, keep track of partial frames that should have been skipped over.	146 // If muted, keep track of partial frames that should have been skipped over.

155 double muted_partial_frame_;	147 double muted_partial_frame_;

156	148

157 // Temporary buffer to hold crossfade data.

158 scoped_ptr<AudioBus> crossfade_buffer_;

159

160 // Window size, in frames (calculated from audio properties).

161 int window_size_;

162

163 // How many frames to have in the queue before we report the queue is full.	149 // How many frames to have in the queue before we report the queue is full.

164 int capacity_;	150 int capacity_;

165	151

	152 // Book keeping of the current time of generated audio, in frames. This

	153 // should be appropriately updated when output samples are generated, regardless

	154 // of whether we push samples out when FillBuffer() is called or we store

	155 // audio in \|wsola_output_\| for the subsequent calls to FillBuffer().

	156 // Furthermore, if samples from \|audio_buffer_\| are evicted then this

	157 // member variable should be updated based on \|playback_rate_\|.

	158 // Note that this member should be updated ONLY by calling UpdateOutputTime(),

	159 // so that \|search_block_index_\| is updated accordingly.

	160 float output_time_;

	161

	162 // The offset of the center frame of \|search_block_\| w.r.t. its first frame.

	163 int search_block_center_offset_;

	164

	165 // Index of the beginning of the \|search_block_\|, in frames.

	166 int search_block_index_;

	167

	168 // Number of Blocks to search to find the most similar one to the target

	169 // frame.

	170 int num_candidate_blocks_;

	171

	172 // Index of the beginning of the target block, counted in frames.

	173 int target_block_index_;

	174

	175 // Overlap-and-add window size in frames.

	176 int ola_window_size_;

	177

	178 // The hop size of overlap-and-add in frames. This implementation assumes 50%

	179 // overlap-and-add.

	180 int ola_hop_size_;

	181

	182 // Number of frames in \|wsola_output_\| for which overlap-and-add is complete

	183 // and which can be copied to output if FillBuffer() is called. It also

	184 // specifies the index where the next WSOLA window has to overlap-and-add.

	185 int num_complete_frames_;

	186

	187 // This stores a part of the output that is created but couldn't be rendered.

	188 // Output is generated frame-by-frame which at some point might exceed the

	189 // number of requested samples. Furthermore, due to overlap-and-add,

	190 // the last half-window of the output is incomplete, which is stored in this

	191 // buffer.

	192 scoped_ptr<AudioBus> wsola_output_;

	193

	194 // Overlap-and-add window.

	195 scoped_ptr<float[]> ola_window_;

	196

	197 // Transition window, used to update \|optimal_block_\| by a weighted sum of

	198 // \|optimal_block_\| and \|target_block_\|.

	199 scoped_ptr<float[]> transition_window_;

	200

	201 // Auxiliary variables to avoid allocation in every iteration.

	202

	203 // Stores the optimal block in every iteration. This is the most

	204 // similar block to \|target_block_\| within \|search_block_\| and it is

	205 // overlap-and-added to \|wsola_output_\|.

	206 scoped_ptr<AudioBus> optimal_block_;

	207

	208 // A block of data that search is performed over to find the \|optimal_block_\|.

	209 scoped_ptr<AudioBus> search_block_;

	210

	211 // Stores the target block, denoted as \|target\| above. \|search_block_\| is

	212 // searched for a block (\|optimal_block_\|) that is most similar to

	213 // \|target_block_\|.

	214 scoped_ptr<AudioBus> target_block_;

	215

166 DISALLOW_COPY_AND_ASSIGN(AudioRendererAlgorithm);	216 DISALLOW_COPY_AND_ASSIGN(AudioRendererAlgorithm);

167 };	217 };

168	218

169 } // namespace media	219 } // namespace media

170	220

171 #endif // MEDIA_FILTERS_AUDIO_RENDERER_ALGORITHM_H_	221 #endif // MEDIA_FILTERS_AUDIO_RENDERER_ALGORITHM_H_

OLD	NEW

« no previous file with comments | « no previous file | media/filters/audio_renderer_algorithm.cc » ('j') | media/filters/wsola_internals.h » ('J')