OLD | NEW |
| (Empty) |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #ifndef MEDIA_AUDIO_WIN_AUDIO_UNIFIED_WIN_H_ | |
6 #define MEDIA_AUDIO_WIN_AUDIO_UNIFIED_WIN_H_ | |
7 | |
8 #include <Audioclient.h> | |
9 #include <MMDeviceAPI.h> | |
10 | |
11 #include <string> | |
12 | |
13 #include "base/compiler_specific.h" | |
14 #include "base/gtest_prod_util.h" | |
15 #include "base/threading/platform_thread.h" | |
16 #include "base/threading/simple_thread.h" | |
17 #include "base/win/scoped_co_mem.h" | |
18 #include "base/win/scoped_comptr.h" | |
19 #include "base/win/scoped_handle.h" | |
20 #include "media/audio/audio_io.h" | |
21 #include "media/audio/audio_parameters.h" | |
22 #include "media/base/audio_fifo.h" | |
23 #include "media/base/channel_mixer.h" | |
24 #include "media/base/media_export.h" | |
25 #include "media/base/multi_channel_resampler.h" | |
26 | |
27 namespace media { | |
28 | |
29 class AudioManagerWin; | |
30 | |
31 // Implementation of AudioOutputStream for Windows using the Core Audio API | |
32 // where both capturing and rendering take place on the same thread to enable | |
33 // audio I/O. This class allows arbitrary combinations of input and output | |
34 // devices running off different clocks and using different drivers, with | |
35 // potentially differing sample-rates. | |
36 // | |
37 // It is required to first acquire the native sample rate of the selected | |
38 // output device and then use the same rate when creating this object. | |
39 // The inner operation depends on the input sample rate which is determined | |
40 // during construction. Three different main modes are supported: | |
41 // | |
42 // 1) input rate == output rate => input side drives output side directly. | |
43 // 2) input rate != output rate => both sides are driven independently by | |
44 // events and a FIFO plus a resampling unit is used to compensate for | |
45 // differences in sample rates between the two sides. | |
46 // 3) input rate == output rate but native buffer sizes are not identical => | |
47 // same inner functionality as in (2) to compensate for the differences | |
48 // in buffer sizes and also compensate for any potential clock drift | |
49 // between the two devices. | |
50 // | |
51 // Mode detection is done at construction and using mode (1) will lead to | |
52 // best performance (lower delay and no "varispeed distortion"), i.e., it is | |
53 // recommended to use same sample rates for input and output. Mode (2) uses a | |
54 // resampler which supports rate adjustments to fine tune for things like | |
55 // clock drift and differences in sample rates between different devices. | |
56 // Mode (2) - which uses a FIFO and an adjustable multi-channel resampler - | |
57 // is also called the varispeed mode and it is used for case (3) as well to | |
58 // compensate for the difference in buffer sizes mainly. | |
59 // Mode (3) can happen if two different audio devices are used. | |
60 // As an example: some devices need a buffer size of 441 @ 44.1kHz and others | |
61 // 448 @ 44.1kHz. This is a rare case and will only happen for sample rates | |
62 // which are even multiples of 11025 Hz (11025, 22050, 44100, 88200 etc.). | |
63 // | |
64 // Implementation notes: | |
65 // | |
66 // - Open() can fail if the input and output parameters do not fulfill | |
67 // certain conditions. See source for Open() for more details. | |
68 // - Channel mixing will be performed if the client asks for a larger | |
69 // number of channels than the native audio layer provides. | |
70 // Example: client wants stereo but audio layer provides mono. In this case | |
71 // upmixing from mono to stereo (1->2) will be done. | |
72 // | |
73 // TODO(henrika): | |
74 // | |
75 // - Add support for exclusive mode. | |
76 // - Add support for KSDATAFORMAT_SUBTYPE_IEEE_FLOAT, i.e., 32-bit float | |
77 // as internal sample-value representation. | |
78 // - Perform fine-tuning for non-matching sample rates to reduce latency. | |
79 // | |
80 class MEDIA_EXPORT WASAPIUnifiedStream | |
81 : public AudioOutputStream, | |
82 public base::DelegateSimpleThread::Delegate { | |
83 public: | |
84 // The ctor takes all the usual parameters, plus |manager| which is the | |
85 // audio manager that is creating this object. | |
86 WASAPIUnifiedStream(AudioManagerWin* manager, | |
87 const AudioParameters& params, | |
88 const std::string& input_device_id); | |
89 | |
90 // The dtor is typically called by the AudioManager only and it is usually | |
91 // triggered by calling AudioOutputStream::Close(). | |
92 virtual ~WASAPIUnifiedStream(); | |
93 | |
94 // Implementation of AudioOutputStream. | |
95 virtual bool Open() OVERRIDE; | |
96 virtual void Start(AudioSourceCallback* callback) OVERRIDE; | |
97 virtual void Stop() OVERRIDE; | |
98 virtual void Close() OVERRIDE; | |
99 virtual void SetVolume(double volume) OVERRIDE; | |
100 virtual void GetVolume(double* volume) OVERRIDE; | |
101 | |
102 bool started() const { | |
103 return audio_io_thread_.get() != NULL; | |
104 } | |
105 | |
106 // Returns true if both |fifo_| and |resampler_| exist, i.e., when the FIFO and | |
107 // the adjustable multi-channel resampler of varispeed mode are in use. | |
108 bool VarispeedMode() const { return (fifo_ && resampler_); } | |
109 | |
110 private: | |
111 enum { | |
112 // Time in milliseconds between two successive delay measurements. | |
113 // We save resources by not updating the delay estimates for each capture | |
114 // event (typically 100Hz rate). | |
115 kTimeDiffInMillisecondsBetweenDelayMeasurements = 1000, | |
116 | |
117 // Max possible FIFO size. | |
118 kFifoSize = 16384, | |
119 | |
120 // This value was determined empirically for minimum latency while still | |
121 // guarding against FIFO under-runs. The actual target size will be equal | |
122 // to kTargetFifoSafetyFactor * (native input buffer size). | |
123 // TODO(henrika): tune this value for lowest possible latency for all | |
124 // possible sample rate combinations. | |
125 kTargetFifoSafetyFactor = 2 | |
126 }; | |
127 | |
128 // Additional initialization required when input and output sample rate | |
129 // differs. Allocates resources for |fifo_|, |resampler_|, |render_event_|, | |
130 // and the |capture_bus_| (NOTE(review): no such member is declared here) and | |
131 // configures |input_format_| given the provided input and output parameters. | |
132 void DoVarispeedInitialization(const AudioParameters& input_params, | |
133 const AudioParameters& output_params); | |
134 | |
135 // Clears varispeed related components such as the FIFO and the resampler. | |
136 void ResetVarispeed(); | |
137 | |
138 // Builds WAVEFORMATEX structures for input and output based on input and | |
139 // output audio parameters. | |
140 void SetIOFormats(const AudioParameters& input_params, | |
141 const AudioParameters& output_params); | |
142 | |
143 // DelegateSimpleThread::Delegate implementation. | |
144 virtual void Run() OVERRIDE; | |
145 | |
146 // Callback for providing more data into the resampler. | |
147 // NOTE(review): declared virtual without OVERRIDE; this class does not derive | |
148 // from a resampler provider interface -- confirm. Only used in varispeed mode. | |
149 virtual void ProvideInput(int frame_delay, AudioBus* audio_bus); | |
150 | |
151 // Issues the OnError() callback to the |source_|. | |
152 void HandleError(HRESULT err); | |
153 | |
154 // Stops and joins the audio thread in case of an error. | |
155 void StopAndJoinThread(HRESULT err); | |
156 | |
157 // Converts unique endpoint ID to user-friendly device name. | |
158 std::string GetDeviceName(LPCWSTR device_id) const; | |
159 | |
160 // Called on the audio IO thread for each capture event. | |
161 // Buffers captured audio into a FIFO if varispeed is used or into an audio | |
162 // bus when varispeed is not used. | |
163 void ProcessInputAudio(); | |
164 | |
165 // Called on the audio IO thread for each render event when varispeed is | |
166 // active or for each capture event when varispeed is not used. | |
167 // In varispeed mode, it triggers a resampling callback, which reads from the | |
168 // FIFO, and calls AudioSourceCallback::OnMoreIOData using the resampled | |
169 // input signal and at the same time asks for data to play out. | |
170 // If varispeed is not used - instead of reading from the FIFO and | |
171 // resampling - we read directly from the audio bus used to store | |
172 // captured data in ProcessInputAudio. | |
173 void ProcessOutputAudio(IAudioClock* audio_output_clock); | |
174 | |
175 // Contains the thread ID of the creating thread. | |
176 base::PlatformThreadId creating_thread_id_; | |
177 | |
178 // Our creator, the audio manager needs to be notified when we close. | |
179 AudioManagerWin* manager_; | |
180 | |
181 // Contains the audio parameter structure provided at construction. | |
182 AudioParameters params_; | |
183 // For convenience, same as in params_. | |
184 int input_channels_; | |
185 int output_channels_; | |
186 | |
187 // Unique ID of the input device to be opened. | |
188 const std::string input_device_id_; | |
189 | |
190 // The sharing mode for the streams. | |
191 // Valid values are AUDCLNT_SHAREMODE_SHARED and AUDCLNT_SHAREMODE_EXCLUSIVE | |
192 // where AUDCLNT_SHAREMODE_SHARED is the default. | |
193 AUDCLNT_SHAREMODE share_mode_; | |
194 | |
195 // Rendering and capturing are driven by this thread (no message loop). | |
196 // All OnMoreIOData() callbacks will be called from this thread. | |
197 scoped_ptr<base::DelegateSimpleThread> audio_io_thread_; | |
198 | |
199 // Contains the desired audio output format which is set up at construction. | |
200 // It is required to first acquire the native sample rate of the selected | |
201 // output device and then use the same rate when creating this object. | |
202 WAVEFORMATPCMEX output_format_; | |
203 | |
204 // Contains the native audio input format which is set up at construction | |
205 // if varispeed mode is utilized. | |
206 WAVEFORMATPCMEX input_format_; | |
207 | |
208 // True when successfully opened. | |
209 bool opened_; | |
210 | |
211 // Volume level from 0 to 1 used for output scaling. | |
212 double volume_; | |
213 | |
214 // Size in audio frames of each audio packet where an audio packet | |
215 // is defined as the block of data which the destination is expected to | |
216 // receive in each OnMoreIOData() callback. | |
217 size_t output_buffer_size_frames_; | |
218 | |
219 // Size in audio frames of each audio packet where an audio packet | |
220 // is defined as the block of data which the source is expected to | |
221 // deliver in each OnMoreIOData() callback. | |
222 size_t input_buffer_size_frames_; | |
223 | |
224 // Length of the audio endpoint buffer. | |
225 uint32 endpoint_render_buffer_size_frames_; | |
226 uint32 endpoint_capture_buffer_size_frames_; | |
227 | |
228 // Counts the number of audio frames written to the endpoint buffer. | |
229 uint64 num_written_frames_; | |
230 | |
231 // Time stamp for last delay measurement. | |
232 base::TimeTicks last_delay_sample_time_; | |
233 | |
234 // Contains the total (sum of render and capture) delay in milliseconds. | |
235 double total_delay_ms_; | |
236 | |
237 // Contains the total (sum of render and capture and possibly FIFO) delay | |
238 // in bytes. The update frequency is set by a constant called | |
239 // |kTimeDiffInMillisecondsBetweenDelayMeasurements|. | |
240 int total_delay_bytes_; | |
241 | |
242 // Pointer to the client that will deliver audio samples to be played out. | |
243 AudioSourceCallback* source_; | |
244 | |
245 // IMMDevice interfaces which represent audio endpoint devices. | |
246 base::win::ScopedComPtr<IMMDevice> endpoint_render_device_; | |
247 base::win::ScopedComPtr<IMMDevice> endpoint_capture_device_; | |
248 | |
249 // IAudioClient interfaces which enable a client to create and initialize | |
250 // an audio stream between an audio application and the audio engine. | |
251 base::win::ScopedComPtr<IAudioClient> audio_output_client_; | |
252 base::win::ScopedComPtr<IAudioClient> audio_input_client_; | |
253 | |
254 // IAudioRenderClient interface which enables a client to write output | |
255 // data to a rendering endpoint buffer. | |
256 base::win::ScopedComPtr<IAudioRenderClient> audio_render_client_; | |
257 | |
258 // IAudioCaptureClient interface which enables a client to read input | |
259 // data from a capturing endpoint buffer. | |
260 base::win::ScopedComPtr<IAudioCaptureClient> audio_capture_client_; | |
261 | |
262 // The audio engine will signal this event each time a buffer has been | |
263 // recorded. | |
264 base::win::ScopedHandle capture_event_; | |
265 | |
266 // The audio engine will signal this event each time it needs a new | |
267 // audio buffer to play out. | |
268 // Only utilized in varispeed mode. | |
269 base::win::ScopedHandle render_event_; | |
270 | |
271 // This event will be signaled when streaming shall stop. | |
272 base::win::ScopedHandle stop_streaming_event_; | |
273 | |
274 // Container for retrieving data from AudioSourceCallback::OnMoreIOData(). | |
275 scoped_ptr<AudioBus> output_bus_; | |
276 | |
277 // Container for sending data to AudioSourceCallback::OnMoreIOData(). | |
278 scoped_ptr<AudioBus> input_bus_; | |
279 | |
280 // Container for storing output from the channel mixer. | |
281 scoped_ptr<AudioBus> channel_bus_; | |
282 | |
283 // All members below are only allocated, or used, in varispeed mode: | |
284 | |
285 // Temporary storage of resampled input audio data. | |
286 scoped_ptr<AudioBus> resampled_bus_; | |
287 | |
288 // Set to true the first time a capture event has been received in varispeed | |
289 // mode. | |
290 bool input_callback_received_; | |
291 | |
292 // MultiChannelResampler is a multi channel wrapper for SincResampler; | |
293 // allowing high quality sample rate conversion of multiple channels at once. | |
294 scoped_ptr<MultiChannelResampler> resampler_; | |
295 | |
296 // Resampler I/O ratio. | |
297 double io_sample_rate_ratio_; | |
298 | |
299 // Used for input to output buffering. | |
300 scoped_ptr<AudioFifo> fifo_; | |
301 | |
302 // The channel mixer is only created and utilized if number of input channels | |
303 // is larger than the native number of input channels (e.g client wants | |
304 // stereo but the audio device only supports mono). | |
305 scoped_ptr<ChannelMixer> channel_mixer_; | |
306 | |
307 // The optimal number of frames we'd like to keep in the FIFO at all times. | |
308 int target_fifo_frames_; | |
309 | |
310 // A running average of the measured delta between actual number of frames | |
311 // in the FIFO versus |target_fifo_frames_|. | |
312 double average_delta_; | |
313 | |
314 // A varispeed rate scalar which is calculated based on FIFO drift. | |
315 double fifo_rate_compensation_; | |
316 | |
317 // Set to true when input side signals output side that a new delay | |
318 // estimate is needed. | |
319 bool update_output_delay_; | |
320 | |
321 // Capture side stores its delay estimate so the sum can be derived in | |
322 // the render side. | |
323 double capture_delay_ms_; | |
324 | |
325 // TODO(henrika): possibly remove these members once the performance is | |
326 // properly tuned. Only used for off-line debugging. | |
327 #ifndef NDEBUG | |
328 enum LogElementNames { | |
329 INPUT_TIME_STAMP, | |
330 NUM_FRAMES_IN_FIFO, | |
331 RESAMPLER_MARGIN, | |
332 RATE_COMPENSATION | |
333 }; | |
334 | |
335 scoped_ptr<int64[]> input_time_stamps_; | |
336 scoped_ptr<int[]> num_frames_in_fifo_; | |
337 scoped_ptr<int[]> resampler_margin_; | |
338 scoped_ptr<double[]> fifo_rate_comps_; | |
339 scoped_ptr<int[]> num_elements_; | |
340 scoped_ptr<int[]> input_params_; | |
341 scoped_ptr<int[]> output_params_; | |
342 | |
343 FILE* data_file_; | |
344 FILE* param_file_; | |
345 #endif | |
346 | |
347 DISALLOW_COPY_AND_ASSIGN(WASAPIUnifiedStream); | |
348 }; | |
349 | |
350 } // namespace media | |
351 | |
352 #endif // MEDIA_AUDIO_WIN_AUDIO_UNIFIED_WIN_H_ | |
OLD | NEW |