1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "media/audio/win/audio_unified_win.h" | |
6 | |
7 #include <Functiondiscoverykeys_devpkey.h> | |
8 | |
9 #include "base/debug/trace_event.h" | |
10 #ifndef NDEBUG | |
11 #include "base/file_util.h" | |
12 #include "base/path_service.h" | |
13 #endif | |
14 #include "base/time/time.h" | |
15 #include "base/win/scoped_com_initializer.h" | |
16 #include "media/audio/win/audio_manager_win.h" | |
17 #include "media/audio/win/avrt_wrapper_win.h" | |
18 #include "media/audio/win/core_audio_util_win.h" | |
19 | |
20 using base::win::ScopedComPtr; | |
21 using base::win::ScopedCOMInitializer; | |
22 using base::win::ScopedCoMem; | |
23 | |
24 // Smoothing factor in exponential smoothing filter where 0 < alpha < 1. | |
25 // Larger values of alpha reduce the level of smoothing. | |
26 // See http://en.wikipedia.org/wiki/Exponential_smoothing for details. | |
27 static const double kAlpha = 0.1; | |
28 | |
29 // Compute a rate compensation which always attracts us back to a specified | |
30 // target level over a period of |kCorrectionTimeSeconds|. | |
31 static const double kCorrectionTimeSeconds = 0.1; | |
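// Roughly, ProcessOutputAudio() applies the two constants above as follows:
//   average_delta += kAlpha * (delta - average_delta);
//   correction_time_frames = kCorrectionTimeSeconds * output_sample_rate;
//   rate_compensation = (correction_time_frames + average_delta) /
//                       correction_time_frames;
// A FIFO level above its target produces a factor slightly above 1.0, which
// makes the resampler consume input faster and drains the FIFO back toward
// |target_fifo_frames_|.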
32 | |
33 #ifndef NDEBUG | |
34 // Max number of columns in the output text file |kUnifiedAudioDebugFileName|. | |
35 // See LogElementNames enumerator for details on what each column represents. | |
36 static const size_t kMaxNumSampleTypes = 4; | |
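// For reference, the four columns written by the destructor are: input time
// stamp, number of frames in the FIFO, resampler margin, and FIFO rate
// compensation.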
37 | |
38 static const size_t kMaxNumParams = 2; | |
39 | |
40 // Max number of rows in the output file |kUnifiedAudioDebugFileName|. | |
41 // Each row corresponds to one set of sample values for (approximately) the | |
42 // same time instant (stored in the first column). | |
43 static const size_t kMaxFileSamples = 10000; | |
44 | |
45 // Name of output debug file used for off-line analysis of measurements which | |
46 // can be utilized for performance tuning of this class. | |
47 static const char kUnifiedAudioDebugFileName[] = "unified_win_debug.txt"; | |
48 | |
49 // Name of output debug file used for off-line analysis of measurements. | |
50 // This file will contain a list of audio parameters. | |
51 static const char kUnifiedAudioParamsFileName[] = "unified_win_params.txt"; | |
52 #endif | |
53 | |
54 // Use the acquired IAudioClock interface to derive a time stamp of the audio | |
55 // sample which is currently playing through the speakers. | |
56 static double SpeakerStreamPosInMilliseconds(IAudioClock* clock) { | |
57 UINT64 device_frequency = 0, position = 0; | |
58 if (FAILED(clock->GetFrequency(&device_frequency)) || | |
59 FAILED(clock->GetPosition(&position, NULL))) { | |
60 return 0.0; | |
61 } | |
62 return base::Time::kMillisecondsPerSecond * | |
63 (static_cast<double>(position) / device_frequency); | |
64 } | |
65 | |
66 // Get a time stamp in milliseconds given the number of audio frames in | |
67 // |num_frames| using the current sample rate |fs| as scale factor. | |
68 // Example: |num_frames| = 960 and |fs| = 48000 => 20 [ms]. | |
69 static double CurrentStreamPosInMilliseconds(UINT64 num_frames, DWORD fs) { | |
70 return base::Time::kMillisecondsPerSecond * | |
71 (static_cast<double>(num_frames) / fs); | |
72 } | |
73 | |
74 // Convert a timestamp in milliseconds to byte units given the audio format | |
75 // in |format|. | |
76 // Example: |ts_milliseconds| equals 10, sample rate is 48000 and frame size | |
77 // is 4 bytes per audio frame => 480 * 4 = 1920 [bytes]. | |
78 static int MillisecondsToBytes(double ts_milliseconds, | |
79 const WAVEFORMATPCMEX& format) { | |
80 double seconds = ts_milliseconds / base::Time::kMillisecondsPerSecond; | |
81 return static_cast<int>(seconds * format.Format.nSamplesPerSec * | |
82 format.Format.nBlockAlign + 0.5); | |
83 } | |
84 | |
85 // Convert frame count to milliseconds given the audio format in |format|. | |
86 static double FrameCountToMilliseconds(int num_frames, | |
87 const WAVEFORMATPCMEX& format) { | |
88 return (base::Time::kMillisecondsPerSecond * num_frames) / | |
89 static_cast<double>(format.Format.nSamplesPerSec); | |
90 } | |
91 | |
92 namespace media { | |
93 | |
94 WASAPIUnifiedStream::WASAPIUnifiedStream(AudioManagerWin* manager, | |
95 const AudioParameters& params, | |
96 const std::string& input_device_id) | |
97 : creating_thread_id_(base::PlatformThread::CurrentId()), | |
98 manager_(manager), | |
99 params_(params), | |
100 input_channels_(params.input_channels()), | |
101 output_channels_(params.channels()), | |
102 input_device_id_(input_device_id), | |
103 share_mode_(CoreAudioUtil::GetShareMode()), | |
104 opened_(false), | |
105 volume_(1.0), | |
106 output_buffer_size_frames_(0), | |
107 input_buffer_size_frames_(0), | |
108 endpoint_render_buffer_size_frames_(0), | |
109 endpoint_capture_buffer_size_frames_(0), | |
110 num_written_frames_(0), | |
111 total_delay_ms_(0.0), | |
112 total_delay_bytes_(0), | |
113 source_(NULL), | |
114 input_callback_received_(false), | |
115 io_sample_rate_ratio_(1), | |
116 target_fifo_frames_(0), | |
117 average_delta_(0), | |
118 fifo_rate_compensation_(1), | |
119 update_output_delay_(false), | |
120 capture_delay_ms_(0) { | |
121 TRACE_EVENT0("audio", "WASAPIUnifiedStream::WASAPIUnifiedStream"); | |
122 VLOG(1) << "WASAPIUnifiedStream::WASAPIUnifiedStream()"; | |
123 DCHECK(manager_); | |
124 | |
125 VLOG(1) << "Input channels : " << input_channels_; | |
126 VLOG(1) << "Output channels: " << output_channels_; | |
127 VLOG(1) << "Sample rate : " << params_.sample_rate(); | |
128 VLOG(1) << "Buffer size : " << params.frames_per_buffer(); | |
129 | |
130 #ifndef NDEBUG | |
131 input_time_stamps_.reset(new int64[kMaxFileSamples]); | |
132 num_frames_in_fifo_.reset(new int[kMaxFileSamples]); | |
133 resampler_margin_.reset(new int[kMaxFileSamples]); | |
134 fifo_rate_comps_.reset(new double[kMaxFileSamples]); | |
135 num_elements_.reset(new int[kMaxNumSampleTypes]); | |
136 std::fill(num_elements_.get(), num_elements_.get() + kMaxNumSampleTypes, 0); | |
137 input_params_.reset(new int[kMaxNumParams]); | |
138 output_params_.reset(new int[kMaxNumParams]); | |
139 #endif | |
140 | |
141 DVLOG_IF(1, share_mode_ == AUDCLNT_SHAREMODE_EXCLUSIVE) | |
142 << "Core Audio (WASAPI) EXCLUSIVE MODE is enabled."; | |
143 | |
144 // Load the Avrt DLL if not already loaded. Required to support MMCSS. | |
145 bool avrt_init = avrt::Initialize(); | |
146 DCHECK(avrt_init) << "Failed to load the avrt.dll"; | |
147 | |
148 // All events are auto-reset events and non-signaled initially. | |
149 | |
150 // Create the event which the audio engine will signal each time a buffer | |
151 // has been recorded. | |
152 capture_event_.Set(CreateEvent(NULL, FALSE, FALSE, NULL)); | |
153 | |
154 // Create the event which will be set in Stop() when streaming shall stop. | |
155 stop_streaming_event_.Set(CreateEvent(NULL, FALSE, FALSE, NULL)); | |
156 } | |
157 | |
158 WASAPIUnifiedStream::~WASAPIUnifiedStream() { | |
159 VLOG(1) << "WASAPIUnifiedStream::~WASAPIUnifiedStream()"; | |
160 #ifndef NDEBUG | |
161 base::FilePath data_file_name; | |
162 PathService::Get(base::DIR_EXE, &data_file_name); | |
163 data_file_name = data_file_name.AppendASCII(kUnifiedAudioDebugFileName); | |
164 data_file_ = base::OpenFile(data_file_name, "wt"); | |
165 DVLOG(1) << ">> Output file " << data_file_name.value() << " is created."; | |
166 | |
167 size_t n = 0; | |
168 size_t elements_to_write = *std::min_element( | |
169 num_elements_.get(), num_elements_.get() + kMaxNumSampleTypes); | |
170 while (n < elements_to_write) { | |
171 fprintf(data_file_, "%I64d %d %d %10.9f\n", | |
172 input_time_stamps_[n], | |
173 num_frames_in_fifo_[n], | |
174 resampler_margin_[n], | |
175 fifo_rate_comps_[n]); | |
176 ++n; | |
177 } | |
178 base::CloseFile(data_file_); | |
179 | |
180 base::FilePath param_file_name; | |
181 PathService::Get(base::DIR_EXE, ¶m_file_name); | |
182 param_file_name = param_file_name.AppendASCII(kUnifiedAudioParamsFileName); | |
183 param_file_ = base::OpenFile(param_file_name, "wt"); | |
184 DVLOG(1) << ">> Output file " << param_file_name.value() << " is created."; | |
185 fprintf(param_file_, "%d %d\n", input_params_[0], input_params_[1]); | |
186 fprintf(param_file_, "%d %d\n", output_params_[0], output_params_[1]); | |
187 base::CloseFile(param_file_); | |
188 #endif | |
189 } | |
190 | |
191 bool WASAPIUnifiedStream::Open() { | |
192 TRACE_EVENT0("audio", "WASAPIUnifiedStream::Open"); | |
193 DVLOG(1) << "WASAPIUnifiedStream::Open()"; | |
194 DCHECK_EQ(GetCurrentThreadId(), creating_thread_id_); | |
195 if (opened_) | |
196 return true; | |
197 | |
198 AudioParameters hw_output_params; | |
199 HRESULT hr = CoreAudioUtil::GetPreferredAudioParameters( | |
200 eRender, eConsole, &hw_output_params); | |
201 if (FAILED(hr)) { | |
202 LOG(ERROR) << "Failed to get preferred output audio parameters."; | |
203 return false; | |
204 } | |
205 | |
206 AudioParameters hw_input_params; | |
207 if (input_device_id_ == AudioManagerBase::kDefaultDeviceId) { | |
208 // Query native parameters for the default capture device. | |
209 hr = CoreAudioUtil::GetPreferredAudioParameters( | |
210 eCapture, eConsole, &hw_input_params); | |
211 } else { | |
212 // Query native parameters for the capture device given by | |
213 // |input_device_id_|. | |
214 hr = CoreAudioUtil::GetPreferredAudioParameters( | |
215 input_device_id_, &hw_input_params); | |
216 } | |
217 if (FAILED(hr)) { | |
218 LOG(ERROR) << "Failed to get preferred input audio parameters."; | |
219 return false; | |
220 } | |
221 | |
222 // It is currently only possible to open up the output audio device using | |
223 // the native number of channels. | |
224 if (output_channels_ != hw_output_params.channels()) { | |
225 LOG(ERROR) << "Audio device does not support requested output channels."; | |
226 return false; | |
227 } | |
228 | |
229 // It is currently only possible to open up the input audio device using | |
230 // the native number of channels. If the client asks for a higher channel | |
231 // count, we will do channel upmixing in this class. The most typical | |
232 // example is that the client provides stereo but the hardware can only be | |
233 // opened in mono mode. We will do mono to stereo conversion in this case. | |
234 if (input_channels_ < hw_input_params.channels()) { | |
235 LOG(ERROR) << "Audio device does not support requested input channels."; | |
236 return false; | |
237 } else if (input_channels_ > hw_input_params.channels()) { | |
238 ChannelLayout input_layout = | |
239 GuessChannelLayout(hw_input_params.channels()); | |
240 ChannelLayout output_layout = GuessChannelLayout(input_channels_); | |
241 channel_mixer_.reset(new ChannelMixer(input_layout, output_layout)); | |
242 DVLOG(1) << "Remixing input channel layout from " << input_layout | |
243 << " to " << output_layout << "; from " | |
244 << hw_input_params.channels() << " channels to " | |
245 << input_channels_; | |
246 } | |
247 | |
248 if (hw_output_params.sample_rate() != params_.sample_rate()) { | |
249 LOG(ERROR) << "Requested sample-rate: " << params_.sample_rate() | |
250 << " must match the hardware sample-rate: " | |
251 << hw_output_params.sample_rate(); | |
252 return false; | |
253 } | |
254 | |
255 if (hw_output_params.frames_per_buffer() != params_.frames_per_buffer()) { | |
256 LOG(ERROR) << "Requested buffer size: " << params_.frames_per_buffer() | |
257 << " must match the hardware buffer size: " | |
258 << hw_output_params.frames_per_buffer(); | |
259 return false; | |
260 } | |
261 | |
262 // Set up WAVEFORMATPCMEX structures for input and output given the specified | |
263 // audio parameters. | |
264 SetIOFormats(hw_input_params, params_); | |
265 | |
266 // Create the input and output busses. | |
267 input_bus_ = AudioBus::Create( | |
268 hw_input_params.channels(), input_buffer_size_frames_); | |
269 output_bus_ = AudioBus::Create(params_); | |
270 | |
271 // One extra bus is needed for the input channel mixing case. | |
272 if (channel_mixer_) { | |
273 DCHECK_LT(hw_input_params.channels(), input_channels_); | |
274 // The size of the |channel_bus_| must be the same as the size of the | |
275 // output bus to ensure that the channel mixer can deal with both | |
276 // resampled and non-resampled data as input. | |
277 channel_bus_ = AudioBus::Create( | |
278 input_channels_, params_.frames_per_buffer()); | |
279 } | |
280 | |
281 // Check if FIFO and resampling is required to match the input rate to the | |
282 // output rate. If so, a special thread loop, optimized for this case, will | |
283 // be used. This mode is also called varispeed mode. | |
284 // Note that we can also use this mode when input and output rates are the | |
285 // same but native buffer sizes differ (can happen if two different audio | |
286 // devices are used). For this case, the resampler uses a target ratio of | |
287 // 1.0 but SetRatio is called to compensate for clock-drift. The FIFO is | |
288 // required to compensate for the difference in buffer sizes. | |
289 // TODO(henrika): we could perhaps improve the performance for the second | |
290 // case here by only using the FIFO and avoiding resampling. It is not clear | |
291 // how much that would gain, and we would risk not compensating for clock drift. | |
292 if (hw_input_params.sample_rate() != params_.sample_rate() || | |
293 hw_input_params.frames_per_buffer() != params_.frames_per_buffer()) { | |
294 DoVarispeedInitialization(hw_input_params, params_); | |
295 } | |
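// In varispeed mode the resulting data flow is: capture events push recorded
// frames into |fifo_|, and render events pull |output_buffer_size_frames_|
// frames through |resampler_|, which calls ProvideInput() to consume from the
// FIFO (see DoVarispeedInitialization() and Run()).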
296 | |
297 // Render side (event driven only in varispeed mode): | |
298 | |
299 ScopedComPtr<IAudioClient> audio_output_client = | |
300 CoreAudioUtil::CreateDefaultClient(eRender, eConsole); | |
301 if (!audio_output_client) | |
302 return false; | |
303 | |
304 if (!CoreAudioUtil::IsFormatSupported(audio_output_client, | |
305 share_mode_, | |
306 &output_format_)) { | |
307 return false; | |
308 } | |
309 | |
310 if (share_mode_ == AUDCLNT_SHAREMODE_SHARED) { | |
311 // The |render_event_| will be NULL unless varispeed mode is utilized. | |
312 hr = CoreAudioUtil::SharedModeInitialize( | |
313 audio_output_client, &output_format_, render_event_.Get(), | |
314 &endpoint_render_buffer_size_frames_); | |
315 } else { | |
316 // TODO(henrika): add support for AUDCLNT_SHAREMODE_EXCLUSIVE. | |
317 } | |
318 if (FAILED(hr)) | |
319 return false; | |
320 | |
321 ScopedComPtr<IAudioRenderClient> audio_render_client = | |
322 CoreAudioUtil::CreateRenderClient(audio_output_client); | |
323 if (!audio_render_client) | |
324 return false; | |
325 | |
326 // Capture side (always event driven; format depends on whether varispeed is used): | |
327 | |
328 ScopedComPtr<IAudioClient> audio_input_client; | |
329 if (input_device_id_ == AudioManagerBase::kDefaultDeviceId) { | |
330 audio_input_client = CoreAudioUtil::CreateDefaultClient(eCapture, eConsole); | |
331 } else { | |
332 ScopedComPtr<IMMDevice> audio_input_device( | |
333 CoreAudioUtil::CreateDevice(input_device_id_)); | |
334 audio_input_client = CoreAudioUtil::CreateClient(audio_input_device); | |
335 } | |
336 if (!audio_input_client) | |
337 return false; | |
338 | |
339 if (!CoreAudioUtil::IsFormatSupported(audio_input_client, | |
340 share_mode_, | |
341 &input_format_)) { | |
342 return false; | |
343 } | |
344 | |
345 if (share_mode_ == AUDCLNT_SHAREMODE_SHARED) { | |
346 // Include valid event handle for event-driven initialization. | |
347 // The input side is always event driven, regardless of whether varispeed | |
348 // is used or not. | |
349 hr = CoreAudioUtil::SharedModeInitialize( | |
350 audio_input_client, &input_format_, capture_event_.Get(), | |
351 &endpoint_capture_buffer_size_frames_); | |
352 } else { | |
353 // TODO(henrika): add support for AUDCLNT_SHAREMODE_EXCLUSIVE. | |
354 } | |
355 if (FAILED(hr)) | |
356 return false; | |
357 | |
358 ScopedComPtr<IAudioCaptureClient> audio_capture_client = | |
359 CoreAudioUtil::CreateCaptureClient(audio_input_client); | |
360 if (!audio_capture_client) | |
361 return false; | |
362 | |
363 // Varispeed mode requires additional preparations. | |
364 if (VarispeedMode()) | |
365 ResetVarispeed(); | |
366 | |
367 // Store all valid COM interfaces. | |
368 audio_output_client_ = audio_output_client; | |
369 audio_render_client_ = audio_render_client; | |
370 audio_input_client_ = audio_input_client; | |
371 audio_capture_client_ = audio_capture_client; | |
372 | |
373 opened_ = true; | |
374 return SUCCEEDED(hr); | |
375 } | |
376 | |
377 void WASAPIUnifiedStream::Start(AudioSourceCallback* callback) { | |
378 TRACE_EVENT0("audio", "WASAPIUnifiedStream::Start"); | |
379 DVLOG(1) << "WASAPIUnifiedStream::Start()"; | |
380 DCHECK_EQ(GetCurrentThreadId(), creating_thread_id_); | |
381 CHECK(callback); | |
382 CHECK(opened_); | |
383 | |
384 if (audio_io_thread_) { | |
385 CHECK_EQ(callback, source_); | |
386 return; | |
387 } | |
388 | |
389 source_ = callback; | |
390 | |
391 if (VarispeedMode()) { | |
392 ResetVarispeed(); | |
393 fifo_rate_compensation_ = 1.0; | |
394 average_delta_ = 0.0; | |
395 input_callback_received_ = false; | |
396 update_output_delay_ = false; | |
397 } | |
398 | |
399 // Create and start the thread that will listen for capture events. | |
400 // We will also listen on render events on the same thread if varispeed | |
401 // mode is utilized. | |
402 audio_io_thread_.reset( | |
403 new base::DelegateSimpleThread(this, "wasapi_io_thread")); | |
404 audio_io_thread_->Start(); | |
405 if (!audio_io_thread_->HasBeenStarted()) { | |
406 DLOG(ERROR) << "Failed to start WASAPI IO thread."; | |
407 return; | |
408 } | |
409 | |
410 // Start input streaming data between the endpoint buffer and the audio | |
411 // engine. | |
412 HRESULT hr = audio_input_client_->Start(); | |
413 if (FAILED(hr)) { | |
414 StopAndJoinThread(hr); | |
415 return; | |
416 } | |
417 | |
418 // Ensure that the endpoint buffer is prepared with silence. | |
419 if (share_mode_ == AUDCLNT_SHAREMODE_SHARED) { | |
420 if (!CoreAudioUtil::FillRenderEndpointBufferWithSilence( | |
421 audio_output_client_, audio_render_client_)) { | |
422 DLOG(WARNING) << "Failed to prepare endpoint buffers with silence."; | |
423 return; | |
424 } | |
425 } | |
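// In shared mode the endpoint buffer has just been pre-filled with silence,
// so start the rendered-frame counter at the full endpoint buffer size; the
// render delay estimate in ProcessOutputAudio() builds on this initial value.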
426 num_written_frames_ = endpoint_render_buffer_size_frames_; | |
427 | |
428 // Start output streaming data between the endpoint buffer and the audio | |
429 // engine. | |
430 hr = audio_output_client_->Start(); | |
431 if (FAILED(hr)) { | |
432 StopAndJoinThread(hr); | |
433 return; | |
434 } | |
435 } | |
436 | |
437 void WASAPIUnifiedStream::Stop() { | |
438 TRACE_EVENT0("audio", "WASAPIUnifiedStream::Stop"); | |
439 DVLOG(1) << "WASAPIUnifiedStream::Stop()"; | |
440 DCHECK_EQ(GetCurrentThreadId(), creating_thread_id_); | |
441 if (!audio_io_thread_) | |
442 return; | |
443 | |
444 // Stop input audio streaming. | |
445 HRESULT hr = audio_input_client_->Stop(); | |
446 if (FAILED(hr)) { | |
447 DLOG_IF(ERROR, hr != AUDCLNT_E_NOT_INITIALIZED) | |
448 << "Failed to stop input streaming: " << std::hex << hr; | |
449 } | |
450 | |
451 // Stop output audio streaming. | |
452 hr = audio_output_client_->Stop(); | |
453 if (FAILED(hr)) { | |
454 DLOG_IF(ERROR, hr != AUDCLNT_E_NOT_INITIALIZED) | |
455 << "Failed to stop output streaming: " << std::hex << hr; | |
456 } | |
457 | |
458 // Wait until the thread completes and perform cleanup. | |
459 SetEvent(stop_streaming_event_.Get()); | |
460 audio_io_thread_->Join(); | |
461 audio_io_thread_.reset(); | |
462 | |
463 // Ensure that we don't quit the main thread loop immediately next | |
464 // time Start() is called. | |
465 ResetEvent(stop_streaming_event_.Get()); | |
466 | |
467 // Clear source callback, it'll be set again on the next Start() call. | |
468 source_ = NULL; | |
469 | |
470 // Flush all pending data and reset the audio clock stream position to 0. | |
471 hr = audio_output_client_->Reset(); | |
472 if (FAILED(hr)) { | |
473 DLOG_IF(ERROR, hr != AUDCLNT_E_NOT_INITIALIZED) | |
474 << "Failed to reset output streaming: " << std::hex << hr; | |
475 } | |
476 | |
477 hr = audio_input_client_->Reset(); | |
478 if (FAILED(hr)) { | |
479 DLOG_IF(ERROR, hr != AUDCLNT_E_NOT_INITIALIZED) | |
480 << "Failed to reset input streaming: " << std::hex << hr; | |
481 } | |
482 | |
483 // Extra safety check to ensure that the buffers are cleared. | |
484 // If the buffers are not cleared correctly, the next call to Start() | |
485 // would fail with AUDCLNT_E_BUFFER_ERROR at IAudioRenderClient::GetBuffer(). | |
486 // TODO(henrika): this check is only needed for shared-mode streams. | |
487 UINT32 num_queued_frames = 0; | |
488 audio_output_client_->GetCurrentPadding(&num_queued_frames); | |
489 DCHECK_EQ(0u, num_queued_frames); | |
490 } | |
491 | |
492 void WASAPIUnifiedStream::Close() { | |
493 TRACE_EVENT0("audio", "WASAPIUnifiedStream::Close"); | |
494 DVLOG(1) << "WASAPIUnifiedStream::Close()"; | |
495 DCHECK_EQ(GetCurrentThreadId(), creating_thread_id_); | |
496 | |
497 // It is valid to call Close() before calling Open() or Start(). | |
498 // It is also valid to call Close() after Start() has been called. | |
499 Stop(); | |
500 | |
501 // Inform the audio manager that we have been closed. This will cause our | |
502 // destruction. | |
503 manager_->ReleaseOutputStream(this); | |
504 } | |
505 | |
506 void WASAPIUnifiedStream::SetVolume(double volume) { | |
507 DVLOG(1) << "SetVolume(volume=" << volume << ")"; | |
508 if (volume < 0 || volume > 1) | |
509 return; | |
510 volume_ = volume; | |
511 } | |
512 | |
513 void WASAPIUnifiedStream::GetVolume(double* volume) { | |
514 DVLOG(1) << "GetVolume()"; | |
515 *volume = static_cast<double>(volume_); | |
516 } | |
517 | |
518 | |
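// ProvideInput() is the read callback bound to |resampler_| in
// DoVarispeedInitialization(); the resampler invokes it whenever it needs
// more input frames, and those frames are consumed from |fifo_|.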
519 void WASAPIUnifiedStream::ProvideInput(int frame_delay, AudioBus* audio_bus) { | |
520 // TODO(henrika): utilize frame_delay? | |
521 // A non-zero frame delay means multiple callbacks were necessary to | |
522 // fulfill the requested number of frames. | |
523 if (frame_delay > 0) | |
524 DVLOG(3) << "frame_delay: " << frame_delay; | |
525 | |
526 #ifndef NDEBUG | |
527 resampler_margin_[num_elements_[RESAMPLER_MARGIN]] = | |
528 fifo_->frames() - audio_bus->frames(); | |
529 num_elements_[RESAMPLER_MARGIN]++; | |
530 #endif | |
531 | |
532 if (fifo_->frames() < audio_bus->frames()) { | |
533 DLOG(ERROR) << "Not enough data in the FIFO (" | |
534 << fifo_->frames() << " < " << audio_bus->frames() << ")"; | |
535 audio_bus->Zero(); | |
536 return; | |
537 } | |
538 | |
539 fifo_->Consume(audio_bus, 0, audio_bus->frames()); | |
540 } | |
541 | |
542 void WASAPIUnifiedStream::SetIOFormats(const AudioParameters& input_params, | |
543 const AudioParameters& output_params) { | |
544 for (int n = 0; n < 2; ++n) { | |
545 const AudioParameters& params = (n == 0) ? input_params : output_params; | |
546 WAVEFORMATPCMEX* xformat = (n == 0) ? &input_format_ : &output_format_; | |
547 WAVEFORMATEX* format = &xformat->Format; | |
548 | |
549 // Begin with the WAVEFORMATEX structure that specifies the basic format. | |
550 format->wFormatTag = WAVE_FORMAT_EXTENSIBLE; | |
551 format->nChannels = params.channels(); | |
552 format->nSamplesPerSec = params.sample_rate(); | |
553 format->wBitsPerSample = params.bits_per_sample(); | |
554 format->nBlockAlign = (format->wBitsPerSample / 8) * format->nChannels; | |
555 format->nAvgBytesPerSec = format->nSamplesPerSec * format->nBlockAlign; | |
556 format->cbSize = sizeof(WAVEFORMATEXTENSIBLE) - sizeof(WAVEFORMATEX); | |
557 | |
558 // Add the parts which are unique to WAVE_FORMAT_EXTENSIBLE. | |
559 // Note that we always open up using the native channel layout. | |
560 (*xformat).Samples.wValidBitsPerSample = format->wBitsPerSample; | |
561 (*xformat).dwChannelMask = | |
562 CoreAudioUtil::GetChannelConfig( | |
563 std::string(), n == 0 ? eCapture : eRender); | |
564 (*xformat).SubFormat = KSDATAFORMAT_SUBTYPE_PCM; | |
565 } | |
566 | |
567 input_buffer_size_frames_ = input_params.frames_per_buffer(); | |
568 output_buffer_size_frames_ = output_params.frames_per_buffer(); | |
569 VLOG(1) << "#audio frames per input buffer : " << input_buffer_size_frames_; | |
570 VLOG(1) << "#audio frames per output buffer: " << output_buffer_size_frames_; | |
571 | |
572 #ifndef NDEBUG | |
573 input_params_[0] = input_format_.Format.nSamplesPerSec; | |
574 input_params_[1] = input_buffer_size_frames_; | |
575 output_params_[0] = output_format_.Format.nSamplesPerSec; | |
576 output_params_[1] = output_buffer_size_frames_; | |
577 #endif | |
578 } | |
579 | |
580 void WASAPIUnifiedStream::DoVarispeedInitialization( | |
581 const AudioParameters& input_params, const AudioParameters& output_params) { | |
582 DVLOG(1) << "WASAPIUnifiedStream::DoVarispeedInitialization()"; | |
583 | |
584 // A FIFO is required in this mode for input to output buffering. | |
585 // Note that it will add some latency. | |
586 fifo_.reset(new AudioFifo(input_params.channels(), kFifoSize)); | |
587 VLOG(1) << "Using FIFO of size " << fifo_->max_frames() | |
588 << " (#channels=" << input_params.channels() << ")"; | |
589 | |
590 // Create the multi channel resampler using the initial sample rate ratio. | |
591 // We will call MultiChannelResampler::SetRatio() during runtime to | |
592 // allow arbitrary combinations of input and output devices running off | |
593 // different clocks and using different drivers, with potentially | |
594 // differing sample-rates. Note that the requested block size is given by | |
595 // the native input buffer size |input_buffer_size_frames_|. | |
596 io_sample_rate_ratio_ = input_params.sample_rate() / | |
597 static_cast<double>(output_params.sample_rate()); | |
598 DVLOG(2) << "io_sample_rate_ratio: " << io_sample_rate_ratio_; | |
599 resampler_.reset(new MultiChannelResampler( | |
600 input_params.channels(), io_sample_rate_ratio_, input_buffer_size_frames_, | |
601 base::Bind(&WASAPIUnifiedStream::ProvideInput, base::Unretained(this)))); | |
602 VLOG(1) << "Resampling from " << input_params.sample_rate() << " to " | |
603 << output_params.sample_rate(); | |
604 | |
605 // The optimal number of frames we'd like to keep in the FIFO at all times. | |
606 // The actual size will vary but the goal is to ensure that the average size | |
607 // is given by this value. | |
608 target_fifo_frames_ = kTargetFifoSafetyFactor * input_buffer_size_frames_; | |
609 VLOG(1) << "Target FIFO size: " << target_fifo_frames_; | |
610 | |
611 // Create the event which the audio engine will signal each time it | |
612 // wants an audio buffer to render. | |
613 render_event_.Set(CreateEvent(NULL, FALSE, FALSE, NULL)); | |
614 | |
615 // Allocate memory for temporary audio bus used to store resampled input | |
616 // audio. | |
617 resampled_bus_ = AudioBus::Create( | |
618 input_params.channels(), output_buffer_size_frames_); | |
619 | |
620 // Buffer initial silence corresponding to target I/O buffering. | |
621 ResetVarispeed(); | |
622 } | |
623 | |
624 void WASAPIUnifiedStream::ResetVarispeed() { | |
625 DCHECK(VarispeedMode()); | |
626 | |
627 // Buffer initial silence corresponding to target I/O buffering. | |
628 fifo_->Clear(); | |
629 scoped_ptr<AudioBus> silence = | |
630 AudioBus::Create(input_format_.Format.nChannels, | |
631 target_fifo_frames_); | |
632 silence->Zero(); | |
633 fifo_->Push(silence.get()); | |
634 resampler_->Flush(); | |
635 } | |
636 | |
637 void WASAPIUnifiedStream::Run() { | |
638 ScopedCOMInitializer com_init(ScopedCOMInitializer::kMTA); | |
639 | |
640 // Increase the thread priority. | |
641 audio_io_thread_->SetThreadPriority(base::kThreadPriority_RealtimeAudio); | |
642 | |
643 // Enable MMCSS to ensure that this thread receives prioritized access to | |
644 // CPU resources. | |
645 // TODO(henrika): investigate if it is possible to include these additional | |
646 // settings in SetThreadPriority() as well. | |
647 DWORD task_index = 0; | |
648 HANDLE mm_task = avrt::AvSetMmThreadCharacteristics(L"Pro Audio", | |
649 &task_index); | |
650 bool mmcss_is_ok = | |
651 (mm_task && avrt::AvSetMmThreadPriority(mm_task, AVRT_PRIORITY_CRITICAL)); | |
652 if (!mmcss_is_ok) { | |
653 // Failed to enable MMCSS on this thread. It is not fatal but can lead | |
654 // to reduced QoS at high load. | |
655 DWORD err = GetLastError(); | |
656 LOG(WARNING) << "Failed to enable MMCSS (error code=" << err << ")."; | |
657 } | |
658 | |
659 // The IAudioClock interface enables us to monitor a stream's data | |
660 // rate and the current position in the stream. Allocate it before we | |
661 // start spinning. | |
662 ScopedComPtr<IAudioClock> audio_output_clock; | |
663 HRESULT hr = audio_output_client_->GetService( | |
664 __uuidof(IAudioClock), audio_output_clock.ReceiveVoid()); | |
665 LOG_IF(WARNING, FAILED(hr)) << "Failed to create IAudioClock: " | |
666 << std::hex << hr; | |
667 | |
668 bool streaming = true; | |
669 bool error = false; | |
670 | |
671 HANDLE wait_array[3]; | |
672 size_t num_handles = 0; | |
673 wait_array[num_handles++] = stop_streaming_event_; | |
674 wait_array[num_handles++] = capture_event_; | |
675 if (render_event_) { | |
676 // One extra event handle is needed in varispeed mode. | |
677 wait_array[num_handles++] = render_event_; | |
678 } | |
679 | |
680 // Keep streaming audio until stop event is signaled. | |
681 // Capture events are always used but render events are only active in | |
682 // varispeed mode. | |
683 while (streaming && !error) { | |
684 // Wait for a close-down event, or a new capture event. | |
685 DWORD wait_result = WaitForMultipleObjects(num_handles, | |
686 wait_array, | |
687 FALSE, | |
688 INFINITE); | |
689 switch (wait_result) { | |
690 case WAIT_OBJECT_0 + 0: | |
691 // |stop_streaming_event_| has been set. | |
692 streaming = false; | |
693 break; | |
694 case WAIT_OBJECT_0 + 1: | |
695 // |capture_event_| has been set | |
696 if (VarispeedMode()) { | |
697 ProcessInputAudio(); | |
698 } else { | |
699 ProcessInputAudio(); | |
700 ProcessOutputAudio(audio_output_clock); | |
701 } | |
702 break; | |
703 case WAIT_OBJECT_0 + 2: | |
704 DCHECK(VarispeedMode()); | |
705 // |render_event_| has been set | |
706 ProcessOutputAudio(audio_output_clock); | |
707 break; | |
708 default: | |
709 error = true; | |
710 break; | |
711 } | |
712 } | |
713 | |
714 if (streaming && error) { | |
715 // Stop audio streaming since something has gone wrong in our main thread | |
716 // loop. Note that we are still in a "started" state, hence a Stop() call | |
717 // is required to join the thread properly. | |
718 audio_input_client_->Stop(); | |
719 audio_output_client_->Stop(); | |
720 PLOG(ERROR) << "WASAPI streaming failed."; | |
721 } | |
722 | |
723 // Disable MMCSS. | |
724 if (mm_task && !avrt::AvRevertMmThreadCharacteristics(mm_task)) { | |
725 PLOG(WARNING) << "Failed to disable MMCSS"; | |
726 } | |
727 } | |
728 | |
729 void WASAPIUnifiedStream::ProcessInputAudio() { | |
730 TRACE_EVENT0("audio", "WASAPIUnifiedStream::ProcessInputAudio"); | |
731 | |
732 BYTE* data_ptr = NULL; | |
733 UINT32 num_captured_frames = 0; | |
734 DWORD flags = 0; | |
735 UINT64 device_position = 0; | |
736 UINT64 capture_time_stamp = 0; | |
737 | |
738 const int bytes_per_sample = input_format_.Format.wBitsPerSample >> 3; | |
739 | |
740 base::TimeTicks now_tick = base::TimeTicks::HighResNow(); | |
741 | |
742 #ifndef NDEBUG | |
743 if (VarispeedMode()) { | |
744 input_time_stamps_[num_elements_[INPUT_TIME_STAMP]] = | |
745 now_tick.ToInternalValue(); | |
746 num_elements_[INPUT_TIME_STAMP]++; | |
747 } | |
748 #endif | |
749 | |
750 // Retrieve the amount of data in the capture endpoint buffer. | |
751 // |capture_time_stamp| is the value of the performance | |
752 // counter at the time that the audio endpoint device recorded | |
753 // the device position of the first audio frame in the data packet. | |
754 HRESULT hr = audio_capture_client_->GetBuffer(&data_ptr, | |
755 &num_captured_frames, | |
756 &flags, | |
757 &device_position, | |
758 &capture_time_stamp); | |
759 if (FAILED(hr)) { | |
760 DLOG(ERROR) << "Failed to get data from the capture buffer"; | |
761 return; | |
762 } | |
763 | |
764 if (hr == AUDCLNT_S_BUFFER_EMPTY) { | |
765 // The return code is a success code but a new packet is *not* available | |
766 // and none of the output parameters in the GetBuffer() call contain valid | |
767 // values. The best we can do is to deliver silence and avoid setting | |
768 // |input_callback_received_| since this only seems to happen for the | |
769 // initial event(s) on some devices. | |
770 input_bus_->Zero(); | |
771 } else { | |
772 // Valid data has been recorded and it is now OK to set the flag which | |
773 // informs the render side that capturing has started. | |
774 input_callback_received_ = true; | |
775 } | |
776 | |
777 if (num_captured_frames != 0) { | |
778 if (flags & AUDCLNT_BUFFERFLAGS_SILENT) { | |
779 // Clear out the capture buffer since silence is reported. | |
780 input_bus_->Zero(); | |
781 } else { | |
782 // Store captured data in an audio bus after de-interleaving | |
783 // the data to match the audio bus structure. | |
784 input_bus_->FromInterleaved( | |
785 data_ptr, num_captured_frames, bytes_per_sample); | |
786 } | |
787 } | |
788 | |
789 hr = audio_capture_client_->ReleaseBuffer(num_captured_frames); | |
790 DLOG_IF(ERROR, FAILED(hr)) << "Failed to release capture buffer"; | |
791 | |
792 // Buffer input into FIFO if varispeed mode is used. The render event | |
793 // will drive resampling of this data to match the output side. | |
794 if (VarispeedMode()) { | |
795 int available_frames = fifo_->max_frames() - fifo_->frames(); | |
796 if (input_bus_->frames() <= available_frames) { | |
797 fifo_->Push(input_bus_.get()); | |
798 } | |
799 #ifndef NDEBUG | |
800 num_frames_in_fifo_[num_elements_[NUM_FRAMES_IN_FIFO]] = | |
801 fifo_->frames(); | |
802 num_elements_[NUM_FRAMES_IN_FIFO]++; | |
803 #endif | |
804 } | |
805 | |
806 // Save resources by not asking for new delay estimates each time. | |
807 // These estimates are fairly stable and it is perfectly safe to only | |
808 // sample at a rate of ~1Hz. | |
809 // TODO(henrika): we might have to increase the update rate in varispeed | |
810 // mode since the delay variations are higher in this mode. | |
811 if ((now_tick - last_delay_sample_time_).InMilliseconds() > | |
812 kTimeDiffInMillisecondsBetweenDelayMeasurements && | |
813 input_callback_received_) { | |
814 // Calculate the estimated capture delay, i.e., the latency between | |
815 // the recording time and the time when we are notified about | |
816 // the recorded data. Note that the capture time stamp is given in | |
817 // 100-nanosecond (0.1 microseconds) units. | |
818 base::TimeDelta diff = | |
819 now_tick - base::TimeTicks::FromInternalValue(0.1 * capture_time_stamp); | |
820 capture_delay_ms_ = diff.InMillisecondsF(); | |
821 | |
822 last_delay_sample_time_ = now_tick; | |
823 update_output_delay_ = true; | |
824 } | |
825 } | |
826 | |
827 void WASAPIUnifiedStream::ProcessOutputAudio(IAudioClock* audio_output_clock) { | |
828 TRACE_EVENT0("audio", "WASAPIUnifiedStream::ProcessOutputAudio"); | |
829 | |
830 if (!input_callback_received_) { | |
831 if (share_mode_ == AUDCLNT_SHAREMODE_SHARED) { | |
832 if (!CoreAudioUtil::FillRenderEndpointBufferWithSilence( | |
833 audio_output_client_, audio_render_client_)) | |
834 DLOG(WARNING) << "Failed to prepare endpoint buffers with silence."; | |
835 } | |
836 return; | |
837 } | |
838 | |
839 // Rate adjusted resampling is required in varispeed mode. It means that | |
840 // recorded audio samples will be read from the FIFO, resampled to match the | |
841 // output sample-rate and then stored in |resampled_bus_|. | |
842 if (VarispeedMode()) { | |
843 // Calculate a varispeed rate scalar factor to compensate for drift between | |
844 // input and output. We use the actual number of frames still in the FIFO | |
845 // compared with the ideal value of |target_fifo_frames_|. | |
846 int delta = fifo_->frames() - target_fifo_frames_; | |
847 | |
848 // Average |delta| because it can jitter back/forth quite frequently | |
849 // by +/- the hardware buffer-size *if* the input and output callbacks are | |
850 // happening at almost exactly the same time. Also, if the input and output | |
851 // sample-rates are different then |delta| will jitter quite a bit due to | |
852 // the rate conversion happening in the varispeed, plus the jittering of | |
853 // the callbacks. The average value is what's important here. | |
854 // We use an exponential smoothing filter to reduce the variations. | |
855 average_delta_ += kAlpha * (delta - average_delta_); | |
856 | |
857 // Compute a rate compensation which always attracts us back to the | |
858 // |target_fifo_frames_| over a period of kCorrectionTimeSeconds. | |
859 double correction_time_frames = | |
860 kCorrectionTimeSeconds * output_format_.Format.nSamplesPerSec; | |
861 fifo_rate_compensation_ = | |
862 (correction_time_frames + average_delta_) / correction_time_frames; | |
863 | |
864 #ifndef NDEBUG | |
865 fifo_rate_comps_[num_elements_[RATE_COMPENSATION]] = | |
866 fifo_rate_compensation_; | |
867 num_elements_[RATE_COMPENSATION]++; | |
868 #endif | |
869 | |
870 // Adjust for FIFO drift. | |
871 const double new_ratio = io_sample_rate_ratio_ * fifo_rate_compensation_; | |
872 resampler_->SetRatio(new_ratio); | |
873 // Get resampled input audio from FIFO where the size is given by the | |
874 // output side. | |
875 resampler_->Resample(resampled_bus_->frames(), resampled_bus_.get()); | |
876 } | |
877 | |
878 // Derive a new total delay estimate if the capture side has set the | |
879 // |update_output_delay_| flag. | |
880 if (update_output_delay_) { | |
881 // Calculate the estimated render delay, i.e., the time difference | |
882 // between the time when data is added to the endpoint buffer and | |
883 // when the data is played out on the actual speaker. | |
884 const double stream_pos = CurrentStreamPosInMilliseconds( | |
885 num_written_frames_ + output_buffer_size_frames_, | |
886 output_format_.Format.nSamplesPerSec); | |
887 const double speaker_pos = | |
888 SpeakerStreamPosInMilliseconds(audio_output_clock); | |
889 const double render_delay_ms = stream_pos - speaker_pos; | |
890 const double fifo_delay_ms = VarispeedMode() ? | |
891 FrameCountToMilliseconds(target_fifo_frames_, input_format_) : 0; | |
892 | |
893 // Derive the total delay, i.e., the sum of the input and output | |
894 // delays. Also convert the value into byte units. An extra FIFO delay | |
895 // is added for varispeed usage cases. | |
896 total_delay_ms_ = VarispeedMode() ? | |
897 capture_delay_ms_ + render_delay_ms + fifo_delay_ms : | |
898 capture_delay_ms_ + render_delay_ms; | |
899 DVLOG(2) << "total_delay_ms : " << total_delay_ms_; | |
900 DVLOG(3) << " capture_delay_ms: " << capture_delay_ms_; | |
901 DVLOG(3) << " render_delay_ms : " << render_delay_ms; | |
902 DVLOG(3) << " fifo_delay_ms : " << fifo_delay_ms; | |
903 total_delay_bytes_ = MillisecondsToBytes(total_delay_ms_, output_format_); | |
904 | |
905 // Wait for new signal from the capture side. | |
906 update_output_delay_ = false; | |
907 } | |
908 | |
909 // Select source depending on if varispeed is utilized or not. | |
910 // Also, the source might be the output of a channel mixer if channel mixing | |
911 // is required to match the native input channels to the number of input | |
912 // channels used by the client (given by |input_channels_| in this case). | |
913 AudioBus* input_bus = VarispeedMode() ? | |
914 resampled_bus_.get() : input_bus_.get(); | |
915 if (channel_mixer_) { | |
916 DCHECK_EQ(input_bus->frames(), channel_bus_->frames()); | |
917 // Most common case is 1->2 channel upmixing. | |
918 channel_mixer_->Transform(input_bus, channel_bus_.get()); | |
919 // Use the output from the channel mixer as new input bus. | |
920 input_bus = channel_bus_.get(); | |
921 } | |
922 | |
923 // Prepare for rendering by calling OnMoreIOData(). | |
924 int frames_filled = source_->OnMoreIOData( | |
925 input_bus, | |
926 output_bus_.get(), | |
927 AudioBuffersState(0, total_delay_bytes_)); | |
928 DCHECK_EQ(frames_filled, output_bus_->frames()); | |
929 | |
930 // Keep track of number of rendered frames since we need it for | |
931 // our delay calculations. | |
932 num_written_frames_ += frames_filled; | |
933 | |
934 // Derive the amount of available space in the endpoint buffer. | |
935 // Avoid render attempt if there is no room for a captured packet. | |
936 UINT32 num_queued_frames = 0; | |
937 audio_output_client_->GetCurrentPadding(&num_queued_frames); | |
938 if (endpoint_render_buffer_size_frames_ - num_queued_frames < | |
939 output_buffer_size_frames_) | |
940 return; | |
941 | |
942 // Grab all available space in the rendering endpoint buffer | |
943 // into which the client can write a data packet. | |
944 uint8* audio_data = NULL; | |
945 HRESULT hr = audio_render_client_->GetBuffer(output_buffer_size_frames_, | |
946 &audio_data); | |
947 if (FAILED(hr)) { | |
948 DLOG(ERROR) << "Failed to access render buffer"; | |
949 return; | |
950 } | |
951 | |
952 const int bytes_per_sample = output_format_.Format.wBitsPerSample >> 3; | |
953 | |
954 // Convert the audio bus content to interleaved integer data using | |
955 // |audio_data| as destination. | |
956 output_bus_->Scale(volume_); | |
957 output_bus_->ToInterleaved( | |
958 output_buffer_size_frames_, bytes_per_sample, audio_data); | |
959 | |
960 // Release the buffer space acquired in the GetBuffer() call. | |
961 hr = audio_render_client_->ReleaseBuffer(output_buffer_size_frames_, 0); | |
962 DLOG_IF(ERROR, FAILED(hr)) << "Failed to release render buffer"; | |
963 | |
964 return; | |
965 } | |
966 | |
967 void WASAPIUnifiedStream::HandleError(HRESULT err) { | |
968 CHECK((started() && GetCurrentThreadId() == audio_io_thread_->tid()) || | |
969 (!started() && GetCurrentThreadId() == creating_thread_id_)); | |
970 NOTREACHED() << "Error code: " << std::hex << err; | |
971 if (source_) | |
972 source_->OnError(this); | |
973 } | |
974 | |
975 void WASAPIUnifiedStream::StopAndJoinThread(HRESULT err) { | |
976 CHECK(GetCurrentThreadId() == creating_thread_id_); | |
977 DCHECK(audio_io_thread_.get()); | |
978 SetEvent(stop_streaming_event_.Get()); | |
979 audio_io_thread_->Join(); | |
980 audio_io_thread_.reset(); | |
981 HandleError(err); | |
982 } | |
983 | |
984 } // namespace media | |