media/audio/win/audio_low_latency_input_win.cc - Issue 8283032: Low-latency AudioInputStream implementation based on WASAPI for Windows.

Side by Side Diff: media/audio/win/audio_low_latency_input_win.cc

Issue 8283032: Low-latency AudioInputStream implementation based on WASAPI for Windows. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/

Patch Set: Added ViewHostMsg_GetHardwareInputSampleRate IPC message Created 9 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

« content/renderer/media/webrtc_audio_device_impl.cc ('K') | « media/audio/win/audio_low_latency_input_win.h ('k') | media/audio/win/audio_low_latency_input_win_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Property Changes:

Added: svn:eol-style
+ LF

OLD	NEW
(Empty)
	1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.

	2 // Use of this source code is governed by a BSD-style license that can be

	3 // found in the LICENSE file.

	4

	5 #include "media/audio/win/audio_low_latency_input_win.h"

	6

	7 #include <comdef.h>

	8

	9 #include "base/logging.h"

	10 #include "base/memory/scoped_ptr.h"

	11 #include "base/utf_string_conversions.h"

	12 #include "media/audio/audio_util.h"

	13 #include "media/audio/win/audio_manager_win.h"

	14

	15 using base::win::ScopedComPtr;

	16

	17 #ifndef NDEBUG

	18 static void DLogFormat(const char* str, const WAVEFORMATEX* format) {

	19 DLOG(INFO) << str << std::endl

	20 << " wFormatTag : " << format->wFormatTag << std::endl

	21 << " nChannels : " << format->nChannels << std::endl

	22 << " nSamplesPerSec : " << format->nSamplesPerSec << std::endl

	23 << " nAvgBytesPerSec: " << format->nAvgBytesPerSec << std::endl

	24 << " wBitsPerSample : " << format->wBitsPerSample << std::endl

	25 << " nBlockAlign : " << format->nBlockAlign << std::endl

	26 << " cbSize : " << format->cbSize << std::endl;

	27 }

	28 #endif

	29

	30 WASAPIAudioInputStream::WASAPIAudioInputStream(

	31 AudioManagerWin* manager, const AudioParameters& params, ERole device_role)

	32 : manager_(manager),

	33 capture_thread_(NULL),

	34 opened_(false),

	35 started_(false),

	36 endpoint_buffer_size_frames_(0),

	37 device_role_(device_role),

	38 sink_(NULL) {

	39 DCHECK(manager_);

	40

	41 // Set up the desired capture format specified by the client.

	42 format_.nSamplesPerSec = params.sample_rate;

	43 format_.wFormatTag = WAVE_FORMAT_PCM;

	44 format_.wBitsPerSample = params.bits_per_sample;

	45 format_.nChannels = params.channels;

	46 format_.nBlockAlign = (format_.wBitsPerSample / 8) * format_.nChannels;

	47 format_.nAvgBytesPerSec = format_.nSamplesPerSec * format_.nBlockAlign;

	48 format_.cbSize = 0;

	49 #ifndef NDEBUG

	50 DLogFormat("Desired capture format:", &format_);

	51 #endif

	52

	53 // Size in bytes of each audio frame.

	54 frame_size_ = format_.nBlockAlign;

	55 // Store size of audio packets which we expect to get from the audio

	56 // endpoint device in each capture event.

	57 packet_size_frames_ = params.GetPacketSize() / format_.nBlockAlign;

	58 packet_size_bytes_ = params.GetPacketSize();

	59 DLOG(INFO) << "Number of bytes per audio frame : " << frame_size_;

	60 DLOG(INFO) << "Number of audio frames per packet: " << packet_size_frames_;

	61

	62 // All events are auto-reset events and non-signaled initially.

	63

	64 // Create the event which the audio engine will signal each time

	65 // a buffer becomes ready to be processed by the client.

	66 audio_samples_ready_event_.Set(CreateEvent(NULL, FALSE, FALSE, NULL));

	67 DCHECK(audio_samples_ready_event_.IsValid());

	68

	69 // Create the event which will be set in Stop() when capturing shall stop.

	70 stop_capture_event_.Set(CreateEvent(NULL, FALSE, FALSE, NULL));

	71 DCHECK(stop_capture_event_.IsValid());

	72

	73 ms_to_frame_count_ = static_cast<double>(params.sample_rate) / 1000.0;

	74

	75 LARGE_INTEGER performance_frequency;

	76 if (QueryPerformanceFrequency(&performance_frequency)) {

	77 perf_count_to_100ns_units_ =

	78 (10000000.0 / static_cast<double>(performance_frequency.QuadPart));

	79 } else {

	80 LOG(ERROR) << "High-resolution performance counters are not supported.";

	81 perf_count_to_100ns_units_ = 0.0;

	82 }

	83 }

	84

	85 WASAPIAudioInputStream::~WASAPIAudioInputStream() {

	86 }

	87

	88 bool WASAPIAudioInputStream::Open() {

	89 // Verify that we are not already opened.

	90 if (opened_)

	91 return false;

	92

	93 // Obtain a reference to the IMMDevice interface of the default capturing

	94 // device with the specified role.

	95 HRESULT hr = SetCaptureDevice(device_role_);

	96 if (FAILED(hr)) {

	97 HandleError(hr);

	98 return false;

	99 }

	100

	101 // Obtain an IAudioClient interface which enables us to create and initialize

	102 // an audio stream between an audio application and the audio engine.

	103 hr = ActivateCaptureDevice();

	104 if (FAILED(hr)) {

	105 HandleError(hr);

	106 return false;

	107 }

	108

	109 // Retrieve the stream format which the audio engine uses for its internal

	110 // processing/mixing of shared-mode streams.

	111 hr = GetAudioEngineStreamFormat();

	112 if (FAILED(hr)) {

	113 HandleError(hr);

	114 return false;

	115 }

	116

	117 // Verify that the selected audio endpoint supports the specified format

	118 // set during construction.

	119 if (!DesiredFormatIsSupported()) {

	120 hr = E_INVALIDARG;

	121 HandleError(hr);

	122 return false;

	123 }

	124

	125 // Initialize the audio stream between the client and the device using

	126 // shared mode and a lowest possible glitch-free latency.

	127 hr = InitializeAudioEngine();

	128 if (FAILED(hr)) {

	129 HandleError(hr);

	130 return false;

	131 }

	132

	133 opened_ = true;

	134

	135 return true;

	136 }

	137

	138 void WASAPIAudioInputStream::Start(AudioInputCallback* callback) {

	139 DCHECK(callback);

	140 DCHECK(opened_);

	141

	142 if (!opened_)

	143 return;

	144

	145 if (started_)

	146 return;

	147

	148 sink_ = callback;

	149

	150 // Create and start the thread that will drive the capturing by waiting for

	151 // capture events.

	152 capture_thread_ =

	153 new base::DelegateSimpleThread(this, "wasapi_capture_thread");

	154 capture_thread_->Start();

	155

	156 // Start streaming data between the endpoint buffer and the audio engine.

	157 HRESULT hr = audio_client_->Start();

	158 DLOG_IF(ERROR, FAILED(hr)) << "Failed to start input streaming.";

	159

	160 started_ = SUCCEEDED(hr);

	161 }

	162

	163 void WASAPIAudioInputStream::Stop() {

	164 if (!started_)

	165 return;

	166

	167 // Shut down the capture thread.

	168 if (stop_capture_event_.IsValid()) {

	169 SetEvent(stop_capture_event_.Get());

	170 }

	171

	172 // Stop the input audio streaming.

	173 HRESULT hr = audio_client_->Stop();

	174 if (FAILED(hr)) {

	175 LOG(ERROR) << "Failed to stop input streaming.";

	176 }

	177

	178 // Wait until the thread completes and perform cleanup.

	179 if (capture_thread_) {

	180 SetEvent(stop_capture_event_.Get());

	181 capture_thread_->Join();

	182 capture_thread_ = NULL;

	183 }

	184

	185 started_ = false;

	186 }

	187

	188 void WASAPIAudioInputStream::Close() {

	189 // It is valid to call Close() before calling open or Start().

	190 // It is also valid to call Close() after Start() has been called.

	191 Stop();

	192 if (sink_) {

	193 sink_->OnClose(this);

	194 sink_ = NULL;

	195 }

	196

	197 // Inform the audio manager that we have been closed. This will cause our

	198 // destruction.

	199 manager_->ReleaseInputStream(this);

	200 }

	201

	202 double WASAPIAudioInputStream::HardwareSampleRate(ERole device_role) {

	203 // It is assumed that this static method is called from a COM thread, i.e.,

	204 // CoInitializeEx() is not called here to avoid STA/MTA conflicts.

	205 ScopedComPtr<IMMDeviceEnumerator> enumerator;

	206 HRESULT hr = CoCreateInstance(__uuidof(MMDeviceEnumerator),

	207 NULL,

	208 CLSCTX_INPROC_SERVER,

	209 __uuidof(IMMDeviceEnumerator),

	210 enumerator.ReceiveVoid());

	211 if (FAILED(hr)) {

	212 NOTREACHED() << "error code: " << hr;

	213 return 0.0;

	214 }

	215

	216 ScopedComPtr<IMMDevice> endpoint_device;

	217 hr = enumerator->GetDefaultAudioEndpoint(eCapture,

	218 device_role,

	219 endpoint_device.Receive());

	220 if (FAILED(hr)) {

	221 NOTREACHED() << "error code: " << hr;

	222 return 0.0;

	223 }

	224

	225 ScopedComPtr<IAudioClient> audio_client;

	226 hr = endpoint_device->Activate(__uuidof(IAudioClient),

	227 CLSCTX_INPROC_SERVER,

	228 NULL,

	229 audio_client.ReceiveVoid());

	230 if (FAILED(hr)) {

	231 NOTREACHED() << "error code: " << hr;

	232 return 0.0;

	233 }

	234

	235 ScopedComMem<WAVEFORMATEX> audio_engine_mix_format;

	236 hr = audio_client->GetMixFormat(audio_engine_mix_format.Receive());

	237 if (FAILED(hr)) {

	238 NOTREACHED() << "error code: " << hr;

	239 return 0.0;

	240 }

	241

	242 return static_cast<double>(audio_engine_mix_format->nSamplesPerSec);

	243 }

	244

	245 void WASAPIAudioInputStream::Run() {

	246 ScopedCOMInitializerMTA com_init;

	247

	248 capture_thread_->SetThreadPriority(base::kThreadPriority_RealtimeAudio);

	249

	250 // TODO(henrika): add MMCSS support.

	251

	252 // Allocate a buffer with a size that enables us to take care of cases like:

	253 // 1) The recorded buffer size is smaller, or does not match exactly with,

	254 // the selected packet size used in each callback.

	255 // 2) The selected buffer size is larger than the recorded buffer size in

	256 // each event.

	257 size_t buffer_frame_index = 0;

	258 size_t capture_buffer_size = std::max(

	259 2 * endpoint_buffer_size_frames_ * frame_size_,

	260 2 * packet_size_frames_ * frame_size_);

	261 scoped_array<uint8> capture_buffer(new uint8[capture_buffer_size]);

	262

	263 LARGE_INTEGER now_count;

	264 bool recording = true;

	265 HANDLE wait_array[2] = {stop_capture_event_, audio_samples_ready_event_};

	266

	267 while (recording) {

	268 HRESULT hr = S_FALSE;

	269

	270 // Wait for a close-down event or a new capture event.

	271 DWORD wait_result = WaitForMultipleObjects(2, wait_array, FALSE, INFINITE);

	272 switch (wait_result) {

	273 case WAIT_FAILED:

	274 recording = false;

	275 LOG(ERROR) << "WASAPI capturing failed with error code "

	276 << GetLastError();

	277 break;

	278 case WAIT_OBJECT_0 + 0:

	279 // \|stop_capture_event_\| has been set.

	280 recording = false;

	281 break;

	282 case WAIT_OBJECT_0 + 1:

	283 // \|audio_samples_ready_event_\| has been set.

	284 BYTE* data_ptr = NULL;

	285 UINT32 num_frames_to_read = 0;

	286 DWORD flags = 0;

	287 UINT64 device_position = 0;

	288 UINT64 first_audio_frame_timestamp = 0;

	289

	290 // Retrieve the amount of data in the capture endpoint buffer,

	291 // replace it with silence if required, create callbacks for each

	292 // packet and store non-delivered data for the next event.

	293 hr = audio_capture_client_->GetBuffer(&data_ptr,

	294 &num_frames_to_read,

	295 &flags,

	296 &device_position,

	297 &first_audio_frame_timestamp);

	298 if (SUCCEEDED(hr)) {

	299 if (num_frames_to_read != 0) {

	300 size_t pos = buffer_frame_index * frame_size_;

	301 size_t num_bytes = num_frames_to_read * frame_size_;

	302 DCHECK_GE(capture_buffer_size, pos + num_bytes);

	303

	304 if (flags & AUDCLNT_BUFFERFLAGS_SILENT) {

	305 // Clear out the local buffer since silence is reported.

	306 memset(&capture_buffer[pos], 0, num_bytes);

	307 } else {

	308 // Copy captured data from audio engine buffer to local buffer.

	309 memcpy(&capture_buffer[pos], data_ptr, num_bytes);

	310 }

	311

	312 buffer_frame_index += num_frames_to_read;

	313 }

	314

	315 hr = audio_capture_client_->ReleaseBuffer(num_frames_to_read);

	316 if (FAILED(hr))

	317 HandleError(hr);

	318

	319 // Derive a delay estimate for the captured audio packet.

	320 // The value contains two parts (A+B), where A is the delay of the

	321 // first audio frame in the packet and B is the extra delay contained

	322 // in any stored data. Unit is in audio frames.

	323 QueryPerformanceCounter(&now_count);

	324 double audio_delay_frames =

	325 ((perf_count_to_100ns_units_ * now_count.QuadPart -

	326 first_audio_frame_timestamp) / 10000.0) * ms_to_frame_count_ +

	327 buffer_frame_index - num_frames_to_read;

	328

	329 // Deliver captured data to the registered consumer using a packet

	330 // size which was specified at construction.

	331 uint32 delay_frames = static_cast<uint32> (audio_delay_frames + 0.5);

	332 while (buffer_frame_index >= packet_size_frames_) {

	333 uint8* audio_data = reinterpret_cast<uint8*>(capture_buffer.get());

	334

	335 // Deliver data packet and delay estimation to the user.

	336 sink_->OnData(this,

	337 audio_data,

	338 packet_size_bytes_,

	339 delay_frames * frame_size_);

	340

	341 // Store parts of the recorded data which can't be delivered

	342 // using the current packet size. The stored section will be used

	343 // either in the next while-loop iteration or in the next

	344 // capture event.

	345 memmove(&capture_buffer[0],

	346 &capture_buffer[packet_size_bytes_],

	347 (buffer_frame_index - packet_size_frames_) * frame_size_);

	348

	349 buffer_frame_index -= packet_size_frames_;

	350 delay_frames -= packet_size_frames_;

	351 }

	352 }

	353 break;

	354 }

	355 }

	356 }

	357

	358 void WASAPIAudioInputStream::HandleError(HRESULT err) {

	359 _com_error com_error(err);

	360 std::wstring message(com_error.ErrorMessage());

	361 DLOG(ERROR) << "Error code: " << err;

	362 NOTREACHED() << "Error details: " << WideToUTF8(message);

	363

	364 if (sink_)

	365 sink_->OnError(this, static_cast<int>(err));

	366 }

	367

	368 HRESULT WASAPIAudioInputStream::SetCaptureDevice(ERole device_role) {

	369 ScopedComPtr<IMMDeviceEnumerator> enumerator;

	370 HRESULT hr = CoCreateInstance(__uuidof(MMDeviceEnumerator),

	371 NULL,

	372 CLSCTX_INPROC_SERVER,

	373 __uuidof(IMMDeviceEnumerator),

	374 enumerator.ReceiveVoid());

	375 if (SUCCEEDED(hr)) {

	376 // Retrieve the default capture audio endpoint for the specified role.

	377 // Note that, in Windows Vista, the MMDevice API supports device roles

	378 // but the system-supplied user interface programs do not.

	379 hr = enumerator->GetDefaultAudioEndpoint(eCapture,

	380 device_role,

	381 endpoint_device_.Receive());

	382

	383 // Verify that the audio endpoint device is active. That is, the audio

	384 // adapter that connects to the endpoint device is present and enabled.

	385 DWORD state = DEVICE_STATE_DISABLED;

	386 hr = endpoint_device_->GetState(&state);

	387 if (SUCCEEDED(hr)) {

	388 if (!(state & DEVICE_STATE_ACTIVE)) {

	389 DLOG(ERROR) << "Selected capture device is not active.";

	390 hr = E_ACCESSDENIED;

	391 }

	392 }

	393 }

	394

	395 return hr;

	396 }

	397

	398 HRESULT WASAPIAudioInputStream::ActivateCaptureDevice() {

	399 // Creates and activates an IAudioClient COM object given the selected

	400 // capture endpoint device.

	401 HRESULT hr = endpoint_device_->Activate(__uuidof(IAudioClient),

	402 CLSCTX_INPROC_SERVER,

	403 NULL,

	404 audio_client_.ReceiveVoid());

	405 return hr;

	406 }

	407

	408 HRESULT WASAPIAudioInputStream::GetAudioEngineStreamFormat() {

	409 // Retrieve the stream format that the audio engine uses for its internal

	410 // processing/mixing of shared-mode streams.

	411 HRESULT hr = audio_client_->GetMixFormat(audio_engine_mix_format_.Receive());

	412 #ifndef NDEBUG

	413 if (SUCCEEDED(hr))

	414 DLogFormat("Audio Engine's format:", audio_engine_mix_format_.get());

	415 #endif

	416 return hr;

	417 }

	418

	419 bool WASAPIAudioInputStream::DesiredFormatIsSupported() {

	420 // In shared mode, the audio engine always supports the mix format,

	421 // which is stored in the \|audio_engine_mix_format_\| member. In addition,

	422 // the audio engine might support similar formats that have the same

	423 // sample rate and number of channels as the mix format but differ in

	424 // the representation of audio sample values.

	425 ScopedComMem<WAVEFORMATEX> closest_match;

	426 HRESULT hr = audio_client_->IsFormatSupported(AUDCLNT_SHAREMODE_SHARED,

	427 &format_,

	428 closest_match.Receive());

	429 if (hr == S_FALSE) {

	430 DLOG(ERROR) << "Format is not supported but a closest match exists.";

	431 #ifndef NDEBUG

	432 DLogFormat("Closest suggested capture format:", closest_match.get());

	433 #endif

	434 }

	435 return (hr == S_OK);

	436 }

	437

	438 HRESULT WASAPIAudioInputStream::InitializeAudioEngine() {

	439 // Initialize the audio stream between the client and the device.

	440 // We connect indirectly through the audio engine by using shared mode

	441 // and WASAPI is initialized in an event driven mode.

	442 // Note that, \|hnsBufferDuration\| is set of 0, which ensures that the

	443 // buffer is never smaller than the minimum buffer size needed to ensure

	444 // that glitches do not occur between the periodic processing passes.

	445 // This setting should lead to lowest possible latency.

	446 HRESULT hr = audio_client_->Initialize(AUDCLNT_SHAREMODE_SHARED,

	447 AUDCLNT_STREAMFLAGS_EVENTCALLBACK \|

	448 AUDCLNT_STREAMFLAGS_NOPERSIST,

	449 0, // hnsBufferDuration

	450 0,

	451 &format_,

	452 NULL);

	453 if (FAILED(hr))

	454 return hr;

	455

	456 // Retrieve the length of the endpoint buffer shared between the client

	457 // and the audio engine. The buffer length determines the maximum amount

	458 // of capture data that the audio engine can read from the endpoint buffer

	459 // during a single processing pass.

	460 // A typical value is 960 audio frames <=> 20ms @ 48kHz sample rate.

	461 hr = audio_client_->GetBufferSize(&endpoint_buffer_size_frames_);

	462 if (FAILED(hr))

	463 return hr;

	464 DLOG(INFO) << "endpoint buffer size: " << endpoint_buffer_size_frames_

	465 << " [frames]";

	466

	467 #ifndef NDEBUG

	468 // The period between processing passes by the audio engine is fixed for a

	469 // particular audio endpoint device and represents the smallest processing

	470 // quantum for the audio engine. This period plus the stream latency between

	471 // the buffer and endpoint device represents the minimum possible latency

	472 // that an audio application can achieve.

	473 REFERENCE_TIME device_period_shared_mode = 0;

	474 REFERENCE_TIME device_period_exclusive_mode = 0;

	475 HRESULT hr_dbg = audio_client_->GetDevicePeriod(

	476 &device_period_shared_mode, &device_period_exclusive_mode);

	477 if (SUCCEEDED(hr_dbg)) {

	478 DLOG(INFO) << "device period: "

	479 << static_cast<double>(device_period_shared_mode / 10000.0)

	480 << " [ms]";

	481 }

	482

	483 REFERENCE_TIME latency = 0;

	484 hr_dbg = audio_client_->GetStreamLatency(&latency);

	485 if (SUCCEEDED(hr_dbg)) {

	486 DLOG(INFO) << "stream latency: " << static_cast<double>(latency / 10000.0)

	487 << " [ms]";

	488 }

	489 #endif

	490

	491 // Set the event handle that the audio engine will signal each time

	492 // a buffer becomes ready to be processed by the client.

	493 hr = audio_client_->SetEventHandle(audio_samples_ready_event_.Get());

	494 if (FAILED(hr))

	495 return hr;

	496

	497 // Get access to the IAudioCaptureClient interface. This interface

	498 // enables us to read input data from the capture endpoint buffer.

	499 hr = audio_client_->GetService(__uuidof(IAudioCaptureClient),

	500 audio_capture_client_.ReceiveVoid());

	501 return hr;

	502 }

OLD	NEW