media/audio/win/audio_low_latency_input_win.cc - Issue 8283032: Low-latency AudioInputStream implementation based on WASAPI for Windows.

Side by Side Diff: media/audio/win/audio_low_latency_input_win.cc

Issue 8283032: Low-latency AudioInputStream implementation based on WASAPI for Windows. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/

Patch Set: Now uses ScopedCoMem in base/win Created 9 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

Property Changes:

Added: svn:eol-style
+ LF

OLD	NEW
(Empty)
	1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.

	2 // Use of this source code is governed by a BSD-style license that can be

	3 // found in the LICENSE file.

	4

	5 #include "media/audio/win/audio_low_latency_input_win.h"

	6

	7 #include "base/logging.h"

	8 #include "base/memory/scoped_ptr.h"

	9 #include "base/utf_string_conversions.h"

	10 #include "media/audio/audio_util.h"

	11 #include "media/audio/win/audio_manager_win.h"

	12 #include "media/audio/win/avrt_wrapper_win.h"

	13

	14 using base::win::ScopedComPtr;

	15 using base::win::ScopedCOMInitializer;

	16

	17 WASAPIAudioInputStream::WASAPIAudioInputStream(

	18 AudioManagerWin* manager, const AudioParameters& params, ERole device_role)

	19 : com_init_(ScopedCOMInitializer::kMTA),

	20 manager_(manager),

	21 capture_thread_(NULL),

	22 opened_(false),

	23 started_(false),

	24 endpoint_buffer_size_frames_(0),

	25 device_role_(device_role),

	26 sink_(NULL) {

	27 DCHECK(manager_);

	28

	29 // Load the Avrt DLL if not already loaded. Required to support MMCSS.

	30 bool avrt_init = avrt::Initialize();

	31 DCHECK(avrt_init) << "Failed to load the Avrt.dll";

	32

	33 // Set up the desired capture format specified by the client.

	34 format_.nSamplesPerSec = params.sample_rate;

	35 format_.wFormatTag = WAVE_FORMAT_PCM;

	36 format_.wBitsPerSample = params.bits_per_sample;

	37 format_.nChannels = params.channels;

	38 format_.nBlockAlign = (format_.wBitsPerSample / 8) * format_.nChannels;

	39 format_.nAvgBytesPerSec = format_.nSamplesPerSec * format_.nBlockAlign;

	40 format_.cbSize = 0;

	41

	42 // Size in bytes of each audio frame.

	43 frame_size_ = format_.nBlockAlign;

	44 // Store size of audio packets which we expect to get from the audio

	45 // endpoint device in each capture event.

	46 packet_size_frames_ = params.GetPacketSize() / format_.nBlockAlign;

	47 packet_size_bytes_ = params.GetPacketSize();

	48 DVLOG(1) << "Number of bytes per audio frame : " << frame_size_;

	49 DVLOG(1) << "Number of audio frames per packet: " << packet_size_frames_;

	50

	51 // All events are auto-reset events and non-signaled initially.

	52

	53 // Create the event which the audio engine will signal each time

	54 // a buffer becomes ready to be processed by the client.

	55 audio_samples_ready_event_.Set(CreateEvent(NULL, FALSE, FALSE, NULL));

	56 DCHECK(audio_samples_ready_event_.IsValid());

	57

	58 // Create the event which will be set in Stop() when capturing shall stop.

	59 stop_capture_event_.Set(CreateEvent(NULL, FALSE, FALSE, NULL));

	60 DCHECK(stop_capture_event_.IsValid());

	61

	62 ms_to_frame_count_ = static_cast<double>(params.sample_rate) / 1000.0;

	63

	64 LARGE_INTEGER performance_frequency;

	65 if (QueryPerformanceFrequency(&performance_frequency)) {

	66 perf_count_to_100ns_units_ =

	67 (10000000.0 / static_cast<double>(performance_frequency.QuadPart));

	68 } else {

	69 LOG(ERROR) << "High-resolution performance counters are not supported.";

	70 perf_count_to_100ns_units_ = 0.0;

	71 }

	72 }

	73

// Destructor. The body is empty: nothing is stopped, joined or released
// explicitly here.
// NOTE(review): presumably the scoped members (event handles, COM pointers)
// clean up after themselves and callers always go through Close() first --
// confirm against the header and the audio manager.
WASAPIAudioInputStream::~WASAPIAudioInputStream() {}

	75

	76 bool WASAPIAudioInputStream::Open() {

	77 // Verify that we are not already opened.

	78 if (opened_)

	79 return false;

	80

	81 // Obtain a reference to the IMMDevice interface of the default capturing

	82 // device with the specified role.

	83 HRESULT hr = SetCaptureDevice(device_role_);

	84 if (FAILED(hr)) {

	85 HandleError(hr);

	86 return false;

	87 }

	88

	89 // Obtain an IAudioClient interface which enables us to create and initialize

	90 // an audio stream between an audio application and the audio engine.

	91 hr = ActivateCaptureDevice();

	92 if (FAILED(hr)) {

	93 HandleError(hr);

	94 return false;

	95 }

	96

	97 // Retrieve the stream format which the audio engine uses for its internal

	98 // processing/mixing of shared-mode streams.

	99 hr = GetAudioEngineStreamFormat();

	100 if (FAILED(hr)) {

	101 HandleError(hr);

	102 return false;

	103 }

	104

	105 // Verify that the selected audio endpoint supports the specified format

	106 // set during construction.

	107 if (!DesiredFormatIsSupported()) {

	108 hr = E_INVALIDARG;

	109 HandleError(hr);

	110 return false;

	111 }

	112

	113 // Initialize the audio stream between the client and the device using

	114 // shared mode and a lowest possible glitch-free latency.

	115 hr = InitializeAudioEngine();

	116 if (FAILED(hr)) {

	117 HandleError(hr);

	118 return false;

	119 }

	120

	121 opened_ = true;

	122

	123 return true;

	124 }

	125

	126 void WASAPIAudioInputStream::Start(AudioInputCallback* callback) {

	127 DCHECK(callback);

	128 DCHECK(opened_);

	129

	130 if (!opened_)

	131 return;

	132

	133 if (started_)

	134 return;

	135

	136 sink_ = callback;

	137

	138 // Create and start the thread that will drive the capturing by waiting for

	139 // capture events.

	140 capture_thread_ =

	141 new base::DelegateSimpleThread(this, "wasapi_capture_thread");

	142 capture_thread_->Start();

	143

	144 // Start streaming data between the endpoint buffer and the audio engine.

	145 HRESULT hr = audio_client_->Start();

	146 DLOG_IF(ERROR, FAILED(hr)) << "Failed to start input streaming.";

	147

	148 started_ = SUCCEEDED(hr);

	149 }

	150

	151 void WASAPIAudioInputStream::Stop() {

	152 if (!started_)

	153 return;

	154

	155 // Shut down the capture thread.

	156 if (stop_capture_event_.IsValid()) {

	157 SetEvent(stop_capture_event_.Get());

	158 }

	159

	160 // Stop the input audio streaming.

	161 HRESULT hr = audio_client_->Stop();

	162 if (FAILED(hr)) {

	163 LOG(ERROR) << "Failed to stop input streaming.";

	164 }

	165

	166 // Wait until the thread completes and perform cleanup.

	167 if (capture_thread_) {

	168 SetEvent(stop_capture_event_.Get());

	169 capture_thread_->Join();

	170 capture_thread_ = NULL;

	171 }

	172

	173 started_ = false;

	174 }

	175

	176 void WASAPIAudioInputStream::Close() {

	177 // It is valid to call Close() before calling open or Start().

	178 // It is also valid to call Close() after Start() has been called.

	179 Stop();

	180 if (sink_) {

	181 sink_->OnClose(this);

	182 sink_ = NULL;

	183 }

	184

	185 // Inform the audio manager that we have been closed. This will cause our

	186 // destruction.

	187 manager_->ReleaseInputStream(this);

	188 }

	189

	190 // static

	191 double WASAPIAudioInputStream::HardwareSampleRate(ERole device_role) {

	192 // It is assumed that this static method is called from a COM thread, i.e.,

	193 // CoInitializeEx() is not called here to avoid STA/MTA conflicts.

	194 ScopedComPtr<IMMDeviceEnumerator> enumerator;

	195 HRESULT hr = CoCreateInstance(__uuidof(MMDeviceEnumerator),

	196 NULL,

	197 CLSCTX_INPROC_SERVER,

	198 __uuidof(IMMDeviceEnumerator),

	199 enumerator.ReceiveVoid());

	200 if (FAILED(hr)) {

	201 NOTREACHED() << "error code: " << hr;

	202 return 0.0;

	203 }

	204

	205 ScopedComPtr<IMMDevice> endpoint_device;

	206 hr = enumerator->GetDefaultAudioEndpoint(eCapture,

	207 device_role,

	208 endpoint_device.Receive());

	209 if (FAILED(hr)) {

	210 NOTREACHED() << "error code: " << hr;

	211 return 0.0;

	212 }

	213

	214 ScopedComPtr<IAudioClient> audio_client;

	215 hr = endpoint_device->Activate(__uuidof(IAudioClient),

	216 CLSCTX_INPROC_SERVER,

	217 NULL,

	218 audio_client.ReceiveVoid());

	219 if (FAILED(hr)) {

	220 NOTREACHED() << "error code: " << hr;

	221 return 0.0;

	222 }

	223

	224 base::win::ScopedCoMem<WAVEFORMATEX> audio_engine_mix_format;

	225 hr = audio_client->GetMixFormat(&audio_engine_mix_format);

	226 if (FAILED(hr)) {

	227 NOTREACHED() << "error code: " << hr;

	228 return 0.0;

	229 }

	230

	231 return static_cast<double>(audio_engine_mix_format->nSamplesPerSec);

	232 }

	233

	234 void WASAPIAudioInputStream::Run() {

	235 ScopedCOMInitializer com_init(ScopedCOMInitializer::kMTA);

	236

	237 // Increase the thread priority.

	238 capture_thread_->SetThreadPriority(base::kThreadPriority_RealtimeAudio);

	239

	240 // Enable MMCSS to ensure that this thread receives prioritized access to

	241 // CPU resources.

	242 DWORD task_index = 0;

	243 HANDLE mm_task = avrt::AvSetMmThreadCharacteristics(L"Pro Audio",

	244 &task_index);

	245 bool mmcss_is_ok =

	246 (mm_task && avrt::AvSetMmThreadPriority(mm_task, AVRT_PRIORITY_CRITICAL));

	247 if (!mmcss_is_ok) {

	248 // Failed to enable MMCSS on this thread. It is not fatal but can lead

	249 // to reduced QoS at high load.

	250 DWORD err = GetLastError();

	251 LOG(WARNING) << "Failed to enable MMCSS (error code=" << err << ").";

	252 }

	253

	254 // Allocate a buffer with a size that enables us to take care of cases like:

	255 // 1) The recorded buffer size is smaller, or does not match exactly with,

	256 // the selected packet size used in each callback.

	257 // 2) The selected buffer size is larger than the recorded buffer size in

	258 // each event.

	259 size_t buffer_frame_index = 0;

	260 size_t capture_buffer_size = std::max(

	261 2 * endpoint_buffer_size_frames_ * frame_size_,

	262 2 * packet_size_frames_ * frame_size_);

	263 scoped_array<uint8> capture_buffer(new uint8[capture_buffer_size]);

	264

	265 LARGE_INTEGER now_count;

	266 bool recording = true;

	267 bool error = false;

	268 HANDLE wait_array[2] = {stop_capture_event_, audio_samples_ready_event_};

	269

	270 while (recording && !error) {

	271 HRESULT hr = S_FALSE;

	272

	273 // Wait for a close-down event or a new capture event.

	274 DWORD wait_result = WaitForMultipleObjects(2, wait_array, FALSE, INFINITE);

	275 switch (wait_result) {

	276 case WAIT_FAILED:

	277 error = true;

	278 break;

	279 case WAIT_OBJECT_0 + 0:

	280 // \|stop_capture_event_\| has been set.

	281 recording = false;

	282 break;

	283 case WAIT_OBJECT_0 + 1:

	284 {

	285 // \|audio_samples_ready_event_\| has been set.

	286 BYTE* data_ptr = NULL;

	287 UINT32 num_frames_to_read = 0;

	288 DWORD flags = 0;

	289 UINT64 device_position = 0;

	290 UINT64 first_audio_frame_timestamp = 0;

	291

	292 // Retrieve the amount of data in the capture endpoint buffer,

	293 // replace it with silence if required, create callbacks for each

	294 // packet and store non-delivered data for the next event.

	295 hr = audio_capture_client_->GetBuffer(&data_ptr,

	296 &num_frames_to_read,

	297 &flags,

	298 &device_position,

	299 &first_audio_frame_timestamp);

	300 if (FAILED(hr)) {

	301 DLOG(ERROR) << "Failed to get data from the capture buffer";

	302 continue;

	303 }

	304

	305 if (num_frames_to_read != 0) {

	306 size_t pos = buffer_frame_index * frame_size_;

	307 size_t num_bytes = num_frames_to_read * frame_size_;

	308 DCHECK_GE(capture_buffer_size, pos + num_bytes);

	309

	310 if (flags & AUDCLNT_BUFFERFLAGS_SILENT) {

	311 // Clear out the local buffer since silence is reported.

	312 memset(&capture_buffer[pos], 0, num_bytes);

	313 } else {

	314 // Copy captured data from audio engine buffer to local buffer.

	315 memcpy(&capture_buffer[pos], data_ptr, num_bytes);

	316 }

	317

	318 buffer_frame_index += num_frames_to_read;

	319 }

	320

	321 hr = audio_capture_client_->ReleaseBuffer(num_frames_to_read);

	322 DLOG_IF(ERROR, FAILED(hr)) << "Failed to release capture buffer";

	323

	324 // Derive a delay estimate for the captured audio packet.

	325 // The value contains two parts (A+B), where A is the delay of the

	326 // first audio frame in the packet and B is the extra delay

	327 // contained in any stored data. Unit is in audio frames.

	328 QueryPerformanceCounter(&now_count);

	329 double audio_delay_frames =

	330 ((perf_count_to_100ns_units_ * now_count.QuadPart -

	331 first_audio_frame_timestamp) / 10000.0) * ms_to_frame_count_ +

	332 buffer_frame_index - num_frames_to_read;

	333

	334 // Deliver captured data to the registered consumer using a packet

	335 // size which was specified at construction.

	336 uint32 delay_frames = static_cast<uint32>(audio_delay_frames + 0.5);

	337 while (buffer_frame_index >= packet_size_frames_) {

	338 uint8* audio_data =

	339 reinterpret_cast<uint8*>(capture_buffer.get());

	340

	341 // Deliver data packet and delay estimation to the user.

	342 sink_->OnData(this,

	343 audio_data,

	344 packet_size_bytes_,

	345 delay_frames * frame_size_);

	346

	347 // Store parts of the recorded data which can't be delivered

	348 // using the current packet size. The stored section will be used

	349 // either in the next while-loop iteration or in the next

	350 // capture event.

	351 memmove(&capture_buffer[0],

	352 &capture_buffer[packet_size_bytes_],

	353 (buffer_frame_index - packet_size_frames_) * frame_size_);

	354

	355 buffer_frame_index -= packet_size_frames_;

	356 delay_frames -= packet_size_frames_;

	357 }

	358 }

	359 break;

	360 default:

	361 error = true;

	362 break;

	363 }

	364 }

	365

	366 if (recording && error) {

	367 // TODO(henrika): perhaps it worth improving the cleanup here by e.g.

	368 // stopping the audio client, joining the thread etc.?

	369 NOTREACHED() << "WASAPI capturing failed with error code "

	370 << GetLastError();

	371 }

	372

	373 // Disable MMCSS.

	374 if (mm_task && !avrt::AvRevertMmThreadCharacteristics(mm_task)) {

	375 PLOG(WARNING) << "Failed to disable MMCSS";

	376 }

	377 }

	378

	379 void WASAPIAudioInputStream::HandleError(HRESULT err) {

	380 NOTREACHED() << "Error code: " << err;

	381 if (sink_)

	382 sink_->OnError(this, static_cast<int>(err));

	383 }

	384

	385 HRESULT WASAPIAudioInputStream::SetCaptureDevice(ERole device_role) {

	386 ScopedComPtr<IMMDeviceEnumerator> enumerator;

	387 HRESULT hr = CoCreateInstance(__uuidof(MMDeviceEnumerator),

	388 NULL,

	389 CLSCTX_INPROC_SERVER,

	390 __uuidof(IMMDeviceEnumerator),

	391 enumerator.ReceiveVoid());

	392 if (SUCCEEDED(hr)) {

	393 // Retrieve the default capture audio endpoint for the specified role.

	394 // Note that, in Windows Vista, the MMDevice API supports device roles

	395 // but the system-supplied user interface programs do not.

	396 hr = enumerator->GetDefaultAudioEndpoint(eCapture,

	397 device_role,

	398 endpoint_device_.Receive());

	399

	400 // Verify that the audio endpoint device is active. That is, the audio

	401 // adapter that connects to the endpoint device is present and enabled.

	402 DWORD state = DEVICE_STATE_DISABLED;

	403 hr = endpoint_device_->GetState(&state);

	404 if (SUCCEEDED(hr)) {

	405 if (!(state & DEVICE_STATE_ACTIVE)) {

	406 DLOG(ERROR) << "Selected capture device is not active.";

	407 hr = E_ACCESSDENIED;

	408 }

	409 }

	410 }

	411

	412 return hr;

	413 }

	414

	415 HRESULT WASAPIAudioInputStream::ActivateCaptureDevice() {

	416 // Creates and activates an IAudioClient COM object given the selected

	417 // capture endpoint device.

	418 HRESULT hr = endpoint_device_->Activate(__uuidof(IAudioClient),

	419 CLSCTX_INPROC_SERVER,

	420 NULL,

	421 audio_client_.ReceiveVoid());

	422 return hr;

	423 }

	424

	425 HRESULT WASAPIAudioInputStream::GetAudioEngineStreamFormat() {

	426 // Retrieve the stream format that the audio engine uses for its internal

	427 // processing/mixing of shared-mode streams.

	428 return audio_client_->GetMixFormat(&audio_engine_mix_format_);

	429 }

	430

	431 bool WASAPIAudioInputStream::DesiredFormatIsSupported() {

	432 // In shared mode, the audio engine always supports the mix format,

	433 // which is stored in the \|audio_engine_mix_format_\| member. In addition,

	434 // the audio engine might support similar formats that have the same

	435 // sample rate and number of channels as the mix format but differ in

	436 // the representation of audio sample values.

	437 base::win::ScopedCoMem<WAVEFORMATEX> closest_match;

	438 HRESULT hr = audio_client_->IsFormatSupported(AUDCLNT_SHAREMODE_SHARED,

	439 &format_,

	440 &closest_match);

	441 DLOG_IF(ERROR, hr == S_FALSE) << "Format is not supported "

	442 << "but a closest match exists.";

	443 return (hr == S_OK);

	444 }

	445

	446 HRESULT WASAPIAudioInputStream::InitializeAudioEngine() {

	447 // Initialize the audio stream between the client and the device.

	448 // We connect indirectly through the audio engine by using shared mode

	449 // and WASAPI is initialized in an event driven mode.

	450 // Note that, \|hnsBufferDuration\| is set of 0, which ensures that the

	451 // buffer is never smaller than the minimum buffer size needed to ensure

	452 // that glitches do not occur between the periodic processing passes.

	453 // This setting should lead to lowest possible latency.

	454 HRESULT hr = audio_client_->Initialize(AUDCLNT_SHAREMODE_SHARED,

	455 AUDCLNT_STREAMFLAGS_EVENTCALLBACK \|

	456 AUDCLNT_STREAMFLAGS_NOPERSIST,

	457 0, // hnsBufferDuration

	458 0,

	459 &format_,

	460 NULL);

	461 if (FAILED(hr))

	462 return hr;

	463

	464 // Retrieve the length of the endpoint buffer shared between the client

	465 // and the audio engine. The buffer length determines the maximum amount

	466 // of capture data that the audio engine can read from the endpoint buffer

	467 // during a single processing pass.

	468 // A typical value is 960 audio frames <=> 20ms @ 48kHz sample rate.

	469 hr = audio_client_->GetBufferSize(&endpoint_buffer_size_frames_);

	470 if (FAILED(hr))

	471 return hr;

	472 DVLOG(1) << "endpoint buffer size: " << endpoint_buffer_size_frames_

	473 << " [frames]";

	474

	475 #ifndef NDEBUG

	476 // The period between processing passes by the audio engine is fixed for a

	477 // particular audio endpoint device and represents the smallest processing

	478 // quantum for the audio engine. This period plus the stream latency between

	479 // the buffer and endpoint device represents the minimum possible latency

	480 // that an audio application can achieve.

	481 // TODO(henrika): possibly remove this section when all parts are ready.

	482 REFERENCE_TIME device_period_shared_mode = 0;

	483 REFERENCE_TIME device_period_exclusive_mode = 0;

	484 HRESULT hr_dbg = audio_client_->GetDevicePeriod(

	485 &device_period_shared_mode, &device_period_exclusive_mode);

	486 if (SUCCEEDED(hr_dbg)) {

	487 DVLOG(1) << "device period: "

	488 << static_cast<double>(device_period_shared_mode / 10000.0)

	489 << " [ms]";

	490 }

	491

	492 REFERENCE_TIME latency = 0;

	493 hr_dbg = audio_client_->GetStreamLatency(&latency);

	494 if (SUCCEEDED(hr_dbg)) {

	495 DVLOG(1) << "stream latency: " << static_cast<double>(latency / 10000.0)

	496 << " [ms]";

	497 }

	498 #endif

	499

	500 // Set the event handle that the audio engine will signal each time

	501 // a buffer becomes ready to be processed by the client.

	502 hr = audio_client_->SetEventHandle(audio_samples_ready_event_.Get());

	503 if (FAILED(hr))

	504 return hr;

	505

	506 // Get access to the IAudioCaptureClient interface. This interface

	507 // enables us to read input data from the capture endpoint buffer.

	508 hr = audio_client_->GetService(__uuidof(IAudioCaptureClient),

	509 audio_capture_client_.ReceiveVoid());

	510 return hr;

	511 }

OLD	NEW