media/audio/win/audio_low_latency_input_win.cc - Issue 2690793002: Add basic resample support to WASAPIAudioInputStream.

Side by Side Diff: media/audio/win/audio_low_latency_input_win.cc

Issue 2690793002: Add basic resample support to WASAPIAudioInputStream. (Closed)

Patch Set: Add FIFO in cases where we don't get an exact buffer match for resampling Created 3 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "media/audio/win/audio_low_latency_input_win.h"	5 #include "media/audio/win/audio_low_latency_input_win.h"

6	6

7 #include <memory>	7 #include <memory>

8	8

9 #include "base/logging.h"	9 #include "base/logging.h"

10 #include "base/metrics/histogram_macros.h"	10 #include "base/metrics/histogram_macros.h"

11 #include "base/strings/utf_string_conversions.h"	11 #include "base/strings/utf_string_conversions.h"

12 #include "base/trace_event/trace_event.h"	12 #include "base/trace_event/trace_event.h"

13 #include "media/audio/audio_device_description.h"	13 #include "media/audio/audio_device_description.h"

14 #include "media/audio/win/audio_manager_win.h"	14 #include "media/audio/win/audio_manager_win.h"

15 #include "media/audio/win/avrt_wrapper_win.h"	15 #include "media/audio/win/avrt_wrapper_win.h"

16 #include "media/audio/win/core_audio_util_win.h"	16 #include "media/audio/win/core_audio_util_win.h"

17 #include "media/base/audio_bus.h"	17 #include "media/base/audio_bus.h"

	18 #include "media/base/audio_fifo.h"

	19 #include "media/base/channel_layout.h"

	20 #include "media/base/limits.h"

	21 #include "media/base/multi_channel_resampler.h"

18	22

19 using base::win::ScopedComPtr;	23 using base::win::ScopedComPtr;

20 using base::win::ScopedCOMInitializer;	24 using base::win::ScopedCOMInitializer;

21	25

22 namespace media {	26 namespace media {

23	27

24 WASAPIAudioInputStream::WASAPIAudioInputStream(AudioManagerWin* manager,	28 WASAPIAudioInputStream::WASAPIAudioInputStream(AudioManagerWin* manager,

25 const AudioParameters& params,	29 const AudioParameters& params,

26 const std::string& device_id)	30 const std::string& device_id)

27 : manager_(manager),	31 : manager_(manager),

(...skipping 88 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
116 // set during construction.	120 // set during construction.

117 if (!DesiredFormatIsSupported()) {	121 if (!DesiredFormatIsSupported()) {

118 open_result_ = OPEN_RESULT_FORMAT_NOT_SUPPORTED;	122 open_result_ = OPEN_RESULT_FORMAT_NOT_SUPPORTED;

119 ReportOpenResult();	123 ReportOpenResult();

120 return false;	124 return false;

121 }	125 }

122	126

123 // Initialize the audio stream between the client and the device using	127 // Initialize the audio stream between the client and the device using

124 // shared mode and a lowest possible glitch-free latency.	128 // shared mode and a lowest possible glitch-free latency.

125 hr = InitializeAudioEngine();	129 hr = InitializeAudioEngine();

	130 if (SUCCEEDED(hr) && converter_)

	131 open_result_ = OPEN_RESULT_OK_WITH_RESAMPLING;

126 ReportOpenResult(); // Report before we assign a value to \|opened_\|.	132 ReportOpenResult(); // Report before we assign a value to \|opened_\|.

127 opened_ = SUCCEEDED(hr);	133 opened_ = SUCCEEDED(hr);

128 DCHECK(open_result_ == OPEN_RESULT_OK \|\| !opened_);

129	134

130 return opened_;	135 return opened_;

131 }	136 }

132	137

133 void WASAPIAudioInputStream::Start(AudioInputCallback* callback) {	138 void WASAPIAudioInputStream::Start(AudioInputCallback* callback) {

134 DCHECK(CalledOnValidThread());	139 DCHECK(CalledOnValidThread());

135 DCHECK(callback);	140 DCHECK(callback);

136 DLOG_IF(ERROR, !opened_) << "Open() has not been called successfully";	141 DLOG_IF(ERROR, !opened_) << "Open() has not been called successfully";

137 if (!opened_)	142 if (!opened_)

138 return;	143 return;

(...skipping 81 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
220 started_ = false;	225 started_ = false;

221 sink_ = NULL;	226 sink_ = NULL;

222 }	227 }

223	228

224 void WASAPIAudioInputStream::Close() {	229 void WASAPIAudioInputStream::Close() {

225 DVLOG(1) << "WASAPIAudioInputStream::Close()";	230 DVLOG(1) << "WASAPIAudioInputStream::Close()";

226 // It is valid to call Close() before calling open or Start().	231 // It is valid to call Close() before calling open or Start().

227 // It is also valid to call Close() after Start() has been called.	232 // It is also valid to call Close() after Start() has been called.

228 Stop();	233 Stop();

229	234

	235 if (converter_)

	236 converter_->RemoveInput(this);

	237

230 // Inform the audio manager that we have been closed. This will cause our	238 // Inform the audio manager that we have been closed. This will cause our

231 // destruction.	239 // destruction.

232 manager_->ReleaseInputStream(this);	240 manager_->ReleaseInputStream(this);

233 }	241 }

234	242

235 double WASAPIAudioInputStream::GetMaxVolume() {	243 double WASAPIAudioInputStream::GetMaxVolume() {

236 // Verify that Open() has been called successfully, to ensure that an audio	244 // Verify that Open() has been called successfully, to ensure that an audio

237 // session exists and that an ISimpleAudioVolume interface has been created.	245 // session exists and that an ISimpleAudioVolume interface has been created.

238 DLOG_IF(ERROR, !opened_) << "Open() has not been called successfully";	246 DLOG_IF(ERROR, !opened_) << "Open() has not been called successfully";

239 if (!opened_)	247 if (!opened_)

(...skipping 177 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
417 // each time SetVolume() is called through IPC by the render-side AGC.	425 // each time SetVolume() is called through IPC by the render-side AGC.

418 GetAgcVolume(&volume);	426 GetAgcVolume(&volume);

419	427

420 // Deliver captured data to the registered consumer using a packet	428 // Deliver captured data to the registered consumer using a packet

421 // size which was specified at construction.	429 // size which was specified at construction.

422 uint32_t delay_frames = static_cast<uint32_t>(audio_delay_frames + 0.5);	430 uint32_t delay_frames = static_cast<uint32_t>(audio_delay_frames + 0.5);

423 while (buffer_frame_index >= packet_size_frames_) {	431 while (buffer_frame_index >= packet_size_frames_) {

424 // Copy data to audio bus to match the OnData interface.	432 // Copy data to audio bus to match the OnData interface.

425 uint8_t* audio_data =	433 uint8_t* audio_data =

426 reinterpret_cast<uint8_t*>(capture_buffer.get());	434 reinterpret_cast<uint8_t*>(capture_buffer.get());

427 audio_bus_->FromInterleaved(audio_data, audio_bus_->frames(),

428 format_.wBitsPerSample / 8);

429	435

430 // Deliver data packet, delay estimation and volume level to	436 bool issue_callback = true;

431 // the user.	437 if (converter_) {

432 sink_->OnData(this, audio_bus_.get(), delay_frames * frame_size_,	438 convert_bus_->FromInterleaved(audio_data, packet_size_frames_,

433 volume);	439 format_.wBitsPerSample / 8);

	440 if (convert_fifo_) {

	441 convert_fifo_->Push(convert_bus_.get());

	442 // Since we have a fifo, we know that we have one in order to

	443 // avoid underruns. The size of the fifo will be large enough

	444 // to hold two buffers from the audio layer, but the minimum

	445 // number of frames required in order to safely be able to

	446 // convert data, will be one more frame than the buffer size

	447 // we have (one frame more will cover a larger time period than

	448 // the buffer size as requested by the client, and is only needed

	449 // when we reach the point where there would otherwise be an

	450 // underrun).

	451 issue_callback =

	452 (convert_fifo_->frames() >= (convert_bus_->frames() + 1));

	453 if (issue_callback) {

	454 data_was_converted_ = 0;

	455 converter_->ConvertWithDelay(delay_frames, audio_bus_.get());

	456 DCHECK(data_was_converted_ >= 0 \|\| data_was_converted_ < 2);

	457 }

	458 } else {

	459 data_was_converted_ = 0;

	460 converter_->ConvertWithDelay(delay_frames, audio_bus_.get());

	461 DCHECK_EQ(1, data_was_converted_);

	462 }

	463 } else {

	464 audio_bus_->FromInterleaved(audio_data, audio_bus_->frames(),

	465 format_.wBitsPerSample / 8);

	466 }

	467

	468 if (issue_callback) {

	469 // Deliver data packet, delay estimation and volume level to

	470 // the user.

	471 sink_->OnData(this, audio_bus_.get(), delay_frames * frame_size_,

	472 volume);

	473 }

434	474

435 // Store parts of the recorded data which can't be delivered	475 // Store parts of the recorded data which can't be delivered

436 // using the current packet size. The stored section will be used	476 // using the current packet size. The stored section will be used

437 // either in the next while-loop iteration or in the next	477 // either in the next while-loop iteration or in the next

438 // capture event.	478 // capture event.

439 // TODO(tommi): If this data will be used in the next capture	479 // TODO(tommi): If this data will be used in the next capture

440 // event, we will report incorrect delay estimates because	480 // event, we will report incorrect delay estimates because

441 // we'll use the one for the captured data that time around	481 // we'll use the one for the captured data that time around

442 // (i.e. in the future).	482 // (i.e. in the future).

443 memmove(&capture_buffer[0], &capture_buffer[packet_size_bytes_],	483 memmove(&capture_buffer[0], &capture_buffer[packet_size_bytes_],

(...skipping 142 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
586 // engine can convert between a standard PCM sample size used by the	626 // engine can convert between a standard PCM sample size used by the

587 // application and the floating-point samples that the engine uses for its	627 // application and the floating-point samples that the engine uses for its

588 // internal processing. However, the format for an application stream	628 // internal processing. However, the format for an application stream

589 // typically must have the same number of channels and the same sample	629 // typically must have the same number of channels and the same sample

590 // rate as the stream format used by the device.	630 // rate as the stream format used by the device.

591 // Many audio devices support both PCM and non-PCM stream formats. However,	631 // Many audio devices support both PCM and non-PCM stream formats. However,

592 // the audio engine can mix only PCM streams.	632 // the audio engine can mix only PCM streams.

593 base::win::ScopedCoMem<WAVEFORMATEX> closest_match;	633 base::win::ScopedCoMem<WAVEFORMATEX> closest_match;

594 HRESULT hr = audio_client_->IsFormatSupported(AUDCLNT_SHAREMODE_SHARED,	634 HRESULT hr = audio_client_->IsFormatSupported(AUDCLNT_SHAREMODE_SHARED,

595 &format_, &closest_match);	635 &format_, &closest_match);

596 DLOG_IF(ERROR, hr == S_FALSE) << "Format is not supported "	636 DLOG_IF(ERROR, hr == S_FALSE)

597 << "but a closest match exists.";	637 << "Format is not supported but a closest match exists.";

	638

	639 if (hr == S_FALSE &&

	640 closest_match->nSamplesPerSec >= limits::kMinSampleRate &&

	641 closest_match->nSamplesPerSec <= limits::kMaxSampleRate) {

	642 DVLOG(1) << "Audio capture data conversion needed.";

	643 // Ideally, we want a 1:1 ratio between the buffers we get and the buffers

	644 // we give to OnData so that each buffer we receive from the OS can be

	645 // directly converted to a buffer that matches with what was asked for.

	646 const double buffer_ratio =

	647 format_.nSamplesPerSec / static_cast<double>(audio_bus_->frames());

	648 const size_t new_frames_per_buffer =

	649 static_cast<size_t>(closest_match->nSamplesPerSec / buffer_ratio);

	650

	651 const AudioParameters input(

	652 AudioParameters::AUDIO_PCM_LOW_LATENCY,

	653 GuessChannelLayout(closest_match->nChannels),

	654 closest_match->nSamplesPerSec,

	655 // We need to be careful here to not pick the closest wBitsPerSample

	656 // match as we need to use the PCM format (which might not be what

	657 // closest_match->wFormat is) and the internal resampler doesn't

	658 // support all formats we might get here. So, we stick to the

	659 // wBitsPerSample that was asked for originally (most likely 16).

	660 format_.wBitsPerSample, new_frames_per_buffer);

	661

	662 const AudioParameters output(AudioParameters::AUDIO_PCM_LOW_LATENCY,

	663 GuessChannelLayout(format_.nChannels),

	664 format_.nSamplesPerSec, format_.wBitsPerSample,

	665 audio_bus_->frames());

	666

	667 converter_.reset(new AudioConverter(input, output, false));

	668 converter_->AddInput(this);

	669 converter_->PrimeWithSilence();

	670 convert_bus_ = AudioBus::Create(input);

	671

	672 // Now change the format we're going to ask for to better match with what

	673 // the OS can provide. If we succeed in opening the stream with these

	674 // params, we can take care of the required resampling.

	675 format_.nSamplesPerSec = closest_match->nSamplesPerSec;

	676 format_.nChannels = closest_match->nChannels;

	677 format_.nBlockAlign = (format_.wBitsPerSample / 8) * format_.nChannels;

	678 format_.nAvgBytesPerSec = format_.nSamplesPerSec * format_.nBlockAlign;

	679

	680 // Update our packet size assumptions based on the new format.

	681 const auto new_bytes_per_buffer = convert_bus_->frames() *

	682 format_.nChannels *

	683 (format_.wBitsPerSample / 8);

	684 packet_size_frames_ = new_bytes_per_buffer / format_.nBlockAlign;

	685 packet_size_bytes_ = new_bytes_per_buffer;

	686 frame_size_ = format_.nBlockAlign;

	687 ms_to_frame_count_ = static_cast<double>(format_.nSamplesPerSec) / 1000.0;

	688

	689 // Check if we'll need to inject an intermediary buffer to avoid
	DaleCurtis 2017/02/16 19:59:48 You could also determine this by checking if \|new_frames_per_buffer\| has a fractional part. tommi (sloooow) - chröme 2017/02/17 17:09:21 Of course! done.
	690 // occasional underruns. This can happen if the buffers don't represent

	691 // an equal time period.

	692 const double buffer_ratio2 = closest_match->nSamplesPerSec /

	693 static_cast<double>(convert_bus_->frames());

	694 DCHECK(buffer_ratio <= buffer_ratio2);
	DaleCurtis 2017/02/16 19:59:48 DCHECK_LE(). tommi (sloooow) - chröme 2017/02/17 17:09:21 Now removed.
	695 if (buffer_ratio2 == buffer_ratio) {

	696 // The buffer ratio is equal, so nothing further needs to be done.

	697 // For every buffer we receive, we'll convert directly to a buffer that

	698 // will be delivered to the caller.

	699 } else {

	700 DVLOG(1) << "Audio capture data conversion: Need to inject fifo";

	701 convert_fifo_.reset(

	702 new AudioFifo(format_.nChannels, new_frames_per_buffer * 2));

	703 }

	704

	705 // Indicate that we're good to go with a close match.

	706 hr = S_OK;

	707 }

	708

598 return (hr == S_OK);	709 return (hr == S_OK);

599 }	710 }

600	711

601 HRESULT WASAPIAudioInputStream::InitializeAudioEngine() {	712 HRESULT WASAPIAudioInputStream::InitializeAudioEngine() {

602 DCHECK_EQ(OPEN_RESULT_OK, open_result_);	713 DCHECK_EQ(OPEN_RESULT_OK, open_result_);

603 DWORD flags;	714 DWORD flags;

604 // Use event-driven mode only for regular input devices. For loopback the	715 // Use event-driven mode only for regular input devices. For loopback the

605 // EVENTCALLBACK flag is specified when initializing	716 // EVENTCALLBACK flag is specified when initializing

606 // \|audio_render_client_for_loopback_\|.	717 // \|audio_render_client_for_loopback_\|.

607 if (device_id_ == AudioDeviceDescription::kLoopbackInputDeviceId \|\|	718 if (device_id_ == AudioDeviceDescription::kLoopbackInputDeviceId \|\|

(...skipping 123 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
731	842

732 return hr;	843 return hr;

733 }	844 }

734	845

735 void WASAPIAudioInputStream::ReportOpenResult() const {	846 void WASAPIAudioInputStream::ReportOpenResult() const {

736 DCHECK(!opened_); // This method must be called before we set this flag.	847 DCHECK(!opened_); // This method must be called before we set this flag.

737 UMA_HISTOGRAM_ENUMERATION("Media.Audio.Capture.Win.Open", open_result_,	848 UMA_HISTOGRAM_ENUMERATION("Media.Audio.Capture.Win.Open", open_result_,

738 OPEN_RESULT_MAX + 1);	849 OPEN_RESULT_MAX + 1);

739 }	850 }

740	851

	852 double WASAPIAudioInputStream::ProvideInput(AudioBus* audio_bus,

	853 uint32_t frames_delayed) {

	854 if (convert_fifo_) {

	855 int frames = std::min(convert_fifo_->frames(), audio_bus->frames());

	856 convert_fifo_->Consume(audio_bus, 0, frames);

	857 LOG_IF(ERROR, frames != audio_bus->frames())

	858 << "Wanted " << audio_bus->frames() << " got " << frames;

	859 } else {

	860 DCHECK(!data_was_converted_);

	861 convert_bus_->CopyTo(audio_bus);

	862 data_was_converted_ = true;

	863 }

	864

	865 return 1.0;

	866 }

	867

741 } // namespace media	868 } // namespace media

OLD	NEW

« no previous file with comments | « media/audio/win/audio_low_latency_input_win.h ('k') | media/audio/win/audio_low_latency_input_win_unittest.cc » ('j') | no next file with comments »