OLD | NEW |
---|---|
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "content/renderer/media/webrtc_audio_device_impl.h" | 5 #include "content/renderer/media/webrtc_audio_device_impl.h" |
6 | 6 |
7 #include "base/bind.h" | 7 #include "base/bind.h" |
8 #include "base/string_util.h" | 8 #include "base/string_util.h" |
9 #include "content/common/view_messages.h" | |
9 #include "content/renderer/render_thread_impl.h" | 10 #include "content/renderer/render_thread_impl.h" |
10 #include "media/audio/audio_util.h" | 11 #include "media/audio/audio_util.h" |
11 | 12 |
12 static const int64 kMillisecondsBetweenProcessCalls = 5000; | 13 static const int64 kMillisecondsBetweenProcessCalls = 5000; |
13 static const char kVersion[] = "WebRTC AudioDevice 1.0.0.Chrome"; | 14 static const char kVersion[] = "WebRTC AudioDevice 1.0.0.Chrome"; |
14 | 15 |
16 static int GetAudioInputHardwareSampleRate() { | |
henrika (OOO until Aug 14) 2011/10/18 08:13:14
I decided to ask for the input sample rate here in
17 static double input_sample_rate = 0; | |
18 if (!input_sample_rate) { | |
19 RenderThreadImpl::current()->Send( | |
20 new ViewHostMsg_GetHardwareInputSampleRate(&input_sample_rate)); | |
21 } | |
22 return static_cast<int>(input_sample_rate); | |
23 } | |
24 | |
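The helper above issues the synchronous ViewHostMsg_GetHardwareInputSampleRate IPC only on the first call and caches the result in a function-local static. A standalone sketch of that query-once-and-cache pattern (illustrative only; the stand-in query below replaces the real IPC):

```cpp
// Standalone sketch of the caching pattern used above (not patch code;
// the real query is a synchronous IPC to the browser process).
#include <cstdio>

static double QueryHardwareInputSampleRate() {
  std::puts("querying browser process...");
  return 48000.0;  // Stand-in value for illustration.
}

static int GetAudioInputHardwareSampleRateSketch() {
  static double input_sample_rate = 0;
  if (!input_sample_rate)  // Only issue the (blocking) query once.
    input_sample_rate = QueryHardwareInputSampleRate();
  return static_cast<int>(input_sample_rate);
}

int main() {
  GetAudioInputHardwareSampleRateSketch();  // Issues the query.
  GetAudioInputHardwareSampleRateSketch();  // Served from the cached value.
  return 0;
}
```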
15 WebRtcAudioDeviceImpl::WebRtcAudioDeviceImpl() | 25 WebRtcAudioDeviceImpl::WebRtcAudioDeviceImpl() |
16 : ref_count_(0), | 26 : ref_count_(0), |
17 render_loop_(base::MessageLoopProxy::current()), | 27 render_loop_(base::MessageLoopProxy::current()), |
18 audio_transport_callback_(NULL), | 28 audio_transport_callback_(NULL), |
19 input_buffer_size_(0), | 29 input_buffer_size_(0), |
20 output_buffer_size_(0), | 30 output_buffer_size_(0), |
21 input_channels_(0), | 31 input_channels_(0), |
22 output_channels_(0), | 32 output_channels_(0), |
23 input_sample_rate_(0), | 33 input_sample_rate_(0), |
24 output_sample_rate_(0), | 34 output_sample_rate_(0), |
(...skipping 40 matching lines...)
65 | 75 |
66 // Store the reported audio delay locally. | 76 // Store the reported audio delay locally. |
67 output_delay_ms_ = audio_delay_milliseconds; | 77 output_delay_ms_ = audio_delay_milliseconds; |
68 | 78 |
69 const int channels = audio_data.size(); | 79 const int channels = audio_data.size(); |
70 DCHECK_LE(channels, output_channels_); | 80 DCHECK_LE(channels, output_channels_); |
71 | 81 |
72 int samples_per_sec = static_cast<int>(output_sample_rate_); | 82 int samples_per_sec = static_cast<int>(output_sample_rate_); |
73 if (samples_per_sec == 44100) { | 83 if (samples_per_sec == 44100) { |
74 // Even if the hardware runs at 44.1kHz, we use 44.0 internally. | 84 // Even if the hardware runs at 44.1kHz, we use 44.0 internally. |
75 // Can only happen on Mac OS X currently since Windows and Mac | |
76 // both uses 48kHz. | |
77 samples_per_sec = 44000; | 85 samples_per_sec = 44000; |
78 } | 86 } |
79 uint32_t samples_per_10_msec = (samples_per_sec / 100); | 87 uint32_t samples_per_10_msec = (samples_per_sec / 100); |
80 const int bytes_per_10_msec = | 88 const int bytes_per_10_msec = |
81 channels * samples_per_10_msec * bytes_per_sample_; | 89 channels * samples_per_10_msec * bytes_per_sample_; |
82 | 90 |
83 uint32_t num_audio_samples = 0; | 91 uint32_t num_audio_samples = 0; |
84 size_t accumulated_audio_samples = 0; | 92 size_t accumulated_audio_samples = 0; |
85 | 93 |
86 char* audio_byte_buffer = reinterpret_cast<char*>(output_buffer_.get()); | 94 char* audio_byte_buffer = reinterpret_cast<char*>(output_buffer_.get()); |
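For reference, the 10 ms block arithmetic above works out as follows. This is a standalone sketch, not part of the patch; the mono/16-bit values mirror what this CL assumes.

```cpp
// Standalone sketch of the render-side 10 ms block arithmetic (not patch code).
#include <cstdio>

int main() {
  const int bytes_per_sample = 2;  // 16-bit PCM, as assumed by this CL.
  const int channels = 1;          // Mono rendering in this CL.
  const int rates[] = {48000, 44100};
  for (int i = 0; i < 2; ++i) {
    int rate = rates[i];
    if (rate == 44100)
      rate = 44000;  // 44.1 kHz hardware is treated as 44.0 kHz internally.
    const int samples_per_10_msec = rate / 100;
    const int bytes_per_10_msec =
        channels * samples_per_10_msec * bytes_per_sample;
    std::printf("%d Hz -> %d frames, %d bytes per 10 ms\n",
                rates[i], samples_per_10_msec, bytes_per_10_msec);
  }
  return 0;
}
```

At 48 kHz this gives 480 frames (960 bytes) per 10 ms block; at 44.1 kHz, rounded to 44.0 kHz, it gives 440 frames (880 bytes).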
(...skipping 38 matching lines...)
125 const int channels = audio_data.size(); | 133 const int channels = audio_data.size(); |
126 DCHECK_LE(channels, input_channels_); | 134 DCHECK_LE(channels, input_channels_); |
127 uint32_t new_mic_level = 0; | 135 uint32_t new_mic_level = 0; |
128 | 136 |
129 // Interleave, scale, and clip input to int16 and store result in | 137 // Interleave, scale, and clip input to int16 and store result in |
130 // a local byte buffer. | 138 // a local byte buffer. |
131 media::InterleaveFloatToInt16(audio_data, | 139 media::InterleaveFloatToInt16(audio_data, |
132 input_buffer_.get(), | 140 input_buffer_.get(), |
133 number_of_frames); | 141 number_of_frames); |
134 | 142 |
135 const int samples_per_sec = static_cast<int>(input_sample_rate_); | 143 int samples_per_sec = static_cast<int>(input_sample_rate_); |
144 if (samples_per_sec == 44100) { | |
145 // Even if the hardware runs at 44.1kHz, we use 44.0 internally. | |
146 samples_per_sec = 44000; | |
147 } | |
136 const int samples_per_10_msec = (samples_per_sec / 100); | 148 const int samples_per_10_msec = (samples_per_sec / 100); |
137 const int bytes_per_10_msec = | 149 const int bytes_per_10_msec = |
138 channels * samples_per_10_msec * bytes_per_sample_; | 150 channels * samples_per_10_msec * bytes_per_sample_; |
139 size_t accumulated_audio_samples = 0; | 151 size_t accumulated_audio_samples = 0; |
140 | 152 |
141 char* audio_byte_buffer = reinterpret_cast<char*>(input_buffer_.get()); | 153 char* audio_byte_buffer = reinterpret_cast<char*>(input_buffer_.get()); |
142 | 154 |
143 // Write audio samples in blocks of 10 milliseconds to the registered | 155 // Write audio samples in blocks of 10 milliseconds to the registered |
144 // webrtc::AudioTransport sink. Keep writing until our internal byte | 156 // webrtc::AudioTransport sink. Keep writing until our internal byte |
145 // buffer is empty. | 157 // buffer is empty. |
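The "interleave, scale, and clip" step above is handled by media::InterleaveFloatToInt16(). A rough standalone sketch of what that operation amounts to (illustrative only, not the actual media/ implementation):

```cpp
// Illustrative sketch only; not the actual media::InterleaveFloatToInt16().
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

// Converts per-channel float buffers in [-1, 1] into one interleaved int16
// buffer, clipping samples that fall outside the valid range.
void InterleaveFloatToInt16Sketch(const std::vector<float*>& channels,
                                  int16_t* out,
                                  size_t frames) {
  for (size_t frame = 0; frame < frames; ++frame) {
    for (size_t ch = 0; ch < channels.size(); ++ch) {
      float sample = channels[ch][frame];
      if (sample > 1.0f)
        sample = 1.0f;
      else if (sample < -1.0f)
        sample = -1.0f;
      out[frame * channels.size() + ch] =
          static_cast<int16_t>(sample * 32767.0f);  // Scale to int16 range.
    }
  }
}

int main() {
  float left[4] = {0.5f, -0.5f, 1.5f, -1.5f};  // Last two samples get clipped.
  float right[4] = {0.25f, -0.25f, 0.0f, 1.0f};
  std::vector<float*> channels;
  channels.push_back(left);
  channels.push_back(right);
  int16_t interleaved[8] = {0};
  InterleaveFloatToInt16Sketch(channels, interleaved, 4);
  for (int i = 0; i < 8; ++i)
    std::printf("%d ", interleaved[i]);
  std::printf("\n");
  return 0;
}
```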
(...skipping 121 matching lines...)
267 | 279 |
268 // Calling Init() multiple times in a row is OK. | 280 // Calling Init() multiple times in a row is OK. |
269 if (initialized_) | 281 if (initialized_) |
270 return 0; | 282 return 0; |
271 | 283 |
272 DCHECK(!audio_input_device_); | 284 DCHECK(!audio_input_device_); |
273 DCHECK(!audio_output_device_); | 285 DCHECK(!audio_output_device_); |
274 DCHECK(!input_buffer_.get()); | 286 DCHECK(!input_buffer_.get()); |
275 DCHECK(!output_buffer_.get()); | 287 DCHECK(!output_buffer_.get()); |
276 | 288 |
277 // TODO(henrika): add AudioInputDevice::GetAudioHardwareSampleRate(). | |
278 // Assume that input and output sample rates are identical for now. | |
279 | |
280 // Ask the browser for the default audio output hardware sample-rate. | 289 // Ask the browser for the default audio output hardware sample-rate. |
281 // This request is based on a synchronous IPC message. | 290 // This request is based on a synchronous IPC message. |
282 int output_sample_rate = | 291 int output_sample_rate = |
283 static_cast<int>(AudioDevice::GetAudioHardwareSampleRate()); | 292 static_cast<int>(AudioDevice::GetAudioHardwareSampleRate()); |
284 VLOG(1) << "Audio hardware sample rate: " << output_sample_rate; | 293 VLOG(1) << "Audio output hardware sample rate: " << output_sample_rate; |
294 | |
295 // Ask the browser for the default audio input hardware sample-rate. | |
296 // This request is based on a synchronous IPC message. | |
297 int input_sample_rate = GetAudioInputHardwareSampleRate(); | |
henrika (OOO until Aug 14) 2011/10/18 08:13:14
We now ask for the input sample rate as well.
298 VLOG(1) << "Audio input hardware sample rate: " << input_sample_rate; | |
285 | 299 |
286 int input_channels = 0; | 300 int input_channels = 0; |
287 int output_channels = 0; | 301 int output_channels = 0; |
288 | 302 |
289 size_t input_buffer_size = 0; | 303 size_t input_buffer_size = 0; |
290 size_t output_buffer_size = 0; | 304 size_t output_buffer_size = 0; |
291 | 305 |
292 // For real-time audio (in combination with the webrtc::VoiceEngine) it | 306 // For real-time audio (in combination with the webrtc::VoiceEngine) it |
293 // is convenient to use audio buffers of size N*10ms. | 307 // is convenient to use audio buffers of size N*10ms. |
308 | |
294 #if defined(OS_WIN) | 309 #if defined(OS_WIN) |
295 if (output_sample_rate != 48000) { | 310 if (output_sample_rate != 48000) { |
296 DLOG(ERROR) << "Only 48kHz sample rate is supported on Windows."; | 311 DLOG(ERROR) << "Only 48kHz sample rate is supported on Windows."; |
297 return -1; | 312 return -1; |
298 } | 313 } |
299 input_channels = 1; | 314 |
315 // Use stereo recording on Windows since low-latency Core Audio (WASAPI) | |
316 // does not support mono. | |
317 input_channels = 2; | |
300 output_channels = 1; | 318 output_channels = 1; |
301 // Capture side: AUDIO_PCM_LINEAR on Windows is based on a callback- | 319 |
302 // driven Wave implementation where 3 buffers are used for recording audio. | 320 // Capture side: AUDIO_PCM_LOW_LATENCY is based on the Core Audio (WASAPI) |
303 // Each buffer is of the size that we specify here and using more than one | 321 // API which was introduced in Windows Vista. For lower Windows versions, |
304 // does not increase the delay but only adds robustness against dropping | 322 // a callback-driven Wave implementation is used instead. An input buffer |
305 // audio. It might also affect the initial start-up time before callbacks | 323 // size of 10ms works well for both these implementations. |
306 // start to pump. Real-time tests have shown that a buffer size of 10ms | 324 |
307 // works fine on the capture side. | 325 // Use different buffer sizes depending on the current hardware sample rate. |
308 input_buffer_size = 480; | 326 if (input_sample_rate == 48000) { |
327 input_buffer_size = 480; | |
328 } else { | |
329 // We do run at 44.1kHz at the actual audio layer, but ask for frames | |
330 // at 44.0kHz to ensure that we can feed them to the webrtc::VoiceEngine. | |
331 input_buffer_size = 440; | |
332 } | |
333 | |
309 // Rendering side: AUDIO_PCM_LOW_LATENCY on Windows is based on a callback- | 334 // Rendering side: AUDIO_PCM_LOW_LATENCY on Windows is based on a callback- |
310 // driven Wave implementation where 2 buffers are fed to the audio driver | 335 // driven Wave implementation where 2 buffers are fed to the audio driver |
311 // before actual rendering starts. Initial real-time tests have shown that | 336 // before actual rendering starts. Initial real-time tests have shown that |
312 // 20ms buffer size (corresponds to ~40ms total delay) is not enough but | 337 // 20ms buffer size (corresponds to ~40ms total delay) is not enough but |
313 // can lead to buffer underruns. The next even multiple of 10ms is 30ms | 338 // can lead to buffer underruns. The next even multiple of 10ms is 30ms |
314 // (<=> ~60ms total delay) and it works fine also under high load. | 339 // (<=> ~60ms total delay) and it works fine also under high load. |
315 output_buffer_size = 3 * 480; | 340 output_buffer_size = 3 * 480; |
316 #elif defined(OS_MACOSX) | 341 #elif defined(OS_MACOSX) |
317 if (output_sample_rate != 48000 && output_sample_rate != 44100) { | 342 if (output_sample_rate != 48000 && output_sample_rate != 44100) { |
318 DLOG(ERROR) << "Only 48 and 44.1kHz sample rates are supported on Mac OSX."; | 343 DLOG(ERROR) << "Only 48 and 44.1kHz sample rates are supported on Mac OSX."; |
319 return -1; | 344 return -1; |
320 } | 345 } |
321 input_channels = 1; | 346 input_channels = 1; |
322 output_channels = 1; | 347 output_channels = 1; |
348 | |
323 // Rendering side: AUDIO_PCM_LOW_LATENCY on Mac OS X is based on a callback- | 349 // Rendering side: AUDIO_PCM_LOW_LATENCY on Mac OS X is based on a callback- |
324 // driven Core Audio implementation. Tests have shown that 10ms is a suitable | 350 // driven Core Audio implementation. Tests have shown that 10ms is a suitable |
325 // frame size to use, both for 48kHz and 44.1kHz. | 351 // frame size to use, both for 48kHz and 44.1kHz. |
326 // Capturing side: AUDIO_PCM_LINEAR on Mac OS X uses the Audio Queue Services | 352 // Capturing side: AUDIO_PCM_LINEAR on Mac OS X uses the Audio Queue Services |
327 // API which is not well suited for real-time applications since the delay | 353 // API which is not well suited for real-time applications since the delay |
328 // is very high. We set buffer sizes to 10ms for the input side here as well | 354 // is very high. We set buffer sizes to 10ms for the input side here as well |
329 // but none of them will work. | 355 // but none of them will work. |
330 // TODO(henrika): add support for AUDIO_PCM_LOW_LATENCY on the capture side | 356 // TODO(henrika): add support for AUDIO_PCM_LOW_LATENCY on the capture side |
331 // based on the Mac OS X Core Audio API. | 357 // based on the Mac OS X Core Audio API. |
332 | 358 |
333 // Use different buffer sizes depending on the current hardware sample rate. | 359 // Use different buffer sizes depending on the current hardware sample rate. |
334 if (output_sample_rate == 48000) { | 360 if (output_sample_rate == 48000) { |
335 input_buffer_size = 480; | 361 input_buffer_size = 480; |
336 output_buffer_size = 480; | 362 output_buffer_size = 480; |
337 } else { | 363 } else { |
338 // We do run at 44.1kHz at the actual audio layer, but ask for frames | 364 // We do run at 44.1kHz at the actual audio layer, but ask for frames |
339 // at 44.0kHz to ensure that we can feed them to the webrtc::VoiceEngine. | 365 // at 44.0kHz to ensure that we can feed them to the webrtc::VoiceEngine. |
340 input_buffer_size = 440; | 366 input_buffer_size = 440; |
341 output_buffer_size = 440; | 367 output_buffer_size = 440; |
342 } | 368 } |
343 #elif defined(OS_LINUX) | 369 #elif defined(OS_LINUX) |
344 if (output_sample_rate != 48000) { | 370 if (output_sample_rate != 48000) { |
345 DLOG(ERROR) << "Only 48kHz sample rate is supported on Linux."; | 371 DLOG(ERROR) << "Only 48kHz sample rate is supported on Linux."; |
346 return -1; | 372 return -1; |
347 } | 373 } |
348 input_channels = 1; | 374 input_channels = 1; |
349 output_channels = 1; | 375 output_channels = 1; |
376 | |
350 // Based on tests using the current ALSA implementation in Chrome, we have | 377 // Based on tests using the current ALSA implementation in Chrome, we have |
351 // found that the best combination is 20ms on the input side and 10ms on the | 378 // found that the best combination is 20ms on the input side and 10ms on the |
352 // output side. | 379 // output side. |
353 // TODO(henrika): It might be possible to reduce the input buffer | 380 // TODO(henrika): It might be possible to reduce the input buffer |
354 // size and reduce the delay even more. | 381 // size and reduce the delay even more. |
355 input_buffer_size = 2 * 480; | 382 input_buffer_size = 2 * 480; |
356 output_buffer_size = 480; | 383 output_buffer_size = 480; |
357 #else | 384 #else |
358 DLOG(ERROR) << "Unsupported platform"; | 385 DLOG(ERROR) << "Unsupported platform"; |
359 return -1; | 386 return -1; |
360 #endif | 387 #endif |
361 | 388 |
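All of the platform branches above rely on the same frames-per-10 ms arithmetic. A standalone reference sketch (not patch code) of that calculation and the two values it produces:

```cpp
// Reference sketch (not patch code) of the frames-per-10 ms arithmetic used
// by the platform-specific buffer size choices above.
#include <cstdio>

// One 10 ms block at the given hardware rate; 44.1 kHz hardware is fed to
// the webrtc::VoiceEngine as 44.0 kHz, hence the rounding below.
int FramesPer10Ms(int sample_rate) {
  if (sample_rate == 44100)
    sample_rate = 44000;
  return sample_rate / 100;
}

int main() {
  std::printf("48 kHz   -> %d frames per 10 ms\n", FramesPer10Ms(48000));  // 480
  std::printf("44.1 kHz -> %d frames per 10 ms\n", FramesPer10Ms(44100));  // 440
  // The branches above then pick N*10 ms multiples of these values, e.g.
  // 3 * 480 frames for the Windows render side and 2 * 480 frames for
  // Linux capture.
  return 0;
}
```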
362 // Store utilized parameters to ensure that we can check them | 389 // Store utilized parameters to ensure that we can check them |
363 // after a successful initialization. | 390 // after a successful initialization. |
364 output_buffer_size_ = output_buffer_size; | 391 output_buffer_size_ = output_buffer_size; |
365 output_channels_ = output_channels; | 392 output_channels_ = output_channels; |
366 output_sample_rate_ = static_cast<double>(output_sample_rate); | 393 output_sample_rate_ = static_cast<double>(output_sample_rate); |
367 | 394 |
368 input_buffer_size_ = input_buffer_size; | 395 input_buffer_size_ = input_buffer_size; |
369 input_channels_ = input_channels; | 396 input_channels_ = input_channels; |
370 // TODO(henrika): we use same rate as on output for now. | 397 input_sample_rate_ = input_sample_rate; |
371 input_sample_rate_ = output_sample_rate_; | |
372 | 398 |
373 // Create and configure the audio capturing client. | 399 // Create and configure the audio capturing client. |
374 audio_input_device_ = new AudioInputDevice( | 400 audio_input_device_ = new AudioInputDevice( |
375 input_buffer_size, input_channels, output_sample_rate, this, this); | 401 input_buffer_size, input_channels, input_sample_rate, this, this); |
henrika (OOO until Aug 14) 2011/10/18 08:13:14
Not using same rate as output any longer since we
376 #if defined(OS_MACOSX) | |
377 // We create the input device for Mac as well but the performance | |
378 // will be very bad. | |
379 DLOG(WARNING) << "Real-time recording is not yet supported on Mac OS X"; | |
380 #endif | |
381 | 402 |
382 // Create and configure the audio rendering client. | 403 // Create and configure the audio rendering client. |
383 audio_output_device_ = new AudioDevice( | 404 audio_output_device_ = new AudioDevice( |
384 output_buffer_size, output_channels, output_sample_rate, this); | 405 output_buffer_size, output_channels, output_sample_rate, this); |
385 | 406 |
386 DCHECK(audio_input_device_); | 407 DCHECK(audio_input_device_); |
387 DCHECK(audio_output_device_); | 408 DCHECK(audio_output_device_); |
388 | 409 |
389 // Allocate local audio buffers based on the parameters above. | 410 // Allocate local audio buffers based on the parameters above. |
390 // It is assumed that each audio sample contains 16 bits and each | 411 // It is assumed that each audio sample contains 16 bits and each |
(...skipping 532 matching lines...)
923 } | 944 } |
924 | 945 |
925 int32_t WebRtcAudioDeviceImpl::GetLoudspeakerStatus(bool* enabled) const { | 946 int32_t WebRtcAudioDeviceImpl::GetLoudspeakerStatus(bool* enabled) const { |
926 NOTIMPLEMENTED(); | 947 NOTIMPLEMENTED(); |
927 return -1; | 948 return -1; |
928 } | 949 } |
929 | 950 |
930 void WebRtcAudioDeviceImpl::SetSessionId(int session_id) { | 951 void WebRtcAudioDeviceImpl::SetSessionId(int session_id) { |
931 session_id_ = session_id; | 952 session_id_ = session_id; |
932 } | 953 } |