Index: content/renderer/media/webrtc_audio_renderer.cc
diff --git a/content/renderer/media/webrtc_audio_renderer.cc b/content/renderer/media/webrtc_audio_renderer.cc
index c21c6624869c6187e18b9d6a46c71e7a7f7874bb..5d7ea39e1d8a0bc30df4ed59170d48f24c91a9ff 100644
--- a/content/renderer/media/webrtc_audio_renderer.cc
+++ b/content/renderer/media/webrtc_audio_renderer.cc
@@ -11,6 +11,7 @@
 #include "content/renderer/media/audio_hardware.h"
 #include "content/renderer/media/renderer_audio_output_device.h"
 #include "content/renderer/media/webrtc_audio_device_impl.h"
+#include "media/audio/audio_parameters.h"
 #include "media/audio/audio_util.h"
 #include "media/audio/sample_rates.h"
 #if defined(OS_WIN)
@@ -28,14 +29,14 @@ namespace {
 // for its current sample rate (set by the user) on Windows and Mac OS X.
 // The rates listed below add restrictions, and Initialize()
 // will fail if the user selects any rate outside these ranges.
-int kValidOutputRates[] = {96000, 48000, 44100};
+const int kValidOutputRates[] = {96000, 48000, 44100, 32000, 16000};
 #elif defined(OS_LINUX) || defined(OS_OPENBSD)
-int kValidOutputRates[] = {48000, 44100};
+const int kValidOutputRates[] = {48000, 44100};
 #elif defined(OS_ANDROID)
 // On Android, the most popular sampling rate is 16000.
-int kValidOutputRates[] = {48000, 44100, 16000};
+const int kValidOutputRates[] = {48000, 44100, 16000};
 #else
-int kValidOutputRates[] = {44100};
+const int kValidOutputRates[] = {44100};
 #endif
 // TODO(xians): Merge the following code to WebRtcAudioCapturer, or remove.
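For context, Initialize() is expected to reject any hardware rate not present in this table. A minimal sketch of such a check, assuming a hypothetical helper name (the real validation code sits outside this hunk):

    // Sketch only: IsValidOutputRate() is an illustrative name, not part of
    // the patch. arraysize() is the Chromium helper from base/basictypes.h.
    static bool IsValidOutputRate(int sample_rate) {
      for (size_t i = 0; i < arraysize(kValidOutputRates); ++i) {
        if (sample_rate == kValidOutputRates[i])
          return true;
      }
      return false;
    }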
@@ -90,11 +91,14 @@ WebRtcAudioRenderer::WebRtcAudioRenderer(int source_render_view_id)
 }
 WebRtcAudioRenderer::~WebRtcAudioRenderer() {
+  DCHECK(thread_checker_.CalledOnValidThread());
   DCHECK_EQ(state_, UNINITIALIZED);
   buffer_.reset();
 }
 bool WebRtcAudioRenderer::Initialize(WebRtcAudioRendererSource* source) {
+  DVLOG(1) << "WebRtcAudioRenderer::Initialize()";
+  DCHECK(thread_checker_.CalledOnValidThread());
   base::AutoLock auto_lock(lock_);
   DCHECK_EQ(state_, UNINITIALIZED);
   DCHECK(source);
@@ -121,102 +125,88 @@ bool WebRtcAudioRenderer::Initialize(WebRtcAudioRendererSource* source) {
     return false;
   }
-  media::ChannelLayout channel_layout = media::CHANNEL_LAYOUT_STEREO;
+  // Set up audio parameters for the source, i.e., the WebRTC client.
+  // The WebRTC client only supports buffer sizes that are multiples of 10 ms,
+  // and 10 ms is preferred since it gives the lowest possible delay.
+  media::AudioParameters source_params;
   int buffer_size = 0;
-  // Windows
-#if defined(OS_WIN)
-  // Always use stereo rendering on Windows.
-  channel_layout = media::CHANNEL_LAYOUT_STEREO;
-
-  // Render side: AUDIO_PCM_LOW_LATENCY is based on the Core Audio (WASAPI)
-  // API which was introduced in Windows Vista. For lower Windows versions,
-  // a callback-driven Wave implementation is used instead. An output buffer
-  // size of 10ms works well for WASAPI but 30ms is needed for Wave.
-
-  // Use different buffer sizes depending on the current hardware sample rate.
-  if (sample_rate == 96000 || sample_rate == 48000) {
+  if (sample_rate % 8000 == 0) {
     buffer_size = (sample_rate / 100);
+  } else if (sample_rate == 44100) {
+    // The resampler in WebRTC does not support a buffer size of 441 frames
+    // as input. We hard-code the size to 440 frames (~9.98 ms) instead and
+    // rely on the internal jitter buffer in WebRTC to handle the resulting
+    // drift.
+    // TODO(henrika): ensure that WebRTC supports 44100Hz and use 441 instead.
+    buffer_size = 440;
   } else {
-    // We do run at 44.1kHz at the actual audio layer, but ask for frames
-    // at 44.0kHz to ensure that we can feed them to the webrtc::VoiceEngine.
-    // TODO(henrika): figure out why we seem to need 20ms here for glitch-
-    // free audio.
-    buffer_size = 2 * 440;
+    return false;
   }
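The branch above encodes the 10 ms rule. As a standalone sketch (illustrative function name, not in the patch):

    // 10 ms of audio at rate R is R / 100 frames.
    static int SourceFramesPer10ms(int sample_rate) {
      if (sample_rate % 8000 == 0)
        return sample_rate / 100;  // 48000 -> 480, 32000 -> 320, 16000 -> 160.
      if (sample_rate == 44100)
        return 440;                // ~9.98 ms; drift goes to the jitter buffer.
      return 0;                    // Unsupported rate; Initialize() fails.
    }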
-  // Windows XP and lower can't cope with 10 ms output buffer size.
-  // It must be extended to 30 ms (60 ms will be used internally by WaveOut).
-  // Note that we can't use media::CoreAudioUtil::IsSupported() here since it
-  // tries to load the Audioses.dll and it will always fail in the render
-  // process.
-  if (base::win::GetVersion() < base::win::VERSION_VISTA) {
-    buffer_size = 3 * buffer_size;
-    DLOG(WARNING) << "Extending the output buffer size by a factor of three "
-                  << "since Windows XP has been detected.";
-  }
-#elif defined(OS_MACOSX)
-  channel_layout = media::CHANNEL_LAYOUT_MONO;
+  source_params.Reset(media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
+                      media::CHANNEL_LAYOUT_STEREO,
+                      sample_rate, 16, buffer_size);
-  // Render side: AUDIO_PCM_LOW_LATENCY on Mac OS X is based on a callback-
-  // driven Core Audio implementation. Tests have shown that 10ms is a suitable
-  // frame size to use for 96kHz, 48kHz and 44.1kHz.
+  // Set up audio parameters for the sink, i.e., the native audio output
+  // stream. We strive to open the stream using the native parameters to get
+  // the best possible performance and to ensure that no extra FIFO is needed
+  // on the browser side to match the client request. Any mismatch between
+  // the source and the sink is instead handled in this class using a pull
+  // FIFO.
-  // Use different buffer sizes depending on the current hardware sample rate.
-  if (sample_rate == 96000 || sample_rate == 48000) {
-    buffer_size = (sample_rate / 100);
-  } else {
-    // We do run at 44.1kHz at the actual audio layer, but ask for frames
-    // at 44.0kHz to ensure that we can feed them to the webrtc::VoiceEngine.
-    buffer_size = 440;
-  }
-#elif defined(OS_LINUX) || defined(OS_OPENBSD)
-  channel_layout = media::CHANNEL_LAYOUT_MONO;
+  media::AudioParameters sink_params;
-  // Based on tests using the current ALSA implementation in Chrome, we have
-  // found that 10ms buffer size on the output side works fine.
-  buffer_size = 480;
-#elif defined(OS_ANDROID)
-  channel_layout = media::CHANNEL_LAYOUT_MONO;
-
-  // The buffer size lower than GetAudioHardwareBufferSize() will lead to
-  // choppy sound because AudioOutputResampler will read the buffer multiple
-  // times in a row without allowing the client to re-fill the buffer.
-  // TODO(dwkang): check if 2048 - GetAudioHardwareBufferSize() is the right
-  // value for Android and do further tuning.
-  buffer_size = 2048;
-#else
-  DLOG(ERROR) << "Unsupported platform";
-  return false;
+#if defined(OS_WIN)
+  // TODO(henrika): sort out Windows XP support.
 #endif
-  // Store utilized parameters to ensure that we can check them
-  // after a successful initialization.
-  params_.Reset(media::AudioParameters::AUDIO_PCM_LOW_LATENCY, channel_layout,
-                sample_rate, 16, buffer_size);
+  buffer_size = GetAudioOutputBufferSize();
+  sink_params.Reset(media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
+                    media::CHANNEL_LAYOUT_STEREO,
+                    sample_rate, 16, buffer_size);
+
+  // Create a FIFO if re-buffering is required to match the source input with
+  // the sink request. The source acts as provider here and the sink as
+  // consumer.
+  if (source_params.frames_per_buffer() != sink_params.frames_per_buffer()) {
+    DVLOG(1) << "Rebuffering from " << source_params.frames_per_buffer()
+             << " to " << sink_params.frames_per_buffer();
+    audio_fifo_.reset(new media::AudioPullFifo(
+        source_params.channels(),
+        source_params.frames_per_buffer(),
+        base::Bind(
+            &WebRtcAudioRenderer::SourceCallback,
+            base::Unretained(this))));
+  }
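This pull FIFO is the heart of the change: the sink pulls sink-sized buffers while the FIFO asks SourceCallback() for source-sized 10 ms chunks as needed. A minimal usage sketch of media::AudioPullFifo (|provider| and ProvideInput are illustrative names):

    // The read callback is invoked zero or more times per Consume(); its
    // first argument reports how many frames were already buffered (delay).
    media::AudioPullFifo fifo(
        2,    // channels
        480,  // frames delivered per provider callback (10 ms at 48 kHz)
        base::Bind(&Provider::ProvideInput, base::Unretained(provider)));
    fifo.Consume(sink_bus, sink_bus->frames());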
+
+  frame_duration_milliseconds_ = base::Time::kMillisecondsPerSecond /
+      static_cast<double>(source_params.sample_rate());
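This factor converts the FIFO's frame delay into milliseconds. A quick check of the arithmetic:

    // At 48000 Hz: 1000 / 48000 ~= 0.02083 ms per frame, so a FIFO delay of
    // 480 frames adds 480 * 0.02083 ~= 10 ms to the delay reported to the
    // WebRTC client in SourceCallback().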
   // Allocate local audio buffers based on the parameters above.
   // It is assumed that each audio sample contains 16 bits and each
   // audio frame contains one or two audio samples depending on the
   // number of channels.
-  buffer_.reset(new int16[params_.frames_per_buffer() * params_.channels()]);
+  buffer_.reset(
+      new int16[source_params.frames_per_buffer() * source_params.channels()]);
   source_ = source;
-  source->SetRenderFormat(params_);
+  source->SetRenderFormat(source_params);
-  // Configure the audio rendering client and start the rendering.
-  sink_->Initialize(params_, this);
+  // Configure the audio rendering client and start rendering.
+  sink_->Initialize(sink_params, this);
   sink_->SetSourceRenderView(source_render_view_id_);
   sink_->Start();
+  // User must call Play() before any audio can be heard.
   state_ = PAUSED;
   UMA_HISTOGRAM_ENUMERATION("WebRTC.AudioOutputChannelLayout",
-                            channel_layout, media::CHANNEL_LAYOUT_MAX);
+                            source_params.channel_layout(),
+                            media::CHANNEL_LAYOUT_MAX);
   UMA_HISTOGRAM_ENUMERATION("WebRTC.AudioOutputFramesPerBuffer",
-                            buffer_size, kUnexpectedAudioBufferSize);
-  AddHistogramFramesPerBuffer(buffer_size);
+                            source_params.frames_per_buffer(),
+                            kUnexpectedAudioBufferSize);
+  AddHistogramFramesPerBuffer(source_params.frames_per_buffer());
   return true;
 }
@@ -227,6 +217,8 @@ void WebRtcAudioRenderer::Start() {
 }
 void WebRtcAudioRenderer::Play() {
+  DVLOG(1) << "WebRtcAudioRenderer::Play()";
+  DCHECK(thread_checker_.CalledOnValidThread());
   base::AutoLock auto_lock(lock_);
   if (state_ == UNINITIALIZED)
     return;
@@ -237,6 +229,8 @@ void WebRtcAudioRenderer::Play() {
 }
 void WebRtcAudioRenderer::Pause() {
+  DVLOG(1) << "WebRtcAudioRenderer::Pause()";
+  DCHECK(thread_checker_.CalledOnValidThread());
   base::AutoLock auto_lock(lock_);
   if (state_ == UNINITIALIZED)
     return;
@@ -248,6 +242,8 @@ void WebRtcAudioRenderer::Pause() {
 }
 void WebRtcAudioRenderer::Stop() {
+  DVLOG(1) << "WebRtcAudioRenderer::Stop()";
+  DCHECK(thread_checker_.CalledOnValidThread());
   base::AutoLock auto_lock(lock_);
   if (state_ == UNINITIALIZED)
     return;
@@ -259,6 +255,7 @@ void WebRtcAudioRenderer::Stop() {
 }
 void WebRtcAudioRenderer::SetVolume(float volume) {
+  DCHECK(thread_checker_.CalledOnValidThread());
   base::AutoLock auto_lock(lock_);
   if (state_ == UNINITIALIZED)
     return;
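The DVLOG/DCHECK additions in these hunks follow the standard base::ThreadChecker pattern: the checker binds to the first thread that calls CalledOnValidThread(), and every control method then asserts it is invoked on that same thread. A generic sketch (class and method names illustrative):

    class Controller {
     public:
      void Play() {
        DCHECK(thread_checker_.CalledOnValidThread());
        // State changes remain guarded by |lock_|, as in the patch.
      }

     private:
      base::ThreadChecker thread_checker_;
    };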
@@ -276,26 +273,18 @@ bool WebRtcAudioRenderer::IsLocalRenderer() const {
 int WebRtcAudioRenderer::Render(media::AudioBus* audio_bus,
                                 int audio_delay_milliseconds) {
-  {
-    base::AutoLock auto_lock(lock_);
-    if (!source_)
-      return 0;
-    // We need to keep render data for the |source_| reglardless of |state_|,
-    // otherwise the data will be buffered up inside |source_|.
-    source_->RenderData(reinterpret_cast<uint8*>(buffer_.get()),
-                        audio_bus->channels(), audio_bus->frames(),
-                        audio_delay_milliseconds);
-
-    // Return 0 frames to play out silence if |state_| is not PLAYING.
-    if (state_ != PLAYING)
-      return 0;
-  }
+  base::AutoLock auto_lock(lock_);
+  if (!source_)
+    return 0;
-  // Deinterleave each channel and convert to 32-bit floating-point
-  // with nominal range -1.0 -> +1.0 to match the callback format.
-  audio_bus->FromInterleaved(buffer_.get(), audio_bus->frames(),
-                             params_.bits_per_sample() / 8);
-  return audio_bus->frames();
+  audio_delay_milliseconds_ = audio_delay_milliseconds;
+
+  if (audio_fifo_)
+    audio_fifo_->Consume(audio_bus, audio_bus->frames());
+  else
+    SourceCallback(0, audio_bus);
+
+  return (state_ == PLAYING) ? audio_bus->frames() : 0;
 }
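In the new Render() path, a missing FIFO means the source and sink buffer sizes already match, so the renderer short-circuits straight into SourceCallback(0, audio_bus); with a FIFO, Consume() tops itself up via the same callback. An illustrative pull sequence for a 440-frame source feeding a 480-frame sink:

    // Render(480): fifo holds 0   -> 2 callbacks (880 frames produced),
    //              480 consumed, 400 frames left buffered.
    // Render(480): fifo holds 400 -> 1 callback (440 frames produced),
    //              480 consumed, 360 frames left buffered.
    // The varying carry-over is what |fifo_frame_delay| reports.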
 void WebRtcAudioRenderer::OnRenderError() {
@@ -303,4 +292,32 @@ void WebRtcAudioRenderer::OnRenderError() {
   LOG(ERROR) << "OnRenderError()";
 }
+// Called by AudioPullFifo when more data is necessary.
+void WebRtcAudioRenderer::SourceCallback(
+    int fifo_frame_delay, media::AudioBus* audio_bus) {
+  DVLOG(2) << "WebRtcAudioRenderer::SourceCallback("
+           << fifo_frame_delay << ", "
+           << audio_bus->frames() << ")";
+
+  audio_delay_milliseconds_ += frame_duration_milliseconds_ * fifo_frame_delay;
+  DVLOG(2) << "audio_delay_milliseconds: " << audio_delay_milliseconds_;
+
+  // We need to keep render data for the |source_| regardless of |state_|,
+  // otherwise the data will be buffered up inside |source_|.
+  source_->RenderData(reinterpret_cast<uint8*>(buffer_.get()),
+                      audio_bus->channels(), audio_bus->frames(),
+                      audio_delay_milliseconds_);
+
+  // Avoid filling up the audio bus if we are not playing; instead
+  // return here and ensure that the returned value in Render() is 0.
+  if (state_ != PLAYING)
+    return;
+
+  // De-interleave each channel and convert to 32-bit floating-point
+  // with nominal range -1.0 -> +1.0 to match the callback format.
+  audio_bus->FromInterleaved(buffer_.get(),
+                             audio_bus->frames(),
+                             sizeof(buffer_[0]));
+}
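For completeness: the final FromInterleaved() call with a 2-byte sample size deinterleaves the int16 data into planar float channels in a nominal [-1.0, 1.0] range; the exact scaling is an internal detail of media::AudioBus. Illustrative stereo example:

    // int16 interleaved[] = { L0, R0, L1, R1, ... };
    // bus->FromInterleaved(interleaved, frames, 2);
    // bus->channel(0)[n] ~= Ln / 32768.0f; bus->channel(1)[n] ~= Rn / 32768.0f;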
+
 }  // namespace content