Index: content/renderer/media/webrtc_audio_renderer.cc
diff --git a/content/renderer/media/webrtc_audio_renderer.cc b/content/renderer/media/webrtc_audio_renderer.cc
index c21c6624869c6187e18b9d6a46c71e7a7f7874bb..5d7ea39e1d8a0bc30df4ed59170d48f24c91a9ff 100644
--- a/content/renderer/media/webrtc_audio_renderer.cc
+++ b/content/renderer/media/webrtc_audio_renderer.cc
@@ -11,6 +11,7 @@
 #include "content/renderer/media/audio_hardware.h"
 #include "content/renderer/media/renderer_audio_output_device.h"
 #include "content/renderer/media/webrtc_audio_device_impl.h"
+#include "media/audio/audio_parameters.h"
 #include "media/audio/audio_util.h"
 #include "media/audio/sample_rates.h"
 #if defined(OS_WIN)
@@ -28,14 +29,14 @@ namespace {
 // for its current sample rate (set by the user) on Windows and Mac OS X.
 // The rates listed below add restrictions, and Initialize()
 // will fail if the user selects any rate outside these ranges.
-int kValidOutputRates[] = {96000, 48000, 44100};
+const int kValidOutputRates[] = {96000, 48000, 44100, 32000, 16000};
 #elif defined(OS_LINUX) || defined(OS_OPENBSD)
-int kValidOutputRates[] = {48000, 44100};
+const int kValidOutputRates[] = {48000, 44100};
 #elif defined(OS_ANDROID)
 // On Android, the most popular sampling rate is 16000.
-int kValidOutputRates[] = {48000, 44100, 16000};
+const int kValidOutputRates[] = {48000, 44100, 16000};
 #else
-int kValidOutputRates[] = {44100};
+const int kValidOutputRates[] = {44100};
 #endif
 // TODO(xians): Merge the following code to WebRtcAudioCapturer, or remove.
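For context, Initialize() is expected to reject any hardware rate not present in this table. A minimal sketch of such a check, assuming a hypothetical helper name (the real validation code sits outside this hunk):

    // Sketch only: IsValidOutputRate() is an illustrative name, not part of
    // the patch. arraysize() is the Chromium helper from base/basictypes.h.
    static bool IsValidOutputRate(int sample_rate) {
      for (size_t i = 0; i < arraysize(kValidOutputRates); ++i) {
        if (sample_rate == kValidOutputRates[i])
          return true;
      }
      return false;
    }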
@@ -90,11 +91,14 @@ WebRtcAudioRenderer::WebRtcAudioRenderer(int source_render_view_id)
 }
 WebRtcAudioRenderer::~WebRtcAudioRenderer() {
+  DCHECK(thread_checker_.CalledOnValidThread());
   DCHECK_EQ(state_, UNINITIALIZED);
   buffer_.reset();
 }
 bool WebRtcAudioRenderer::Initialize(WebRtcAudioRendererSource* source) {
+  DVLOG(1) << "WebRtcAudioRenderer::Initialize()";
+  DCHECK(thread_checker_.CalledOnValidThread());
   base::AutoLock auto_lock(lock_);
   DCHECK_EQ(state_, UNINITIALIZED);
   DCHECK(source);
@@ -121,102 +125,88 @@ bool WebRtcAudioRenderer::Initialize(WebRtcAudioRendererSource* source) {
     return false;
   }
-  media::ChannelLayout channel_layout = media::CHANNEL_LAYOUT_STEREO;
+  // Set up audio parameters for the source, i.e., the WebRTC client.
+  // The WebRTC client only supports buffer sizes that are multiples of 10 ms,
+  // and 10 ms is preferred since it gives the lowest possible delay.
+  media::AudioParameters source_params;
   int buffer_size = 0;
-  // Windows
-#if defined(OS_WIN)
-  // Always use stereo rendering on Windows.
-  channel_layout = media::CHANNEL_LAYOUT_STEREO;
-
-  // Render side: AUDIO_PCM_LOW_LATENCY is based on the Core Audio (WASAPI)
-  // API which was introduced in Windows Vista. For lower Windows versions,
-  // a callback-driven Wave implementation is used instead. An output buffer
-  // size of 10ms works well for WASAPI but 30ms is needed for Wave.
-
-  // Use different buffer sizes depending on the current hardware sample rate.
-  if (sample_rate == 96000 || sample_rate == 48000) {
+  if (sample_rate % 8000 == 0) {
     buffer_size = (sample_rate / 100);
+  } else if (sample_rate == 44100) {
+    // The resampler in WebRTC does not support a buffer size of 441 frames
+    // as input. We hard-code the size to 440 frames (~9.98 ms) instead and
+    // rely on the internal jitter buffer in WebRTC to handle the resulting
+    // drift.
+    // TODO(henrika): ensure that WebRTC supports 44100Hz and use 441 instead.
+    buffer_size = 440;
   } else {
-    // We do run at 44.1kHz at the actual audio layer, but ask for frames
-    // at 44.0kHz to ensure that we can feed them to the webrtc::VoiceEngine.
-    // TODO(henrika): figure out why we seem to need 20ms here for glitch-
-    // free audio.
-    buffer_size = 2 * 440;
+    return false;
   }
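The branch above encodes the 10 ms rule. As a standalone sketch (illustrative function name, not in the patch):

    // 10 ms of audio at rate R is R / 100 frames.
    static int SourceFramesPer10ms(int sample_rate) {
      if (sample_rate % 8000 == 0)
        return sample_rate / 100;  // 48000 -> 480, 32000 -> 320, 16000 -> 160.
      if (sample_rate == 44100)
        return 440;                // ~9.98 ms; drift goes to the jitter buffer.
      return 0;                    // Unsupported rate; Initialize() fails.
    }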
-  // Windows XP and lower can't cope with 10 ms output buffer size.
-  // It must be extended to 30 ms (60 ms will be used internally by WaveOut).
-  // Note that we can't use media::CoreAudioUtil::IsSupported() here since it
-  // tries to load the Audioses.dll and it will always fail in the render
-  // process.
-  if (base::win::GetVersion() < base::win::VERSION_VISTA) {
-    buffer_size = 3 * buffer_size;
-    DLOG(WARNING) << "Extending the output buffer size by a factor of three "
-                  << "since Windows XP has been detected.";
-  }
-#elif defined(OS_MACOSX)
-  channel_layout = media::CHANNEL_LAYOUT_MONO;
+  source_params.Reset(media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
+                      media::CHANNEL_LAYOUT_STEREO,
+                      sample_rate, 16, buffer_size);
-  // Render side: AUDIO_PCM_LOW_LATENCY on Mac OS X is based on a callback-
-  // driven Core Audio implementation. Tests have shown that 10ms is a suitable
-  // frame size to use for 96kHz, 48kHz and 44.1kHz.
+  // Set up audio parameters for the sink, i.e., the native audio output
+  // stream. We strive to open the stream using the native parameters to get
+  // the best possible performance and to ensure that no extra FIFO is needed
+  // on the browser side to match the client request. Any mismatch between
+  // the source and the sink is instead handled in this class using a pull
+  // FIFO.
-  // Use different buffer sizes depending on the current hardware sample rate.
-  if (sample_rate == 96000 || sample_rate == 48000) {
-    buffer_size = (sample_rate / 100);
-  } else {
-    // We do run at 44.1kHz at the actual audio layer, but ask for frames
-    // at 44.0kHz to ensure that we can feed them to the webrtc::VoiceEngine.
-    buffer_size = 440;
-  }
-#elif defined(OS_LINUX) || defined(OS_OPENBSD)
-  channel_layout = media::CHANNEL_LAYOUT_MONO;
+  media::AudioParameters sink_params;
-  // Based on tests using the current ALSA implementation in Chrome, we have
-  // found that 10ms buffer size on the output side works fine.
-  buffer_size = 480;
-#elif defined(OS_ANDROID)
-  channel_layout = media::CHANNEL_LAYOUT_MONO;
-
-  // The buffer size lower than GetAudioHardwareBufferSize() will lead to
-  // choppy sound because AudioOutputResampler will read the buffer multiple
-  // times in a row without allowing the client to re-fill the buffer.
-  // TODO(dwkang): check if 2048 - GetAudioHardwareBufferSize() is the right
-  // value for Android and do further tuning.
-  buffer_size = 2048;
-#else
-  DLOG(ERROR) << "Unsupported platform";
-  return false;
+#if defined(OS_WIN)
+  // TODO(henrika): sort out Windows XP support.
 #endif
-  // Store utilized parameters to ensure that we can check them
-  // after a successful initialization.
-  params_.Reset(media::AudioParameters::AUDIO_PCM_LOW_LATENCY, channel_layout,
-                sample_rate, 16, buffer_size);
+  buffer_size = GetAudioOutputBufferSize();
+  sink_params.Reset(media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
+                    media::CHANNEL_LAYOUT_STEREO,
+                    sample_rate, 16, buffer_size);
+
+  // Create a FIFO if re-buffering is required to match the source input with
+  // the sink request. The source acts as provider here and the sink as
+  // consumer.
+  if (source_params.frames_per_buffer() != sink_params.frames_per_buffer()) {
+    DVLOG(1) << "Rebuffering from " << source_params.frames_per_buffer()
+             << " to " << sink_params.frames_per_buffer();
+    audio_fifo_.reset(new media::AudioPullFifo(
+        source_params.channels(),
+        source_params.frames_per_buffer(),
+        base::Bind(
+            &WebRtcAudioRenderer::SourceCallback,
+            base::Unretained(this))));
+  }
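This pull FIFO is the heart of the change: the sink pulls sink-sized buffers while the FIFO asks SourceCallback() for source-sized 10 ms chunks as needed. A minimal usage sketch of media::AudioPullFifo (|provider| and ProvideInput are illustrative names):

    // The read callback is invoked zero or more times per Consume(); its
    // first argument reports how many frames were already buffered (delay).
    media::AudioPullFifo fifo(
        2,    // channels
        480,  // frames delivered per provider callback (10 ms at 48 kHz)
        base::Bind(&Provider::ProvideInput, base::Unretained(provider)));
    fifo.Consume(sink_bus, sink_bus->frames());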
+
+  frame_duration_milliseconds_ = base::Time::kMillisecondsPerSecond /
+      static_cast<double>(source_params.sample_rate());
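This factor converts the FIFO's frame delay into milliseconds. A quick check of the arithmetic:

    // At 48000 Hz: 1000 / 48000 ~= 0.02083 ms per frame, so a FIFO delay of
    // 480 frames adds 480 * 0.02083 ~= 10 ms to the delay reported to the
    // WebRTC client in SourceCallback().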
   // Allocate local audio buffers based on the parameters above.
   // It is assumed that each audio sample contains 16 bits and each
   // audio frame contains one or two audio samples depending on the
   // number of channels.
-  buffer_.reset(new int16[params_.frames_per_buffer() * params_.channels()]);
+  buffer_.reset(
+      new int16[source_params.frames_per_buffer() * source_params.channels()]);
   source_ = source;
-  source->SetRenderFormat(params_);
+  source->SetRenderFormat(source_params);
-  // Configure the audio rendering client and start the rendering.
-  sink_->Initialize(params_, this);
+  // Configure the audio rendering client and start rendering.
+  sink_->Initialize(sink_params, this);
   sink_->SetSourceRenderView(source_render_view_id_);
   sink_->Start();
+  // User must call Play() before any audio can be heard.
   state_ = PAUSED;
   UMA_HISTOGRAM_ENUMERATION("WebRTC.AudioOutputChannelLayout",
-                            channel_layout, media::CHANNEL_LAYOUT_MAX);
+                            source_params.channel_layout(),
+                            media::CHANNEL_LAYOUT_MAX);
   UMA_HISTOGRAM_ENUMERATION("WebRTC.AudioOutputFramesPerBuffer",
-                            buffer_size, kUnexpectedAudioBufferSize);
-  AddHistogramFramesPerBuffer(buffer_size);
+                            source_params.frames_per_buffer(),
+                            kUnexpectedAudioBufferSize);
+  AddHistogramFramesPerBuffer(source_params.frames_per_buffer());
   return true;
 }
@@ -227,6 +217,8 @@ void WebRtcAudioRenderer::Start() {
 }
 void WebRtcAudioRenderer::Play() {
+  DVLOG(1) << "WebRtcAudioRenderer::Play()";
+  DCHECK(thread_checker_.CalledOnValidThread());
   base::AutoLock auto_lock(lock_);
   if (state_ == UNINITIALIZED)
     return;
@@ -237,6 +229,8 @@ void WebRtcAudioRenderer::Play() {
 }
 void WebRtcAudioRenderer::Pause() {
+  DVLOG(1) << "WebRtcAudioRenderer::Pause()";
+  DCHECK(thread_checker_.CalledOnValidThread());
   base::AutoLock auto_lock(lock_);
   if (state_ == UNINITIALIZED)
     return;
@@ -248,6 +242,8 @@ void WebRtcAudioRenderer::Pause() {
 }
 void WebRtcAudioRenderer::Stop() {
+  DVLOG(1) << "WebRtcAudioRenderer::Stop()";
+  DCHECK(thread_checker_.CalledOnValidThread());
   base::AutoLock auto_lock(lock_);
   if (state_ == UNINITIALIZED)
     return;
@@ -259,6 +255,7 @@ void WebRtcAudioRenderer::Stop() {
 }
 void WebRtcAudioRenderer::SetVolume(float volume) {
+  DCHECK(thread_checker_.CalledOnValidThread());
   base::AutoLock auto_lock(lock_);
   if (state_ == UNINITIALIZED)
     return;
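The DVLOG/DCHECK additions in these hunks follow the standard base::ThreadChecker pattern: the checker binds to the first thread that calls CalledOnValidThread(), and every control method then asserts it is invoked on that same thread. A generic sketch (class and method names illustrative):

    class Controller {
     public:
      void Play() {
        DCHECK(thread_checker_.CalledOnValidThread());
        // State changes remain guarded by |lock_|, as in the patch.
      }

     private:
      base::ThreadChecker thread_checker_;
    };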
@@ -276,26 +273,18 @@ bool WebRtcAudioRenderer::IsLocalRenderer() const {
 int WebRtcAudioRenderer::Render(media::AudioBus* audio_bus,
                                 int audio_delay_milliseconds) {
-  {
-    base::AutoLock auto_lock(lock_);
-    if (!source_)
-      return 0;
-    // We need to keep render data for the |source_| reglardless of |state_|,
-    // otherwise the data will be buffered up inside |source_|.
-    source_->RenderData(reinterpret_cast<uint8*>(buffer_.get()),
-                        audio_bus->channels(), audio_bus->frames(),
-                        audio_delay_milliseconds);
-
-    // Return 0 frames to play out silence if |state_| is not PLAYING.
-    if (state_ != PLAYING)
-      return 0;
-  }
+  base::AutoLock auto_lock(lock_);
+  if (!source_)
+    return 0;
-  // Deinterleave each channel and convert to 32-bit floating-point
-  // with nominal range -1.0 -> +1.0 to match the callback format.
-  audio_bus->FromInterleaved(buffer_.get(), audio_bus->frames(),
-                             params_.bits_per_sample() / 8);
-  return audio_bus->frames();
+  audio_delay_milliseconds_ = audio_delay_milliseconds;
+
+  if (audio_fifo_)
+    audio_fifo_->Consume(audio_bus, audio_bus->frames());
+  else
+    SourceCallback(0, audio_bus);
+
+  return (state_ == PLAYING) ? audio_bus->frames() : 0;
 }
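In the new Render() path, a missing FIFO means the source and sink buffer sizes already match, so the renderer short-circuits straight into SourceCallback(0, audio_bus); with a FIFO, Consume() tops itself up via the same callback. An illustrative pull sequence for a 440-frame source feeding a 480-frame sink:

    // Render(480): fifo holds 0   -> 2 callbacks (880 frames produced),
    //              480 consumed, 400 frames left buffered.
    // Render(480): fifo holds 400 -> 1 callback (440 frames produced),
    //              480 consumed, 360 frames left buffered.
    // The varying carry-over is what |fifo_frame_delay| reports.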
 void WebRtcAudioRenderer::OnRenderError() {
@@ -303,4 +292,32 @@ void WebRtcAudioRenderer::OnRenderError() {
   LOG(ERROR) << "OnRenderError()";
 }
+// Called by AudioPullFifo when more data is necessary.
+void WebRtcAudioRenderer::SourceCallback(
+    int fifo_frame_delay, media::AudioBus* audio_bus) {
+  DVLOG(2) << "WebRtcAudioRenderer::SourceCallback("
+           << fifo_frame_delay << ", "
+           << audio_bus->frames() << ")";
+
+  audio_delay_milliseconds_ += frame_duration_milliseconds_ * fifo_frame_delay;
+  DVLOG(2) << "audio_delay_milliseconds: " << audio_delay_milliseconds_;
+
+  // We need to keep render data for the |source_| regardless of |state_|,
+  // otherwise the data will be buffered up inside |source_|.
+  source_->RenderData(reinterpret_cast<uint8*>(buffer_.get()),
+                      audio_bus->channels(), audio_bus->frames(),
+                      audio_delay_milliseconds_);
+
+  // Avoid filling up the audio bus if we are not playing; instead
+  // return here and ensure that the returned value in Render() is 0.
+  if (state_ != PLAYING)
+    return;
+
+  // De-interleave each channel and convert to 32-bit floating-point
+  // with nominal range -1.0 -> +1.0 to match the callback format.
+  audio_bus->FromInterleaved(buffer_.get(),
+                             audio_bus->frames(),
+                             sizeof(buffer_[0]));
+}
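For completeness: the final FromInterleaved() call with a 2-byte sample size deinterleaves the int16 data into planar float channels in a nominal [-1.0, 1.0] range; the exact scaling is an internal detail of media::AudioBus. Illustrative stereo example:

    // int16 interleaved[] = { L0, R0, L1, R1, ... };
    // bus->FromInterleaved(interleaved, frames, 2);
    // bus->channel(0)[n] ~= Ln / 32768.0f; bus->channel(1)[n] ~= Rn / 32768.0f;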
+
 }  // namespace content