Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(204)

Side by Side Diff: content/renderer/media/webrtc_audio_renderer.cc

Issue 12049070: Avoids irregular OnMoreData callbacks on Windows using Core Audio (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Non trivial rebase Created 7 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "content/renderer/media/webrtc_audio_renderer.h" 5 #include "content/renderer/media/webrtc_audio_renderer.h"
6 6
7 #include "base/logging.h" 7 #include "base/logging.h"
8 #include "base/metrics/histogram.h" 8 #include "base/metrics/histogram.h"
9 #include "base/string_util.h" 9 #include "base/string_util.h"
10 #include "content/renderer/media/audio_device_factory.h" 10 #include "content/renderer/media/audio_device_factory.h"
11 #include "content/renderer/media/renderer_audio_output_device.h" 11 #include "content/renderer/media/renderer_audio_output_device.h"
12 #include "content/renderer/media/webrtc_audio_device_impl.h" 12 #include "content/renderer/media/webrtc_audio_device_impl.h"
13 #include "content/renderer/render_thread_impl.h" 13 #include "content/renderer/render_thread_impl.h"
14 #include "media/audio/audio_util.h" 14 #include "media/audio/audio_parameters.h"
15 #include "media/audio/sample_rates.h" 15 #include "media/audio/sample_rates.h"
16 #include "media/base/audio_hardware_config.h" 16 #include "media/base/audio_hardware_config.h"
17 17
18 #if defined(OS_WIN) 18 #if defined(OS_WIN)
19 #include "base/win/windows_version.h" 19 #include "base/win/windows_version.h"
20 #include "media/audio/win/core_audio_util_win.h" 20 #include "media/audio/win/core_audio_util_win.h"
21 #endif 21 #endif
22 22
23 namespace content { 23 namespace content {
24 24
25 namespace { 25 namespace {
26 26
// Supported hardware sample rates for the output side.
#if defined(OS_WIN) || defined(OS_MACOSX)
// AudioHardwareConfig::GetOutputSampleRate() asks the audio layer for its
// current sample rate (set by the user) on Windows and Mac OS X. The rates
// listed below add restrictions and Initialize() will fail if the user
// selects any rate outside these ranges.
const int kValidOutputRates[] = {96000, 48000, 44100, 32000, 16000};
#elif defined(OS_LINUX) || defined(OS_OPENBSD)
const int kValidOutputRates[] = {48000, 44100};
#elif defined(OS_ANDROID)
// On Android, the most popular sampling rate is 16000.
const int kValidOutputRates[] = {48000, 44100, 16000};
#else
const int kValidOutputRates[] = {44100};
#endif
42 42
43 // TODO(xians): Merge the following code to WebRtcAudioCapturer, or remove. 43 // TODO(xians): Merge the following code to WebRtcAudioCapturer, or remove.
44 enum AudioFramesPerBuffer { 44 enum AudioFramesPerBuffer {
45 k160, 45 k160,
46 k320, 46 k320,
47 k440, // WebRTC works internally with 440 audio frames at 44.1kHz. 47 k440, // WebRTC works internally with 440 audio frames at 44.1kHz.
48 k480, 48 k480,
49 k640, 49 k640,
50 k880, 50 k880,
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
85 } // namespace 85 } // namespace
86 86
87 WebRtcAudioRenderer::WebRtcAudioRenderer(int source_render_view_id) 87 WebRtcAudioRenderer::WebRtcAudioRenderer(int source_render_view_id)
88 : state_(UNINITIALIZED), 88 : state_(UNINITIALIZED),
89 source_render_view_id_(source_render_view_id), 89 source_render_view_id_(source_render_view_id),
90 source_(NULL), 90 source_(NULL),
91 play_ref_count_(0) { 91 play_ref_count_(0) {
92 } 92 }
93 93
94 WebRtcAudioRenderer::~WebRtcAudioRenderer() { 94 WebRtcAudioRenderer::~WebRtcAudioRenderer() {
95 DCHECK(thread_checker_.CalledOnValidThread());
95 DCHECK_EQ(state_, UNINITIALIZED); 96 DCHECK_EQ(state_, UNINITIALIZED);
96 buffer_.reset(); 97 buffer_.reset();
97 } 98 }
98 99
99 bool WebRtcAudioRenderer::Initialize(WebRtcAudioRendererSource* source) { 100 bool WebRtcAudioRenderer::Initialize(WebRtcAudioRendererSource* source) {
101 DVLOG(1) << "WebRtcAudioRenderer::Initialize()";
102 DCHECK(thread_checker_.CalledOnValidThread());
100 base::AutoLock auto_lock(lock_); 103 base::AutoLock auto_lock(lock_);
101 DCHECK_EQ(state_, UNINITIALIZED); 104 DCHECK_EQ(state_, UNINITIALIZED);
102 DCHECK(source); 105 DCHECK(source);
103 DCHECK(!sink_); 106 DCHECK(!sink_);
104 DCHECK(!source_); 107 DCHECK(!source_);
105 108
106 sink_ = AudioDeviceFactory::NewOutputDevice(); 109 sink_ = AudioDeviceFactory::NewOutputDevice();
107 DCHECK(sink_); 110 DCHECK(sink_);
108 111
109 // Ask the renderer for the default audio output hardware sample-rate. 112 // Ask the renderer for the default audio output hardware sample-rate.
110 media::AudioHardwareConfig* hardware_config = 113 media::AudioHardwareConfig* hardware_config =
111 RenderThreadImpl::current()->GetAudioHardwareConfig(); 114 RenderThreadImpl::current()->GetAudioHardwareConfig();
112 int sample_rate = hardware_config->GetOutputSampleRate(); 115 int sample_rate = hardware_config->GetOutputSampleRate();
113 DVLOG(1) << "Audio output hardware sample rate: " << sample_rate; 116 DVLOG(1) << "Audio output hardware sample rate: " << sample_rate;
114 UMA_HISTOGRAM_ENUMERATION("WebRTC.AudioOutputSampleRate", 117 UMA_HISTOGRAM_ENUMERATION("WebRTC.AudioOutputSampleRate",
115 sample_rate, media::kUnexpectedAudioSampleRate); 118 sample_rate, media::kUnexpectedAudioSampleRate);
116 119
117 // Verify that the reported output hardware sample rate is supported 120 // Verify that the reported output hardware sample rate is supported
118 // on the current platform. 121 // on the current platform.
119 if (std::find(&kValidOutputRates[0], 122 if (std::find(&kValidOutputRates[0],
120 &kValidOutputRates[0] + arraysize(kValidOutputRates), 123 &kValidOutputRates[0] + arraysize(kValidOutputRates),
121 sample_rate) == 124 sample_rate) ==
122 &kValidOutputRates[arraysize(kValidOutputRates)]) { 125 &kValidOutputRates[arraysize(kValidOutputRates)]) {
123 DLOG(ERROR) << sample_rate << " is not a supported output rate."; 126 DLOG(ERROR) << sample_rate << " is not a supported output rate.";
124 return false; 127 return false;
125 } 128 }
126 129
127 media::ChannelLayout channel_layout = media::CHANNEL_LAYOUT_STEREO; 130 // Set up audio parameters for the source, i.e., the WebRTC client.
131 // The WebRTC client only supports multiples of 10ms as buffer size where
132 // 10ms is preferred for lowest possible delay.
128 133
134 media::AudioParameters source_params;
129 int buffer_size = 0; 135 int buffer_size = 0;
130 136
131 // Windows 137 if (sample_rate % 8000 == 0) {
132 #if defined(OS_WIN)
133 // Always use stereo rendering on Windows.
134 channel_layout = media::CHANNEL_LAYOUT_STEREO;
135
136 // Render side: AUDIO_PCM_LOW_LATENCY is based on the Core Audio (WASAPI)
137 // API which was introduced in Windows Vista. For lower Windows versions,
138 // a callback-driven Wave implementation is used instead. An output buffer
139 // size of 10ms works well for WASAPI but 30ms is needed for Wave.
140
141 // Use different buffer sizes depending on the current hardware sample rate.
142 if (sample_rate == 96000 || sample_rate == 48000) {
143 buffer_size = (sample_rate / 100); 138 buffer_size = (sample_rate / 100);
139 } else if (sample_rate == 44100) {
140 // The resampler in WebRTC does not support 441 as input. We hard code
141 // the size to 440 (~0.9977ms) instead and rely on the internal jitter
142 // buffer in WebRTC to deal with the resulting drift.
143 // TODO(henrika): ensure that WebRTC supports 44100Hz and use 441 instead.
144 buffer_size = 440;
144 } else { 145 } else {
145 // We do run at 44.1kHz at the actual audio layer, but ask for frames 146 return false;
146 // at 44.0kHz to ensure that we can feed them to the webrtc::VoiceEngine.
147 // TODO(henrika): figure out why we seem to need 20ms here for glitch-
148 // free audio.
149 buffer_size = 2 * 440;
150 } 147 }
151 148
152 // Windows XP and lower can't cope with 10 ms output buffer size. 149 source_params.Reset(media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
153 // It must be extended to 30 ms (60 ms will be used internally by WaveOut). 150 media::CHANNEL_LAYOUT_STEREO,
154 // Note that we can't use media::CoreAudioUtil::IsSupported() here since it 151 sample_rate, 16, buffer_size);
155 // tries to load the Audioses.dll and it will always fail in the render 152
156 // process. 153 // Set up audio parameters for the sink, i.e., the native audio output stream.
157 if (base::win::GetVersion() < base::win::VERSION_VISTA) { 154 // We strive to open up using native parameters to achieve best possible
158 buffer_size = 3 * buffer_size; 155 // performance and to ensure that no FIFO is needed on the browser side to
159 DLOG(WARNING) << "Extending the output buffer size by a factor of three " 156 // match the client request. Any mismatch between the source and the sink is
160 << "since Windows XP has been detected."; 157 // taken care of in this class instead using a pull FIFO.
158
159 media::AudioParameters sink_params;
160
161 buffer_size = hardware_config->GetOutputBufferSize();
162 sink_params.Reset(media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
163 media::CHANNEL_LAYOUT_STEREO,
164 sample_rate, 16, buffer_size);
165
166 // Create a FIFO if re-buffering is required to match the source input with
167 // the sink request. The source acts as provider here and the sink as
168 // consumer.
169 if (source_params.frames_per_buffer() != sink_params.frames_per_buffer()) {
170 DVLOG(1) << "Rebuffering from " << source_params.frames_per_buffer()
171 << " to " << sink_params.frames_per_buffer();
172 audio_fifo_.reset(new media::AudioPullFifo(
173 source_params.channels(),
174 source_params.frames_per_buffer(),
175 base::Bind(
176 &WebRtcAudioRenderer::SourceCallback,
177 base::Unretained(this))));
161 } 178 }
162 #elif defined(OS_MACOSX)
163 channel_layout = media::CHANNEL_LAYOUT_MONO;
164 179
165 // Render side: AUDIO_PCM_LOW_LATENCY on Mac OS X is based on a callback- 180 frame_duration_milliseconds_ = base::Time::kMillisecondsPerSecond /
166 // driven Core Audio implementation. Tests have shown that 10ms is a suitable 181 static_cast<double>(source_params.sample_rate());
167 // frame size to use for 96kHz, 48kHz and 44.1kHz.
168
169 // Use different buffer sizes depending on the current hardware sample rate.
170 if (sample_rate == 96000 || sample_rate == 48000) {
171 buffer_size = (sample_rate / 100);
172 } else {
173 // We do run at 44.1kHz at the actual audio layer, but ask for frames
174 // at 44.0kHz to ensure that we can feed them to the webrtc::VoiceEngine.
175 buffer_size = 440;
176 }
177 #elif defined(OS_LINUX) || defined(OS_OPENBSD)
178 channel_layout = media::CHANNEL_LAYOUT_MONO;
179
180 // Based on tests using the current ALSA implementation in Chrome, we have
181 // found that 10ms buffer size on the output side works fine.
182 buffer_size = 480;
183 #elif defined(OS_ANDROID)
184 channel_layout = media::CHANNEL_LAYOUT_MONO;
185
186 // The buffer size lower than GetAudioHardwareBufferSize() will lead to
187 // choppy sound because AudioOutputResampler will read the buffer multiple
188 // times in a row without allowing the client to re-fill the buffer.
189 // TODO(dwkang): check if 2048 - GetAudioHardwareBufferSize() is the right
190 // value for Android and do further tuning.
191 buffer_size = 2048;
192 #else
193 DLOG(ERROR) << "Unsupported platform";
194 return false;
195 #endif
196
197 // Store utilized parameters to ensure that we can check them
198 // after a successful initialization.
199 params_.Reset(media::AudioParameters::AUDIO_PCM_LOW_LATENCY, channel_layout,
200 sample_rate, 16, buffer_size);
201 182
202 // Allocate local audio buffers based on the parameters above. 183 // Allocate local audio buffers based on the parameters above.
203 // It is assumed that each audio sample contains 16 bits and each 184 // It is assumed that each audio sample contains 16 bits and each
204 // audio frame contains one or two audio samples depending on the 185 // audio frame contains one or two audio samples depending on the
205 // number of channels. 186 // number of channels.
206 buffer_.reset(new int16[params_.frames_per_buffer() * params_.channels()]); 187 buffer_.reset(
188 new int16[source_params.frames_per_buffer() * source_params.channels()]);
207 189
208 source_ = source; 190 source_ = source;
209 source->SetRenderFormat(params_); 191 source->SetRenderFormat(source_params);
210 192
211 // Configure the audio rendering client and start the rendering. 193 // Configure the audio rendering client and start rendering.
212 sink_->Initialize(params_, this); 194 sink_->Initialize(sink_params, this);
213 sink_->SetSourceRenderView(source_render_view_id_); 195 sink_->SetSourceRenderView(source_render_view_id_);
214 sink_->Start(); 196 sink_->Start();
215 197
198 // User must call Play() before any audio can be heard.
216 state_ = PAUSED; 199 state_ = PAUSED;
217 200
218 UMA_HISTOGRAM_ENUMERATION("WebRTC.AudioOutputChannelLayout", 201 UMA_HISTOGRAM_ENUMERATION("WebRTC.AudioOutputChannelLayout",
219 channel_layout, media::CHANNEL_LAYOUT_MAX); 202 source_params.channel_layout(),
203 media::CHANNEL_LAYOUT_MAX);
220 UMA_HISTOGRAM_ENUMERATION("WebRTC.AudioOutputFramesPerBuffer", 204 UMA_HISTOGRAM_ENUMERATION("WebRTC.AudioOutputFramesPerBuffer",
221 buffer_size, kUnexpectedAudioBufferSize); 205 source_params.frames_per_buffer(),
222 AddHistogramFramesPerBuffer(buffer_size); 206 kUnexpectedAudioBufferSize);
207 AddHistogramFramesPerBuffer(source_params.frames_per_buffer());
223 208
224 return true; 209 return true;
225 } 210 }
226 211
227 void WebRtcAudioRenderer::Start() { 212 void WebRtcAudioRenderer::Start() {
228 // TODO(xians): refactor to make usage of Start/Stop more symmetric. 213 // TODO(xians): refactor to make usage of Start/Stop more symmetric.
229 NOTIMPLEMENTED(); 214 NOTIMPLEMENTED();
230 } 215 }
231 216
232 void WebRtcAudioRenderer::Play() { 217 void WebRtcAudioRenderer::Play() {
218 DVLOG(1) << "WebRtcAudioRenderer::Play()";
219 DCHECK(thread_checker_.CalledOnValidThread());
233 base::AutoLock auto_lock(lock_); 220 base::AutoLock auto_lock(lock_);
234 if (state_ == UNINITIALIZED) 221 if (state_ == UNINITIALIZED)
235 return; 222 return;
236 223
237 DCHECK(play_ref_count_ == 0 || state_ == PLAYING); 224 DCHECK(play_ref_count_ == 0 || state_ == PLAYING);
238 ++play_ref_count_; 225 ++play_ref_count_;
239 state_ = PLAYING; 226 state_ = PLAYING;
227
228 if (audio_fifo_)
229 audio_fifo_->Clear();
240 } 230 }
241 231
242 void WebRtcAudioRenderer::Pause() { 232 void WebRtcAudioRenderer::Pause() {
233 DVLOG(1) << "WebRtcAudioRenderer::Pause()";
234 DCHECK(thread_checker_.CalledOnValidThread());
243 base::AutoLock auto_lock(lock_); 235 base::AutoLock auto_lock(lock_);
244 if (state_ == UNINITIALIZED) 236 if (state_ == UNINITIALIZED)
245 return; 237 return;
246 238
247 DCHECK_EQ(state_, PLAYING); 239 DCHECK_EQ(state_, PLAYING);
248 DCHECK_GT(play_ref_count_, 0); 240 DCHECK_GT(play_ref_count_, 0);
249 if (!--play_ref_count_) 241 if (!--play_ref_count_)
250 state_ = PAUSED; 242 state_ = PAUSED;
251 } 243 }
252 244
253 void WebRtcAudioRenderer::Stop() { 245 void WebRtcAudioRenderer::Stop() {
246 DVLOG(1) << "WebRtcAudioRenderer::Stop()";
247 DCHECK(thread_checker_.CalledOnValidThread());
254 base::AutoLock auto_lock(lock_); 248 base::AutoLock auto_lock(lock_);
255 if (state_ == UNINITIALIZED) 249 if (state_ == UNINITIALIZED)
256 return; 250 return;
257 251
258 source_->RemoveRenderer(this); 252 source_->RemoveRenderer(this);
259 source_ = NULL; 253 source_ = NULL;
260 sink_->Stop(); 254 sink_->Stop();
261 state_ = UNINITIALIZED; 255 state_ = UNINITIALIZED;
262 } 256 }
263 257
264 void WebRtcAudioRenderer::SetVolume(float volume) { 258 void WebRtcAudioRenderer::SetVolume(float volume) {
259 DCHECK(thread_checker_.CalledOnValidThread());
265 base::AutoLock auto_lock(lock_); 260 base::AutoLock auto_lock(lock_);
266 if (state_ == UNINITIALIZED) 261 if (state_ == UNINITIALIZED)
267 return; 262 return;
268 263
269 sink_->SetVolume(volume); 264 sink_->SetVolume(volume);
270 } 265 }
271 266
272 base::TimeDelta WebRtcAudioRenderer::GetCurrentRenderTime() const { 267 base::TimeDelta WebRtcAudioRenderer::GetCurrentRenderTime() const {
273 return base::TimeDelta(); 268 return base::TimeDelta();
274 } 269 }
275 270
276 bool WebRtcAudioRenderer::IsLocalRenderer() const { 271 bool WebRtcAudioRenderer::IsLocalRenderer() const {
277 return false; 272 return false;
278 } 273 }
279 274
280 int WebRtcAudioRenderer::Render(media::AudioBus* audio_bus, 275 int WebRtcAudioRenderer::Render(media::AudioBus* audio_bus,
281 int audio_delay_milliseconds) { 276 int audio_delay_milliseconds) {
282 { 277 base::AutoLock auto_lock(lock_);
283 base::AutoLock auto_lock(lock_); 278 if (!source_)
284 if (!source_) 279 return 0;
285 return 0;
286 // We need to keep render data for the |source_| reglardless of |state_|,
287 // otherwise the data will be buffered up inside |source_|.
288 source_->RenderData(reinterpret_cast<uint8*>(buffer_.get()),
289 audio_bus->channels(), audio_bus->frames(),
290 audio_delay_milliseconds);
291 280
292 // Return 0 frames to play out silence if |state_| is not PLAYING. 281 audio_delay_milliseconds_ = audio_delay_milliseconds;
293 if (state_ != PLAYING)
294 return 0;
295 }
296 282
297 // Deinterleave each channel and convert to 32-bit floating-point 283 if (audio_fifo_)
298 // with nominal range -1.0 -> +1.0 to match the callback format. 284 audio_fifo_->Consume(audio_bus, audio_bus->frames());
299 audio_bus->FromInterleaved(buffer_.get(), audio_bus->frames(), 285 else
300 params_.bits_per_sample() / 8); 286 SourceCallback(0, audio_bus);
301 return audio_bus->frames(); 287
288 return (state_ == PLAYING) ? audio_bus->frames() : 0;
302 } 289 }
303 290
304 void WebRtcAudioRenderer::OnRenderError() { 291 void WebRtcAudioRenderer::OnRenderError() {
305 NOTIMPLEMENTED(); 292 NOTIMPLEMENTED();
306 LOG(ERROR) << "OnRenderError()"; 293 LOG(ERROR) << "OnRenderError()";
307 } 294 }
308 295
296 // Called by AudioPullFifo when more data is necessary.
297 void WebRtcAudioRenderer::SourceCallback(
298 int fifo_frame_delay, media::AudioBus* audio_bus) {
299 DVLOG(2) << "WebRtcAudioRenderer::SourceCallback("
300 << fifo_frame_delay << ", "
301 << audio_bus->frames() << ")";
302
303 audio_delay_milliseconds_ += frame_duration_milliseconds_ * fifo_frame_delay;
304 DVLOG(2) << "audio_delay_milliseconds: " << audio_delay_milliseconds_;
305
306 // We need to keep render data for the |source_| regardless of |state_|,
307 // otherwise the data will be buffered up inside |source_|.
308 source_->RenderData(reinterpret_cast<uint8*>(buffer_.get()),
309 audio_bus->channels(), audio_bus->frames(),
310 audio_delay_milliseconds_);
311
312 // Avoid filling up the audio bus if we are not playing; instead
313 // return here and ensure that the returned value in Render() is 0.
314 if (state_ != PLAYING)
DaleCurtis 2013/02/02 00:19:51 you may want to audio_bus->Zero() here.
henrika (OOO until Aug 14) 2013/02/04 08:25:38 Thanks.
315 return;
316
317 // De-interleave each channel and convert to 32-bit floating-point
318 // with nominal range -1.0 -> +1.0 to match the callback format.
319 audio_bus->FromInterleaved(buffer_.get(),
320 audio_bus->frames(),
321 sizeof(buffer_[0]));
322 }
323
309 } // namespace content 324 } // namespace content
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698