Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(13)

Side by Side Diff: content/renderer/media/webrtc_audio_renderer.cc

Issue 12049070: Avoids irregular OnMoreData callbacks on Windows using Core Audio (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Moved FillRenderEndpointBufferWithSilence to CoreAudioUtil Created 7 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "content/renderer/media/webrtc_audio_renderer.h" 5 #include "content/renderer/media/webrtc_audio_renderer.h"
6 6
7 #include "base/logging.h" 7 #include "base/logging.h"
8 #include "base/metrics/histogram.h" 8 #include "base/metrics/histogram.h"
9 #include "base/string_util.h" 9 #include "base/string_util.h"
10 #include "content/renderer/media/audio_device_factory.h" 10 #include "content/renderer/media/audio_device_factory.h"
11 #include "content/renderer/media/audio_hardware.h" 11 #include "content/renderer/media/audio_hardware.h"
12 #include "content/renderer/media/renderer_audio_output_device.h" 12 #include "content/renderer/media/renderer_audio_output_device.h"
13 #include "content/renderer/media/webrtc_audio_device_impl.h" 13 #include "content/renderer/media/webrtc_audio_device_impl.h"
14 #include "media/audio/audio_parameters.h"
14 #include "media/audio/audio_util.h" 15 #include "media/audio/audio_util.h"
15 #include "media/audio/sample_rates.h" 16 #include "media/audio/sample_rates.h"
16 #if defined(OS_WIN) 17 #if defined(OS_WIN)
17 #include "base/win/windows_version.h" 18 #include "base/win/windows_version.h"
18 #include "media/audio/win/core_audio_util_win.h" 19 #include "media/audio/win/core_audio_util_win.h"
19 #endif 20 #endif
20 21
21 namespace content { 22 namespace content {
22 23
23 namespace { 24 namespace {
24 25
25 // Supported hardware sample rates for output sides. 26 // Supported hardware sample rates for output sides.
26 #if defined(OS_WIN) || defined(OS_MACOSX) 27 #if defined(OS_WIN) || defined(OS_MACOSX)
27 // media::GetAudioOutputHardwareSampleRate() asks the audio layer 28 // media::GetAudioOutputHardwareSampleRate() asks the audio layer
28 // for its current sample rate (set by the user) on Windows and Mac OS X. 29 // for its current sample rate (set by the user) on Windows and Mac OS X.
29 // The listed rates below add restrictions and Initialize() 30 // The listed rates below add restrictions and Initialize()
30 // will fail if the user selects any rate outside these ranges. 31 // will fail if the user selects any rate outside these ranges.
31 int kValidOutputRates[] = {96000, 48000, 44100}; 32 const int kValidOutputRates[] = {96000, 48000, 44100, 32000, 16000};
32 #elif defined(OS_LINUX) || defined(OS_OPENBSD) 33 #elif defined(OS_LINUX) || defined(OS_OPENBSD)
33 int kValidOutputRates[] = {48000, 44100}; 34 const int kValidOutputRates[] = {48000, 44100};
34 #elif defined(OS_ANDROID) 35 #elif defined(OS_ANDROID)
35 // On Android, the most popular sampling rate is 16000. 36 // On Android, the most popular sampling rate is 16000.
36 int kValidOutputRates[] = {48000, 44100, 16000}; 37 const int kValidOutputRates[] = {48000, 44100, 16000};
37 #else 38 #else
38 int kValidOutputRates[] = {44100}; 39 const int kValidOutputRates[] = {44100};
39 #endif 40 #endif
40 41
41 // TODO(xians): Merge the following code to WebRtcAudioCapturer, or remove. 42 // TODO(xians): Merge the following code to WebRtcAudioCapturer, or remove.
42 enum AudioFramesPerBuffer { 43 enum AudioFramesPerBuffer {
43 k160, 44 k160,
44 k320, 45 k320,
45 k440, // WebRTC works internally with 440 audio frames at 44.1kHz. 46 k440, // WebRTC works internally with 440 audio frames at 44.1kHz.
46 k480, 47 k480,
47 k640, 48 k640,
48 k880, 49 k880,
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
83 } // namespace 84 } // namespace
84 85
85 WebRtcAudioRenderer::WebRtcAudioRenderer(int source_render_view_id) 86 WebRtcAudioRenderer::WebRtcAudioRenderer(int source_render_view_id)
86 : state_(UNINITIALIZED), 87 : state_(UNINITIALIZED),
87 source_render_view_id_(source_render_view_id), 88 source_render_view_id_(source_render_view_id),
88 source_(NULL), 89 source_(NULL),
89 play_ref_count_(0) { 90 play_ref_count_(0) {
90 } 91 }
91 92
92 WebRtcAudioRenderer::~WebRtcAudioRenderer() { 93 WebRtcAudioRenderer::~WebRtcAudioRenderer() {
94 DCHECK(thread_checker_.CalledOnValidThread());
93 DCHECK_EQ(state_, UNINITIALIZED); 95 DCHECK_EQ(state_, UNINITIALIZED);
94 buffer_.reset(); 96 buffer_.reset();
95 } 97 }
96 98
97 bool WebRtcAudioRenderer::Initialize(WebRtcAudioRendererSource* source) { 99 bool WebRtcAudioRenderer::Initialize(WebRtcAudioRendererSource* source) {
100 DVLOG(1) << "WebRtcAudioRenderer::Initialize()";
101 DCHECK(thread_checker_.CalledOnValidThread());
98 base::AutoLock auto_lock(lock_); 102 base::AutoLock auto_lock(lock_);
99 DCHECK_EQ(state_, UNINITIALIZED); 103 DCHECK_EQ(state_, UNINITIALIZED);
100 DCHECK(source); 104 DCHECK(source);
101 DCHECK(!sink_); 105 DCHECK(!sink_);
102 DCHECK(!source_); 106 DCHECK(!source_);
103 107
104 sink_ = AudioDeviceFactory::NewOutputDevice(); 108 sink_ = AudioDeviceFactory::NewOutputDevice();
105 DCHECK(sink_); 109 DCHECK(sink_);
106 110
107 // Ask the browser for the default audio output hardware sample-rate. 111 // Ask the browser for the default audio output hardware sample-rate.
108 // This request is based on a synchronous IPC message. 112 // This request is based on a synchronous IPC message.
109 int sample_rate = GetAudioOutputSampleRate(); 113 int sample_rate = GetAudioOutputSampleRate();
110 DVLOG(1) << "Audio output hardware sample rate: " << sample_rate; 114 DVLOG(1) << "Audio output hardware sample rate: " << sample_rate;
111 UMA_HISTOGRAM_ENUMERATION("WebRTC.AudioOutputSampleRate", 115 UMA_HISTOGRAM_ENUMERATION("WebRTC.AudioOutputSampleRate",
112 sample_rate, media::kUnexpectedAudioSampleRate); 116 sample_rate, media::kUnexpectedAudioSampleRate);
113 117
114 // Verify that the reported output hardware sample rate is supported 118 // Verify that the reported output hardware sample rate is supported
115 // on the current platform. 119 // on the current platform.
116 if (std::find(&kValidOutputRates[0], 120 if (std::find(&kValidOutputRates[0],
117 &kValidOutputRates[0] + arraysize(kValidOutputRates), 121 &kValidOutputRates[0] + arraysize(kValidOutputRates),
118 sample_rate) == 122 sample_rate) ==
119 &kValidOutputRates[arraysize(kValidOutputRates)]) { 123 &kValidOutputRates[arraysize(kValidOutputRates)]) {
120 DLOG(ERROR) << sample_rate << " is not a supported output rate."; 124 DLOG(ERROR) << sample_rate << " is not a supported output rate.";
121 return false; 125 return false;
122 } 126 }
123 127
124 media::ChannelLayout channel_layout = media::CHANNEL_LAYOUT_STEREO; 128 // Set up audio parameters for the source, i.e., the WebRTC client.
129 // The WebRTC client only supports multiples of 10ms as buffer size where
130 // 10ms is preferred for lowest possible delay.
125 131
132 media::AudioParameters source_params;
126 int buffer_size = 0; 133 int buffer_size = 0;
127 134
128 // Windows 135 if (sample_rate % 8000 == 0) {
129 #if defined(OS_WIN)
130 // Always use stereo rendering on Windows.
131 channel_layout = media::CHANNEL_LAYOUT_STEREO;
132
133 // Render side: AUDIO_PCM_LOW_LATENCY is based on the Core Audio (WASAPI)
134 // API which was introduced in Windows Vista. For lower Windows versions,
135 // a callback-driven Wave implementation is used instead. An output buffer
136 // size of 10ms works well for WASAPI but 30ms is needed for Wave.
137
138 // Use different buffer sizes depending on the current hardware sample rate.
139 if (sample_rate == 96000 || sample_rate == 48000) {
140 buffer_size = (sample_rate / 100); 136 buffer_size = (sample_rate / 100);
137 } else if (sample_rate == 44100) {
138 // The resampler in WebRTC does not support 441 as input. We hard code
139 // the size to 440 (~0.9977ms) instead and rely on the internal jitter
140 // buffer in WebRTC to deal with the resulting drift.
141 // TODO(henrika): ensure that WebRTC supports 44100Hz and use 441 instead.
142 buffer_size = 440;
141 } else { 143 } else {
142 // We do run at 44.1kHz at the actual audio layer, but ask for frames 144 return false;
143 // at 44.0kHz to ensure that we can feed them to the webrtc::VoiceEngine.
144 // TODO(henrika): figure out why we seem to need 20ms here for glitch-
145 // free audio.
146 buffer_size = 2 * 440;
147 } 145 }
148 146
149 // Windows XP and lower can't cope with 10 ms output buffer size. 147 source_params.Reset(media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
150 // It must be extended to 30 ms (60 ms will be used internally by WaveOut). 148 media::CHANNEL_LAYOUT_STEREO,
151 // Note that we can't use media::CoreAudioUtil::IsSupported() here since it 149 sample_rate, 16, buffer_size);
152 // tries to load the Audioses.dll and it will always fail in the render
153 // process.
154 if (base::win::GetVersion() < base::win::VERSION_VISTA) {
155 buffer_size = 3 * buffer_size;
156 DLOG(WARNING) << "Extending the output buffer size by a factor of three "
157 << "since Windows XP has been detected.";
158 }
159 #elif defined(OS_MACOSX)
160 channel_layout = media::CHANNEL_LAYOUT_MONO;
161 150
162 // Render side: AUDIO_PCM_LOW_LATENCY on Mac OS X is based on a callback- 151 // Set up audio parameters for the sink, i.e., the native audio output stream.
163 // driven Core Audio implementation. Tests have shown that 10ms is a suitable 152 // We strive to open up using native parameters to achieve best possible
164 // frame size to use for 96kHz, 48kHz and 44.1kHz. 153 // performance and to ensure that no FIFO is needed on the browser side to
154 // match the client request. Any mismatch between the source and the sink is
155 // taken care of in this class instead, using a pull FIFO.
165 156
166 // Use different buffer sizes depending on the current hardware sample rate. 157 media::AudioParameters sink_params;
167 if (sample_rate == 96000 || sample_rate == 48000) {
168 buffer_size = (sample_rate / 100);
169 } else {
170 // We do run at 44.1kHz at the actual audio layer, but ask for frames
171 // at 44.0kHz to ensure that we can feed them to the webrtc::VoiceEngine.
172 buffer_size = 440;
173 }
174 #elif defined(OS_LINUX) || defined(OS_OPENBSD)
175 channel_layout = media::CHANNEL_LAYOUT_MONO;
176 158
177 // Based on tests using the current ALSA implementation in Chrome, we have 159 #if defined(OS_WIN)
178 // found that 10ms buffer size on the output side works fine. 160 // TODO(henrika): sort out Windows XP support.
179 buffer_size = 480;
180 #elif defined(OS_ANDROID)
181 channel_layout = media::CHANNEL_LAYOUT_MONO;
182
183 // The buffer size lower than GetAudioHardwareBufferSize() will lead to
184 // choppy sound because AudioOutputResampler will read the buffer multiple
185 // times in a row without allowing the client to re-fill the buffer.
186 // TODO(dwkang): check if 2048 - GetAudioHardwareBufferSize() is the right
187 // value for Android and do further tuning.
188 buffer_size = 2048;
189 #else
190 DLOG(ERROR) << "Unsupported platform";
191 return false;
192 #endif 161 #endif
193 162
194 // Store utilized parameters to ensure that we can check them 163 buffer_size = GetAudioOutputBufferSize();
195 // after a successful initialization. 164 sink_params.Reset(media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
196 params_.Reset(media::AudioParameters::AUDIO_PCM_LOW_LATENCY, channel_layout, 165 media::CHANNEL_LAYOUT_STEREO,
197 sample_rate, 16, buffer_size); 166 sample_rate, 16, buffer_size);
167
168 // Create a FIFO if re-buffering is required to match the source input with
169 // the sink request. The source acts as provider here and the sink as
170 // consumer.
171 if (source_params.frames_per_buffer() != sink_params.frames_per_buffer()) {
172 DVLOG(1) << "Rebuffering from " << source_params.frames_per_buffer()
173 << " to " << sink_params.frames_per_buffer();
174 audio_fifo_.reset(new media::AudioPullFifo(
175 source_params.channels(),
176 source_params.frames_per_buffer(),
177 base::Bind(
178 &WebRtcAudioRenderer::SourceCallback,
179 base::Unretained(this))));
180 }
181
182 frame_duration_milliseconds_ = base::Time::kMillisecondsPerSecond /
183 static_cast<double>(source_params.sample_rate());
198 184
199 // Allocate local audio buffers based on the parameters above. 185 // Allocate local audio buffers based on the parameters above.
200 // It is assumed that each audio sample contains 16 bits and each 186 // It is assumed that each audio sample contains 16 bits and each
201 // audio frame contains one or two audio samples depending on the 187 // audio frame contains one or two audio samples depending on the
202 // number of channels. 188 // number of channels.
203 buffer_.reset(new int16[params_.frames_per_buffer() * params_.channels()]); 189 buffer_.reset(
190 new int16[source_params.frames_per_buffer() * source_params.channels()]);
204 191
205 source_ = source; 192 source_ = source;
206 source->SetRenderFormat(params_); 193 source->SetRenderFormat(source_params);
207 194
208 // Configure the audio rendering client and start the rendering. 195 // Configure the audio rendering client and start rendering.
209 sink_->Initialize(params_, this); 196 sink_->Initialize(sink_params, this);
210 sink_->SetSourceRenderView(source_render_view_id_); 197 sink_->SetSourceRenderView(source_render_view_id_);
211 sink_->Start(); 198 sink_->Start();
212 199
200 // User must call Play() before any audio can be heard.
213 state_ = PAUSED; 201 state_ = PAUSED;
214 202
215 UMA_HISTOGRAM_ENUMERATION("WebRTC.AudioOutputChannelLayout", 203 UMA_HISTOGRAM_ENUMERATION("WebRTC.AudioOutputChannelLayout",
216 channel_layout, media::CHANNEL_LAYOUT_MAX); 204 source_params.channel_layout(),
205 media::CHANNEL_LAYOUT_MAX);
217 UMA_HISTOGRAM_ENUMERATION("WebRTC.AudioOutputFramesPerBuffer", 206 UMA_HISTOGRAM_ENUMERATION("WebRTC.AudioOutputFramesPerBuffer",
218 buffer_size, kUnexpectedAudioBufferSize); 207 source_params.frames_per_buffer(),
219 AddHistogramFramesPerBuffer(buffer_size); 208 kUnexpectedAudioBufferSize);
209 AddHistogramFramesPerBuffer(source_params.frames_per_buffer());
220 210
221 return true; 211 return true;
222 } 212 }
223 213
224 void WebRtcAudioRenderer::Start() { 214 void WebRtcAudioRenderer::Start() {
225 // TODO(xians): refactor to make usage of Start/Stop more symmetric. 215 // TODO(xians): refactor to make usage of Start/Stop more symmetric.
226 NOTIMPLEMENTED(); 216 NOTIMPLEMENTED();
227 } 217 }
228 218
229 void WebRtcAudioRenderer::Play() { 219 void WebRtcAudioRenderer::Play() {
220 DVLOG(1) << "WebRtcAudioRenderer::Play()";
221 DCHECK(thread_checker_.CalledOnValidThread());
230 base::AutoLock auto_lock(lock_); 222 base::AutoLock auto_lock(lock_);
231 if (state_ == UNINITIALIZED) 223 if (state_ == UNINITIALIZED)
232 return; 224 return;
233 225
234 DCHECK(play_ref_count_ == 0 || state_ == PLAYING); 226 DCHECK(play_ref_count_ == 0 || state_ == PLAYING);
235 ++play_ref_count_; 227 ++play_ref_count_;
236 state_ = PLAYING; 228 state_ = PLAYING;
237 } 229 }
238 230
239 void WebRtcAudioRenderer::Pause() { 231 void WebRtcAudioRenderer::Pause() {
232 DVLOG(1) << "WebRtcAudioRenderer::Pause()";
233 DCHECK(thread_checker_.CalledOnValidThread());
240 base::AutoLock auto_lock(lock_); 234 base::AutoLock auto_lock(lock_);
241 if (state_ == UNINITIALIZED) 235 if (state_ == UNINITIALIZED)
242 return; 236 return;
243 237
244 DCHECK_EQ(state_, PLAYING); 238 DCHECK_EQ(state_, PLAYING);
245 DCHECK_GT(play_ref_count_, 0); 239 DCHECK_GT(play_ref_count_, 0);
246 if (!--play_ref_count_) 240 if (!--play_ref_count_)
247 state_ = PAUSED; 241 state_ = PAUSED;
248 } 242 }
249 243
250 void WebRtcAudioRenderer::Stop() { 244 void WebRtcAudioRenderer::Stop() {
245 DVLOG(1) << "WebRtcAudioRenderer::Stop()";
246 DCHECK(thread_checker_.CalledOnValidThread());
251 base::AutoLock auto_lock(lock_); 247 base::AutoLock auto_lock(lock_);
252 if (state_ == UNINITIALIZED) 248 if (state_ == UNINITIALIZED)
253 return; 249 return;
254 250
255 source_->RemoveRenderer(this); 251 source_->RemoveRenderer(this);
256 source_ = NULL; 252 source_ = NULL;
257 sink_->Stop(); 253 sink_->Stop();
258 state_ = UNINITIALIZED; 254 state_ = UNINITIALIZED;
259 } 255 }
260 256
261 void WebRtcAudioRenderer::SetVolume(float volume) { 257 void WebRtcAudioRenderer::SetVolume(float volume) {
258 DCHECK(thread_checker_.CalledOnValidThread());
262 base::AutoLock auto_lock(lock_); 259 base::AutoLock auto_lock(lock_);
263 if (state_ == UNINITIALIZED) 260 if (state_ == UNINITIALIZED)
264 return; 261 return;
265 262
266 sink_->SetVolume(volume); 263 sink_->SetVolume(volume);
267 } 264 }
268 265
269 base::TimeDelta WebRtcAudioRenderer::GetCurrentRenderTime() const { 266 base::TimeDelta WebRtcAudioRenderer::GetCurrentRenderTime() const {
270 return base::TimeDelta(); 267 return base::TimeDelta();
271 } 268 }
272 269
273 bool WebRtcAudioRenderer::IsLocalRenderer() const { 270 bool WebRtcAudioRenderer::IsLocalRenderer() const {
274 return false; 271 return false;
275 } 272 }
276 273
277 int WebRtcAudioRenderer::Render(media::AudioBus* audio_bus, 274 int WebRtcAudioRenderer::Render(media::AudioBus* audio_bus,
278 int audio_delay_milliseconds) { 275 int audio_delay_milliseconds) {
279 { 276 base::AutoLock auto_lock(lock_);
280 base::AutoLock auto_lock(lock_); 277 if (!source_)
281 if (!source_) 278 return 0;
282 return 0;
283 // We need to keep render data for the |source_| reglardless of |state_|,
284 // otherwise the data will be buffered up inside |source_|.
285 source_->RenderData(reinterpret_cast<uint8*>(buffer_.get()),
286 audio_bus->channels(), audio_bus->frames(),
287 audio_delay_milliseconds);
288 279
289 // Return 0 frames to play out silence if |state_| is not PLAYING. 280 audio_delay_milliseconds_ = audio_delay_milliseconds;
290 if (state_ != PLAYING)
291 return 0;
292 }
293 281
294 // Deinterleave each channel and convert to 32-bit floating-point 282 if (audio_fifo_)
295 // with nominal range -1.0 -> +1.0 to match the callback format. 283 audio_fifo_->Consume(audio_bus, audio_bus->frames());
296 audio_bus->FromInterleaved(buffer_.get(), audio_bus->frames(), 284 else
297 params_.bits_per_sample() / 8); 285 SourceCallback(0, audio_bus);
298 return audio_bus->frames(); 286
287 return (state_ == PLAYING) ? audio_bus->frames() : 0;
299 } 288 }
300 289
301 void WebRtcAudioRenderer::OnRenderError() { 290 void WebRtcAudioRenderer::OnRenderError() {
302 NOTIMPLEMENTED(); 291 NOTIMPLEMENTED();
303 LOG(ERROR) << "OnRenderError()"; 292 LOG(ERROR) << "OnRenderError()";
304 } 293 }
305 294
295 // Called by AudioPullFifo when more data is necessary.
296 void WebRtcAudioRenderer::SourceCallback(
297 int fifo_frame_delay, media::AudioBus* audio_bus) {
298 DVLOG(2) << "WebRtcAudioRenderer::SourceCallback("
299 << fifo_frame_delay << ", "
300 << audio_bus->frames() << ")";
301
302 audio_delay_milliseconds_ += frame_duration_milliseconds_ * fifo_frame_delay;
303 DVLOG(2) << "audio_delay_milliseconds: " << audio_delay_milliseconds_;
304
305 // We need to keep render data for the |source_| regardless of |state_|,
306 // otherwise the data will be buffered up inside |source_|.
307 source_->RenderData(reinterpret_cast<uint8*>(buffer_.get()),
308 audio_bus->channels(), audio_bus->frames(),
309 audio_delay_milliseconds_);
310
311 // Avoid filling up the audio bus if we are not playing; instead
312 // return here and ensure that the returned value in Render() is 0.
313 if (state_ != PLAYING)
314 return;
315
316 // De-interleave each channel and convert to 32-bit floating-point
317 // with nominal range -1.0 -> +1.0 to match the callback format.
318 audio_bus->FromInterleaved(buffer_.get(),
319 audio_bus->frames(),
320 sizeof(buffer_[0]));
321 }
322
306 } // namespace content 323 } // namespace content
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698