Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(204)

Side by Side Diff: content/renderer/media/webrtc_audio_renderer.cc

Issue 12049070: Avoids irregular OnMoreData callbacks on Windows using Core Audio (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Non trivial rebase Created 7 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "content/renderer/media/webrtc_audio_renderer.h" 5 #include "content/renderer/media/webrtc_audio_renderer.h"
6 6
7 #include "base/logging.h" 7 #include "base/logging.h"
8 #include "base/metrics/histogram.h" 8 #include "base/metrics/histogram.h"
9 #include "base/string_util.h" 9 #include "base/string_util.h"
10 #include "content/renderer/media/audio_device_factory.h" 10 #include "content/renderer/media/audio_device_factory.h"
11 #include "content/renderer/media/renderer_audio_output_device.h" 11 #include "content/renderer/media/renderer_audio_output_device.h"
12 #include "content/renderer/media/webrtc_audio_device_impl.h" 12 #include "content/renderer/media/webrtc_audio_device_impl.h"
13 #include "content/renderer/render_thread_impl.h" 13 #include "content/renderer/render_thread_impl.h"
14 #include "media/audio/audio_util.h" 14 #include "media/audio/audio_parameters.h"
15 #include "media/audio/sample_rates.h" 15 #include "media/audio/sample_rates.h"
16 #include "media/base/audio_hardware_config.h" 16 #include "media/base/audio_hardware_config.h"
17 17
18 #if defined(OS_WIN) 18 #if defined(OS_WIN)
19 #include "base/win/windows_version.h" 19 #include "base/win/windows_version.h"
20 #include "media/audio/win/core_audio_util_win.h" 20 #include "media/audio/win/core_audio_util_win.h"
21 #endif 21 #endif
22 22
23 namespace content { 23 namespace content {
24 24
25 namespace { 25 namespace {
26 26
// Supported hardware sample rates for the output side.
#if defined(OS_WIN) || defined(OS_MACOSX)
// AudioHardwareConfig::GetOutputSampleRate() asks the audio layer for its
// current sample rate (set by the user) on Windows and Mac OS X. The rates
// listed below add restrictions and Initialize() will fail if the user
// selects any rate outside these ranges.
const int kValidOutputRates[] = {96000, 48000, 44100, 32000, 16000};
#elif defined(OS_LINUX) || defined(OS_OPENBSD)
const int kValidOutputRates[] = {48000, 44100};
#elif defined(OS_ANDROID)
// On Android, the most popular sampling rate is 16000.
const int kValidOutputRates[] = {48000, 44100, 16000};
#else
const int kValidOutputRates[] = {44100};
#endif
42 42
43 // TODO(xians): Merge the following code to WebRtcAudioCapturer, or remove. 43 // TODO(xians): Merge the following code to WebRtcAudioCapturer, or remove.
44 enum AudioFramesPerBuffer { 44 enum AudioFramesPerBuffer {
45 k160, 45 k160,
46 k320, 46 k320,
47 k440, // WebRTC works internally with 440 audio frames at 44.1kHz. 47 k440, // WebRTC works internally with 440 audio frames at 44.1kHz.
48 k480, 48 k480,
49 k640, 49 k640,
50 k880, 50 k880,
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
85 } // namespace 85 } // namespace
86 86
87 WebRtcAudioRenderer::WebRtcAudioRenderer(int source_render_view_id) 87 WebRtcAudioRenderer::WebRtcAudioRenderer(int source_render_view_id)
88 : state_(UNINITIALIZED), 88 : state_(UNINITIALIZED),
89 source_render_view_id_(source_render_view_id), 89 source_render_view_id_(source_render_view_id),
90 source_(NULL), 90 source_(NULL),
91 play_ref_count_(0) { 91 play_ref_count_(0) {
92 } 92 }
93 93
94 WebRtcAudioRenderer::~WebRtcAudioRenderer() { 94 WebRtcAudioRenderer::~WebRtcAudioRenderer() {
95 DCHECK(thread_checker_.CalledOnValidThread());
95 DCHECK_EQ(state_, UNINITIALIZED); 96 DCHECK_EQ(state_, UNINITIALIZED);
96 buffer_.reset(); 97 buffer_.reset();
97 } 98 }
98 99
99 bool WebRtcAudioRenderer::Initialize(WebRtcAudioRendererSource* source) { 100 bool WebRtcAudioRenderer::Initialize(WebRtcAudioRendererSource* source) {
101 DVLOG(1) << "WebRtcAudioRenderer::Initialize()";
102 DCHECK(thread_checker_.CalledOnValidThread());
100 base::AutoLock auto_lock(lock_); 103 base::AutoLock auto_lock(lock_);
101 DCHECK_EQ(state_, UNINITIALIZED); 104 DCHECK_EQ(state_, UNINITIALIZED);
102 DCHECK(source); 105 DCHECK(source);
103 DCHECK(!sink_); 106 DCHECK(!sink_);
104 DCHECK(!source_); 107 DCHECK(!source_);
105 108
106 sink_ = AudioDeviceFactory::NewOutputDevice(); 109 sink_ = AudioDeviceFactory::NewOutputDevice();
107 DCHECK(sink_); 110 DCHECK(sink_);
108 111
109 // Ask the renderer for the default audio output hardware sample-rate. 112 // Ask the renderer for the default audio output hardware sample-rate.
110 media::AudioHardwareConfig* hardware_config = 113 media::AudioHardwareConfig* hardware_config =
111 RenderThreadImpl::current()->GetAudioHardwareConfig(); 114 RenderThreadImpl::current()->GetAudioHardwareConfig();
112 int sample_rate = hardware_config->GetOutputSampleRate(); 115 int sample_rate = hardware_config->GetOutputSampleRate();
113 DVLOG(1) << "Audio output hardware sample rate: " << sample_rate; 116 DVLOG(1) << "Audio output hardware sample rate: " << sample_rate;
114 UMA_HISTOGRAM_ENUMERATION("WebRTC.AudioOutputSampleRate", 117 UMA_HISTOGRAM_ENUMERATION("WebRTC.AudioOutputSampleRate",
115 sample_rate, media::kUnexpectedAudioSampleRate); 118 sample_rate, media::kUnexpectedAudioSampleRate);
116 119
117 // Verify that the reported output hardware sample rate is supported 120 // Verify that the reported output hardware sample rate is supported
118 // on the current platform. 121 // on the current platform.
119 if (std::find(&kValidOutputRates[0], 122 if (std::find(&kValidOutputRates[0],
120 &kValidOutputRates[0] + arraysize(kValidOutputRates), 123 &kValidOutputRates[0] + arraysize(kValidOutputRates),
121 sample_rate) == 124 sample_rate) ==
122 &kValidOutputRates[arraysize(kValidOutputRates)]) { 125 &kValidOutputRates[arraysize(kValidOutputRates)]) {
123 DLOG(ERROR) << sample_rate << " is not a supported output rate."; 126 DLOG(ERROR) << sample_rate << " is not a supported output rate.";
124 return false; 127 return false;
125 } 128 }
126 129
127 media::ChannelLayout channel_layout = media::CHANNEL_LAYOUT_STEREO; 130 // Set up audio parameters for the source, i.e., the WebRTC client.
131 // The WebRTC client only supports multiples of 10ms as buffer size where
132 // 10ms is preferred for lowest possible delay.
128 133
134 media::AudioParameters source_params;
129 int buffer_size = 0; 135 int buffer_size = 0;
130 136
131 // Windows 137 if (sample_rate % 8000 == 0) {
132 #if defined(OS_WIN)
133 // Always use stereo rendering on Windows.
134 channel_layout = media::CHANNEL_LAYOUT_STEREO;
135
136 // Render side: AUDIO_PCM_LOW_LATENCY is based on the Core Audio (WASAPI)
137 // API which was introduced in Windows Vista. For lower Windows versions,
138 // a callback-driven Wave implementation is used instead. An output buffer
139 // size of 10ms works well for WASAPI but 30ms is needed for Wave.
140
141 // Use different buffer sizes depending on the current hardware sample rate.
142 if (sample_rate == 96000 || sample_rate == 48000) {
143 buffer_size = (sample_rate / 100); 138 buffer_size = (sample_rate / 100);
139 } else if (sample_rate == 44100) {
140 // The resampler in WebRTC does not support 441 as input. We hard code
141 // the size to 440 (~0.9977ms) instead and rely on the internal jitter
142 // buffer in WebRTC to deal with the resulting drift.
143 // TODO(henrika): ensure that WebRTC supports 44100Hz and use 441 instead.
144 buffer_size = 440;
144 } else { 145 } else {
145 // We do run at 44.1kHz at the actual audio layer, but ask for frames 146 return false;
146 // at 44.0kHz to ensure that we can feed them to the webrtc::VoiceEngine.
147 // TODO(henrika): figure out why we seem to need 20ms here for glitch-
148 // free audio.
149 buffer_size = 2 * 440;
150 } 147 }
151 148
152 // Windows XP and lower can't cope with 10 ms output buffer size. 149 source_params.Reset(media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
153 // It must be extended to 30 ms (60 ms will be used internally by WaveOut). 150 media::CHANNEL_LAYOUT_STEREO,
154 // Note that we can't use media::CoreAudioUtil::IsSupported() here since it 151 sample_rate, 16, buffer_size);
155 // tries to load the Audioses.dll and it will always fail in the render 152
156 // process. 153 // Set up audio parameters for the sink, i.e., the native audio output stream.
157 if (base::win::GetVersion() < base::win::VERSION_VISTA) { 154 // We strive to open up using native parameters to achieve best possible
158 buffer_size = 3 * buffer_size; 155 // performance and to ensure that no FIFO is needed on the browser side to
159 DLOG(WARNING) << "Extending the output buffer size by a factor of three " 156 // match the client request. Any mismatch between the source and the sink is
160 << "since Windows XP has been detected."; 157 // taken care of in this class instead using a pull FIFO.
158
159 media::AudioParameters sink_params;
160
161 buffer_size = hardware_config->GetOutputBufferSize();
162 sink_params.Reset(media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
163 media::CHANNEL_LAYOUT_STEREO,
164 sample_rate, 16, buffer_size);
165
166 // Create a FIFO if re-buffering is required to match the source input with
167 // the sink request. The source acts as provider here and the sink as
168 // consumer.
169 if (source_params.frames_per_buffer() != sink_params.frames_per_buffer()) {
170 DVLOG(1) << "Rebuffering from " << source_params.frames_per_buffer()
171 << " to " << sink_params.frames_per_buffer();
172 audio_fifo_.reset(new media::AudioPullFifo(
173 source_params.channels(),
174 source_params.frames_per_buffer(),
175 base::Bind(
176 &WebRtcAudioRenderer::SourceCallback,
177 base::Unretained(this))));
161 } 178 }
162 #elif defined(OS_MACOSX)
163 channel_layout = media::CHANNEL_LAYOUT_MONO;
164 179
165 // Render side: AUDIO_PCM_LOW_LATENCY on Mac OS X is based on a callback- 180 frame_duration_milliseconds_ = base::Time::kMillisecondsPerSecond /
166 // driven Core Audio implementation. Tests have shown that 10ms is a suitable 181 static_cast<double>(source_params.sample_rate());
167 // frame size to use for 96kHz, 48kHz and 44.1kHz.
168
169 // Use different buffer sizes depending on the current hardware sample rate.
170 if (sample_rate == 96000 || sample_rate == 48000) {
171 buffer_size = (sample_rate / 100);
172 } else {
173 // We do run at 44.1kHz at the actual audio layer, but ask for frames
174 // at 44.0kHz to ensure that we can feed them to the webrtc::VoiceEngine.
175 buffer_size = 440;
176 }
177 #elif defined(OS_LINUX) || defined(OS_OPENBSD)
178 channel_layout = media::CHANNEL_LAYOUT_MONO;
179
180 // Based on tests using the current ALSA implementation in Chrome, we have
181 // found that 10ms buffer size on the output side works fine.
182 buffer_size = 480;
183 #elif defined(OS_ANDROID)
184 channel_layout = media::CHANNEL_LAYOUT_MONO;
185
186 // The buffer size lower than GetAudioHardwareBufferSize() will lead to
187 // choppy sound because AudioOutputResampler will read the buffer multiple
188 // times in a row without allowing the client to re-fill the buffer.
189 // TODO(dwkang): check if 2048 - GetAudioHardwareBufferSize() is the right
190 // value for Android and do further tuning.
191 buffer_size = 2048;
192 #else
193 DLOG(ERROR) << "Unsupported platform";
194 return false;
195 #endif
196
197 // Store utilized parameters to ensure that we can check them
198 // after a successful initialization.
199 params_.Reset(media::AudioParameters::AUDIO_PCM_LOW_LATENCY, channel_layout,
200 sample_rate, 16, buffer_size);
201 182
202 // Allocate local audio buffers based on the parameters above. 183 // Allocate local audio buffers based on the parameters above.
203 // It is assumed that each audio sample contains 16 bits and each 184 // It is assumed that each audio sample contains 16 bits and each
204 // audio frame contains one or two audio samples depending on the 185 // audio frame contains one or two audio samples depending on the
205 // number of channels. 186 // number of channels.
206 buffer_.reset(new int16[params_.frames_per_buffer() * params_.channels()]); 187 buffer_.reset(
188 new int16[source_params.frames_per_buffer() * source_params.channels()]);
207 189
208 source_ = source; 190 source_ = source;
209 source->SetRenderFormat(params_); 191 source->SetRenderFormat(source_params);
210 192
211 // Configure the audio rendering client and start the rendering. 193 // Configure the audio rendering client and start rendering.
212 sink_->Initialize(params_, this); 194 sink_->Initialize(sink_params, this);
213 sink_->SetSourceRenderView(source_render_view_id_); 195 sink_->SetSourceRenderView(source_render_view_id_);
214 sink_->Start(); 196 sink_->Start();
215 197
198 // User must call Play() before any audio can be heard.
216 state_ = PAUSED; 199 state_ = PAUSED;
217 200
218 UMA_HISTOGRAM_ENUMERATION("WebRTC.AudioOutputChannelLayout", 201 UMA_HISTOGRAM_ENUMERATION("WebRTC.AudioOutputChannelLayout",
219 channel_layout, media::CHANNEL_LAYOUT_MAX); 202 source_params.channel_layout(),
203 media::CHANNEL_LAYOUT_MAX);
220 UMA_HISTOGRAM_ENUMERATION("WebRTC.AudioOutputFramesPerBuffer", 204 UMA_HISTOGRAM_ENUMERATION("WebRTC.AudioOutputFramesPerBuffer",
221 buffer_size, kUnexpectedAudioBufferSize); 205 source_params.frames_per_buffer(),
222 AddHistogramFramesPerBuffer(buffer_size); 206 kUnexpectedAudioBufferSize);
207 AddHistogramFramesPerBuffer(source_params.frames_per_buffer());
223 208
224 return true; 209 return true;
225 } 210 }
226 211
227 void WebRtcAudioRenderer::Start() { 212 void WebRtcAudioRenderer::Start() {
228 // TODO(xians): refactor to make usage of Start/Stop more symmetric. 213 // TODO(xians): refactor to make usage of Start/Stop more symmetric.
229 NOTIMPLEMENTED(); 214 NOTIMPLEMENTED();
230 } 215 }
231 216
232 void WebRtcAudioRenderer::Play() { 217 void WebRtcAudioRenderer::Play() {
218 DVLOG(1) << "WebRtcAudioRenderer::Play()";
219 DCHECK(thread_checker_.CalledOnValidThread());
233 base::AutoLock auto_lock(lock_); 220 base::AutoLock auto_lock(lock_);
234 if (state_ == UNINITIALIZED) 221 if (state_ == UNINITIALIZED)
235 return; 222 return;
236 223
237 DCHECK(play_ref_count_ == 0 || state_ == PLAYING); 224 DCHECK(play_ref_count_ == 0 || state_ == PLAYING);
238 ++play_ref_count_; 225 ++play_ref_count_;
239 state_ = PLAYING; 226 state_ = PLAYING;
227
228 if (audio_fifo_)
229 audio_fifo_->Clear();
240 } 230 }
241 231
242 void WebRtcAudioRenderer::Pause() { 232 void WebRtcAudioRenderer::Pause() {
233 DVLOG(1) << "WebRtcAudioRenderer::Pause()";
234 DCHECK(thread_checker_.CalledOnValidThread());
243 base::AutoLock auto_lock(lock_); 235 base::AutoLock auto_lock(lock_);
244 if (state_ == UNINITIALIZED) 236 if (state_ == UNINITIALIZED)
245 return; 237 return;
246 238
247 DCHECK_EQ(state_, PLAYING); 239 DCHECK_EQ(state_, PLAYING);
248 DCHECK_GT(play_ref_count_, 0); 240 DCHECK_GT(play_ref_count_, 0);
249 if (!--play_ref_count_) 241 if (!--play_ref_count_)
250 state_ = PAUSED; 242 state_ = PAUSED;
251 } 243 }
252 244
253 void WebRtcAudioRenderer::Stop() { 245 void WebRtcAudioRenderer::Stop() {
246 DVLOG(1) << "WebRtcAudioRenderer::Stop()";
247 DCHECK(thread_checker_.CalledOnValidThread());
254 base::AutoLock auto_lock(lock_); 248 base::AutoLock auto_lock(lock_);
255 if (state_ == UNINITIALIZED) 249 if (state_ == UNINITIALIZED)
256 return; 250 return;
257 251
258 source_->RemoveRenderer(this); 252 source_->RemoveRenderer(this);
259 source_ = NULL; 253 source_ = NULL;
260 sink_->Stop(); 254 sink_->Stop();
261 state_ = UNINITIALIZED; 255 state_ = UNINITIALIZED;
262 } 256 }
263 257
264 void WebRtcAudioRenderer::SetVolume(float volume) { 258 void WebRtcAudioRenderer::SetVolume(float volume) {
259 DCHECK(thread_checker_.CalledOnValidThread());
265 base::AutoLock auto_lock(lock_); 260 base::AutoLock auto_lock(lock_);
266 if (state_ == UNINITIALIZED) 261 if (state_ == UNINITIALIZED)
267 return; 262 return;
268 263
269 sink_->SetVolume(volume); 264 sink_->SetVolume(volume);
270 } 265 }
271 266
272 base::TimeDelta WebRtcAudioRenderer::GetCurrentRenderTime() const { 267 base::TimeDelta WebRtcAudioRenderer::GetCurrentRenderTime() const {
273 return base::TimeDelta(); 268 return base::TimeDelta();
274 } 269 }
275 270
276 bool WebRtcAudioRenderer::IsLocalRenderer() const { 271 bool WebRtcAudioRenderer::IsLocalRenderer() const {
277 return false; 272 return false;
278 } 273 }
279 274
280 int WebRtcAudioRenderer::Render(media::AudioBus* audio_bus, 275 int WebRtcAudioRenderer::Render(media::AudioBus* audio_bus,
281 int audio_delay_milliseconds) { 276 int audio_delay_milliseconds) {
282 { 277 base::AutoLock auto_lock(lock_);
283 base::AutoLock auto_lock(lock_); 278 if (!source_)
284 if (!source_) 279 return 0;
285 return 0;
286 // We need to keep render data for the |source_| reglardless of |state_|,
287 // otherwise the data will be buffered up inside |source_|.
288 source_->RenderData(reinterpret_cast<uint8*>(buffer_.get()),
289 audio_bus->channels(), audio_bus->frames(),
290 audio_delay_milliseconds);
291 280
292 // Return 0 frames to play out silence if |state_| is not PLAYING. 281 audio_delay_milliseconds_ = audio_delay_milliseconds;
293 if (state_ != PLAYING)
294 return 0;
295 }
296 282
297 // Deinterleave each channel and convert to 32-bit floating-point 283 if (audio_fifo_)
298 // with nominal range -1.0 -> +1.0 to match the callback format. 284 audio_fifo_->Consume(audio_bus, audio_bus->frames());
299 audio_bus->FromInterleaved(buffer_.get(), audio_bus->frames(), 285 else
300 params_.bits_per_sample() / 8); 286 SourceCallback(0, audio_bus);
301 return audio_bus->frames(); 287
288 return (state_ == PLAYING) ? audio_bus->frames() : 0;
302 } 289 }
303 290
304 void WebRtcAudioRenderer::OnRenderError() { 291 void WebRtcAudioRenderer::OnRenderError() {
305 NOTIMPLEMENTED(); 292 NOTIMPLEMENTED();
306 LOG(ERROR) << "OnRenderError()"; 293 LOG(ERROR) << "OnRenderError()";
307 } 294 }
308 295
296 // Called by AudioPullFifo when more data is necessary.
297 void WebRtcAudioRenderer::SourceCallback(
298 int fifo_frame_delay, media::AudioBus* audio_bus) {
299 DVLOG(2) << "WebRtcAudioRenderer::SourceCallback("
300 << fifo_frame_delay << ", "
301 << audio_bus->frames() << ")";
302
303 audio_delay_milliseconds_ += frame_duration_milliseconds_ * fifo_frame_delay;
304 DVLOG(2) << "audio_delay_milliseconds: " << audio_delay_milliseconds_;
305
306 // We need to keep render data for the |source_| regardless of |state_|,
307 // otherwise the data will be buffered up inside |source_|.
308 source_->RenderData(reinterpret_cast<uint8*>(buffer_.get()),
309 audio_bus->channels(), audio_bus->frames(),
310 audio_delay_milliseconds_);
311
312 // Avoid filling up the audio bus if we are not playing; instead
313 // return here and ensure that the returned value in Render() is 0.
314 if (state_ != PLAYING)
DaleCurtis 2013/02/02 00:19:51 you may want to audio_bus->Zero() here.
henrika (OOO until Aug 14) 2013/02/04 08:25:38 Thanks.
315 return;
316
317 // De-interleave each channel and convert to 32-bit floating-point
318 // with nominal range -1.0 -> +1.0 to match the callback format.
319 audio_bus->FromInterleaved(buffer_.get(),
320 audio_bus->frames(),
321 sizeof(buffer_[0]));
322 }
323
309 } // namespace content 324 } // namespace content
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698