content/renderer/media/webrtc_audio_processor.cc - Issue 54383003: Added an "enable-audio-processor" flag and WebRtcAudioProcessor class

Side by Side Diff: content/renderer/media/webrtc_audio_processor.cc

Issue 54383003: Added an "enable-audio-processor" flag and WebRtcAudioProcessor class (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: addressed the rest of Dale's comments Created 7 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

« no previous file with comments | « content/renderer/media/webrtc_audio_processor.h ('k') | content/renderer/media/webrtc_audio_processor_unittest.cc » ('j') | content/renderer/media/webrtc_audio_processor_unittest.cc » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
(Empty)
	1 // Copyright 2013 The Chromium Authors. All rights reserved.

	2 // Use of this source code is governed by a BSD-style license that can be

	3 // found in the LICENSE file.

	4

	5 #include "content/renderer/media/webrtc_audio_processor.h"

	6

	7 #include "base/command_line.h"

	8 #include "base/debug/trace_event.h"

	9 #include "content/public/common/content_switches.h"

	10 #include "content/renderer/media/webrtc_audio_processor_util.h"

	11 #include "media/audio/audio_parameters.h"

	12 #include "media/base/audio_converter.h"

	13 #include "media/base/audio_fifo.h"

	14 #include "media/base/channel_layout.h"

	15

	16 namespace content {

	17

	18 namespace {

	19

	20 using webrtc::AudioProcessing;

	21 using webrtc::MediaConstraintsInterface;

	22

	23 #if defined(ANDROID)

	24 const int kAudioProcessingSampleRate = 16000;

	25 #else

	26 const int kAudioProcessingSampleRate = 32000;

	27 #endif

	28 const int kAudioProcessingNumberOfChannel = 1;

	29

	30 const int kMaxNumberOfBuffersInFifo = 2;

	31

	32 } // namespace

	33

	34 class WebRtcAudioProcessor::WebRtcAudioConverter

	35 : public media::AudioConverter::InputCallback {

	36 public:

	37 WebRtcAudioConverter(const media::AudioParameters& source_params,

	38 const media::AudioParameters& sink_params)

	39 : source_params_(source_params),

	40 sink_params_(sink_params),

	41 audio_converter_(source_params, sink_params_, false) {

	42 audio_converter_.AddInput(this);

	43 // Create and initialize audio fifo and audio bus wrapper.

	44 // The size of the FIFO should be at least twice of the source buffer size

	45 // or twice of the sink buffer size.

	46 int buffer_size = std::max(

	47 kMaxNumberOfBuffersInFifo * source_params_.frames_per_buffer(),

	48 kMaxNumberOfBuffersInFifo * sink_params_.frames_per_buffer());

	49 fifo_.reset(new media::AudioFifo(source_params_.channels(), buffer_size));

	50 // TODO(xians): Use CreateWrapper to save one memcpy.

	51 audio_wrapper_ = media::AudioBus::Create(sink_params_.channels(),

	52 sink_params_.frames_per_buffer());

	53 }

	54

	55 virtual ~WebRtcAudioConverter() {

	56 DCHECK(thread_checker_.CalledOnValidThread());

	57 audio_converter_.RemoveInput(this);

	58 }

	59

	60 void Push(media::AudioBus* audio_source) {

	61 // Called on the realtime audio thread, which must be the same thread as

	62 // calling Convert().
	DaleCurtis 2013/11/07 20:44:08: Isn't this called on the capture thread while Convert() is called on the render thread? Also, please use consistent naming instead of "realtime audio thread". If Push() and Convert() must be called on the same thread, consider another ThreadChecker which is bound on Push() or Convert().
	no longer working on chromium 2013/11/08 13:01:15: On 2013/11/07 20:44:08, DaleCurtis wrote:
	> Isn't this called on the capture thread while Convert() is called on the render thread? Also, please use consistent naming instead of "realtime audio thread".
	There are two converters used by the processor: capture_converter_ and render_converter_. capture_converter_ is used to convert the capture data, and render_converter_ is used to convert the render data. The Push() and Convert() methods of capture_converter_ are called on the capture audio thread, while those of render_converter_ are called on the render audio thread. I updated the comment, which hopefully makes this clearer.
	> If Push() and Convert() must be called on the same thread, consider another ThreadChecker which is bound on Push() or Convert().
	Done: I added another thread checker, and I had to add a flag to avoid an unnecessary DetachFromThread() call.
	63 fifo_->Push(audio_source);

	64 }

	65

	66 bool Convert(webrtc::AudioFrame* out) {

	67 // Called on realtime audio thread.

	68 // Return false if there is no 10ms data in the FIFO.

	69 if (fifo_->frames() < (source_params_.sample_rate() / 100))

	70 return false;

	71

	72 // Convert 10ms data to the output format, this will trigger ProvideInput().

	73 audio_converter_.Convert(audio_wrapper_.get());

	74

	75 // TODO(xians): Figure out a better way to handle the interleaved and

	76 // deinterleaved format switching.

	77 audio_wrapper_->ToInterleaved(audio_wrapper_->frames(),

	78 sink_params_.bits_per_sample() / 8,

	79 out->data_);

	80

	81 out->samples_per_channel_ = sink_params_.frames_per_buffer();

	82 out->sample_rate_hz_ = sink_params_.sample_rate();

	83 out->speech_type_ = webrtc::AudioFrame::kNormalSpeech;

	84 out->vad_activity_ = webrtc::AudioFrame::kVadUnknown;

	85 out->num_channels_ = sink_params_.channels();

	86

	87 return true;

	88 }

	89

	90 const media::AudioParameters& source_parameters() const {

	91 return source_params_;

	92 }

	93 const media::AudioParameters& sink_parameters() const {

	94 return sink_params_;

	95 }

	96

	97 private:

	98 // AudioConverter::InputCallback implementation.

	99 virtual double ProvideInput(media::AudioBus* audio_bus,

	100 base::TimeDelta buffer_delay) {

	101 // Called on realtime audio thread.
	DaleCurtis 2013/11/07 20:44:08: Ditto.
	no longer working on chromium 2013/11/08 13:01:15: On 2013/11/07 20:44:08, DaleCurtis wrote:
	> Ditto.
	The same.
	102 // TODO(xians): Figure out why the first Convert() triggers ProvideInput

	103 // two times.

	104 if (fifo_->frames() < audio_bus->frames())

	105 return 0;

	106

	107 fifo_->Consume(audio_bus, 0, audio_bus->frames());

	108 return 1.0;

	109 }

	110

	111 base::ThreadChecker thread_checker_;

	112 media::AudioParameters source_params_;

	113 media::AudioParameters sink_params_;

	114

	115 // TODO(xians): consider using SincResampler to save some memcpy.

	116 // Handles mixing and resampling between input and output parameters.

	117 media::AudioConverter audio_converter_;

	118 scoped_ptr<media::AudioBus> audio_wrapper_;

	119 scoped_ptr<media::AudioFifo> fifo_;

	120 };

	121

	122 WebRtcAudioProcessor::WebRtcAudioProcessor(

	123 const webrtc::MediaConstraintsInterface* constraints)

	124 : render_delay_ms_(0) {

	125 InitializeAudioProcessingModule(constraints);

	126 }

	127

	128 WebRtcAudioProcessor::~WebRtcAudioProcessor() {

	129 DCHECK(thread_checker_.CalledOnValidThread());

	130 StopAudioProcessing();

	131 }

	132

	133 void WebRtcAudioProcessor::SetCaptureFormat(

	134 const media::AudioParameters& source_params) {

	135 DCHECK(thread_checker_.CalledOnValidThread());

	136 DCHECK(source_params.IsValid());

	137

	138 // Create and initialize audio converter for the source data.

	139 // When the webrtc AudioProcessing is enabled, the sink format of the

	140 // converter will be the same as the post-processed data format, which is

	141 // 32k mono for desktops and 16k mono for Android. When the AudioProcessing

	142 // is disabled, the sink format will be the same as the source format.

	143 const int sink_sample_rate = audio_processing_ ?

	144 kAudioProcessingSampleRate : source_params.sample_rate();

	145 const media::ChannelLayout sink_channel_layout = audio_processing_ ?

	146 media::CHANNEL_LAYOUT_MONO : source_params.channel_layout();

	147

	148 // WebRtc is using 10ms data as its native packet size.

	149 media::AudioParameters sink_params(

	150 media::AudioParameters::AUDIO_PCM_LOW_LATENCY, sink_channel_layout,

	151 sink_sample_rate, 16, sink_sample_rate / 100);

	152 capture_converter_.reset(

	153 new WebRtcAudioConverter(source_params, sink_params));

	154 }

	155

	156 void WebRtcAudioProcessor::PushCaptureData(media::AudioBus* audio_source) {

	157 capture_converter_->Push(audio_source);

	158 }

	159

	160 bool WebRtcAudioProcessor::ProcessAndConsumeData(

	161 int capture_audio_delay_ms, int volume, bool key_pressed,

	162 int16** out) {
	DaleCurtis 2013/11/07 20:44:08: Instead of int16**, should this just be a WebRtcAudioFrame*? |out| is now pointing at an internally owned construct. It'd be better if whoever calls ProcessAndConsumeData owned the data structure. Alternatively, you can copy the data out at this point.
	no longer working on chromium 2013/11/08 13:01:15: On 2013/11/07 20:44:08, DaleCurtis wrote:
	> Instead of int16**, should this just be a WebRtcAudioFrame*? |out| is now pointing at an internally owned construct. It'd be better if whoever calls ProcessAndConsumeData owned the data structure.
	|out| is supposed to be used by all clients of the track, like webaudio, the local renderer and the webrtc peer connection, so it had better be a generic type. WebRtcAudioFrame is not a good candidate.
	> Alternatively, you can copy the data out at this point.
	I would like to avoid the memcpy. If this does not look good to you, I can add an accessor to WebRtcAudioProcessor like: int16* output_data() const { capture_frame_.data_; } and the client of WebRtcAudioProcessor can then get the output data via the accessor. wdyt?
	163 TRACE_EVENT0("audio",

	164 "WebRtcAudioProcessor::ProcessAndConsumeData");

	165

	166 if (!capture_converter_->Convert(&capture_frame_))

	167 return false;

	168

	169 ProcessData(&capture_frame_, capture_audio_delay_ms, volume, key_pressed);

	170 *out = capture_frame_.data_;

	171

	172 return true;

	173 }

	174

	175 const media::AudioParameters& WebRtcAudioProcessor::OutputFormat() const {

	176 return capture_converter_->sink_parameters();

	177 }

	178

	179 void WebRtcAudioProcessor::ProcessData(webrtc::AudioFrame* audio_frame,
	DaleCurtis 2013/11/07 20:44:08: As with above, maybe having a DCHECK(capture_thread_checker_.CalledOnValidThread()); and a DCHECK(render_thread_checker_.CalledOnValidThread()); might improve the clarity of this code a lot.
	no longer working on chromium 2013/11/08 13:01:15: On 2013/11/07 20:44:08, DaleCurtis wrote:
	> As with above, maybe having a DCHECK(capture_thread_checker_.CalledOnValidThread()); and a DCHECK(render_thread_checker_.CalledOnValidThread()); might improve the clarity of this code a lot.
	Done.
	180 int capture_audio_delay_ms,

	181 int volume,

	182 bool key_pressed) {

	183 if (!audio_processing_)

	184 return;

	185

	186 TRACE_EVENT0("audio", "WebRtcAudioProcessor::Process10MsData");

	187 DCHECK_EQ(audio_processing_->sample_rate_hz(),

	188 capture_converter_->sink_parameters().sample_rate());

	189 DCHECK_EQ(audio_processing_->num_input_channels(),

	190 capture_converter_->sink_parameters().channels());

	191 DCHECK_EQ(audio_processing_->num_output_channels(),

	192 capture_converter_->sink_parameters().channels());

	193

	194 int total_delay_ms = 0;

	195 {

	196 base::AutoLock auto_lock(lock_);

	197 total_delay_ms = capture_audio_delay_ms + render_delay_ms_;

	198 }

	199

	200 audio_processing_->set_stream_delay_ms(total_delay_ms);

	201 webrtc::GainControl* agc = audio_processing_->gain_control();

	202 if (agc->set_stream_analog_level(volume))

	203 NOTREACHED();

	204 int err = audio_processing_->ProcessStream(audio_frame);

	205 DCHECK(!err) << "ProcessStream() error: " << err;

	206

	207 // TODO(xians): Add support for AGC, typing detection, audio level calculation,

	208 // stereo swapping.

	209 }

	210

	211 void WebRtcAudioProcessor::PushRenderData(

	212 const int16* render_audio, int sample_rate, int number_of_channels,

	213 int number_of_frames, int render_delay_ms) {

	214 // Return immediately if the echo cancellation is off.

	215 if (!audio_processing_ \|\|

	216 !audio_processing_->echo_cancellation()->is_enabled())

	217 return;

	218

	219 TRACE_EVENT0("audio",

	220 "WebRtcAudioProcessor::FeedRenderDataToAudioProcessing");

	221 {

	222 base::AutoLock auto_lock(lock_);

	223 render_delay_ms_ = render_delay_ms;

	224 }

	225

	226 InitializeRenderConverterIfNeeded(sample_rate, number_of_channels,

	227 number_of_frames);

	228

	229 // TODO(xians): Avoid this extra interleave/deinterleave.

	230 render_data_bus_->FromInterleaved(render_audio,

	231 render_data_bus_->frames(),

	232 sizeof(render_audio[0]));

	233 render_converter_->Push(render_data_bus_.get());

	234 while (render_converter_->Convert(&render_frame_)) {

	235 audio_processing_->AnalyzeReverseStream(&render_frame_);

	236 }

	237 }

	238

	239 void WebRtcAudioProcessor::InitializeAudioProcessingModule(

	240 const webrtc::MediaConstraintsInterface* constraints) {

	241 if (!CommandLine::ForCurrentProcess()->HasSwitch(

	242 switches::kEnableAudioTrackProcessing)) {

	243 return;

	244 }

	245

	246 if (!constraints)

	247 return;

	248

	249 const bool enable_aec = GetPropertyFromConstraints(

	250 constraints, MediaConstraintsInterface::kEchoCancellation);

	251 const bool enable_ns = GetPropertyFromConstraints(

	252 constraints, MediaConstraintsInterface::kNoiseSuppression);

	253 const bool enable_high_pass_filter = GetPropertyFromConstraints(

	254 constraints, MediaConstraintsInterface::kHighpassFilter);

	255 const bool start_aec_dump = GetPropertyFromConstraints(

	256 constraints, MediaConstraintsInterface::kInternalAecDump);

	257 #if defined(IOS) \|\| defined(ANDROID)

	258 const bool enable_experimental_aec = false;

	259 const bool enable_typing_detection = false;

	260 #else

	261 const bool enable_experimental_aec = GetPropertyFromConstraints(

	262 constraints, MediaConstraintsInterface::kExperimentalEchoCancellation);

	263 const bool enable_typing_detection = GetPropertyFromConstraints(

	264 constraints, MediaConstraintsInterface::kTypingNoiseDetection);

	265 #endif

	266

	267 // Reset the audio processing to NULL if no audio processing component is

	268 // enabled.

	269 if (!enable_aec && !enable_experimental_aec && !enable_ns &&

	270 !enable_high_pass_filter && !enable_typing_detection) {

	271 return;

	272 }

	273

	274 // Create and configure the audio processing if it does not exist.

	275 if (!audio_processing_)

	276 audio_processing_.reset(webrtc::AudioProcessing::Create(0));

	277

	278 // Enable the audio processing components.

	279 if (enable_aec) {

	280 EnableEchoCancellation(audio_processing_.get());

	281

	282 if (enable_experimental_aec)

	283 EnableExperimentalEchoCancellation(audio_processing_.get());

	284 }

	285

	286 if (enable_ns)

	287 EnableNoiseSuppression(audio_processing_.get());

	288

	289 if (enable_high_pass_filter)

	290 EnableHighPassFilter(audio_processing_.get());

	291

	292 if (enable_typing_detection)

	293 EnableTypingDetection(audio_processing_.get());

	294

	295 if (enable_aec && start_aec_dump)

	296 StartAecDump(audio_processing_.get());

	297

	298 // Configure the audio format the audio processing is running on. This

	299 // has to be done after all the needed components are enabled.

	300 if (audio_processing_->set_sample_rate_hz(kAudioProcessingSampleRate))

	301 NOTREACHED();

	302 if (audio_processing_->set_num_channels(kAudioProcessingNumberOfChannel,

	303 kAudioProcessingNumberOfChannel))

	304 NOTREACHED();

	305 }

	306

	307 void WebRtcAudioProcessor::InitializeRenderConverterIfNeeded(

	308 int sample_rate, int number_of_channels, int frames_per_buffer) {

	309 // TODO, figure out if we need to handle the buffer size change.

	310 if (render_converter_.get() &&

	311 render_converter_->source_parameters().sample_rate() == sample_rate &&

	312 render_converter_->source_parameters().channels() == number_of_channels) {

	313 // Do nothing if the \|render_converter_\| has been setup properly.

	314 return;

	315 }

	316

	317 media::AudioParameters source_params(

	318 media::AudioParameters::AUDIO_PCM_LOW_LATENCY,

	319 media::GuessChannelLayout(number_of_channels), sample_rate, 16,

	320 frames_per_buffer);

	321 media::AudioParameters sink_params(

	322 media::AudioParameters::AUDIO_PCM_LOW_LATENCY,

	323 media::CHANNEL_LAYOUT_MONO, kAudioProcessingSampleRate, 16,

	324 kAudioProcessingSampleRate / 100);

	325 render_converter_.reset(new WebRtcAudioConverter(source_params, sink_params));

	326 render_data_bus_ = media::AudioBus::Create(number_of_channels,

	327 frames_per_buffer);

	328 }

	329

	330 void WebRtcAudioProcessor::StopAudioProcessing() {

	331 if (!audio_processing_.get())

	332 return;

	333

	334 // It is safe to stop the AEC dump even it is not started.

	335 StopAecDump(audio_processing_.get());

	336

	337 audio_processing_.reset();

	338 }

	339

	340 } // namespace content

OLD	NEW