// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "content/renderer/media/webrtc_audio_processor.h"

#include "base/command_line.h"
#include "base/debug/trace_event.h"
#include "content/public/common/content_switches.h"
#include "media/audio/audio_parameters.h"
#include "media/base/audio_converter.h"
#include "media/base/audio_fifo.h"
#include "media/base/channel_layout.h"

namespace content {

namespace {

using webrtc::AudioProcessing;
using webrtc::MediaConstraintsInterface;

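// The audio processing module (APM) runs at a fixed native format: 16 kHz
// on Android (presumably to keep the CPU cost down on mobile hardware),
// 32 kHz everywhere else, always mono.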
#if defined(ANDROID)
const int kAudioProcessingSampleRate = 16000;
#else
const int kAudioProcessingSampleRate = 32000;
#endif
const int kAudioProcessingNumberOfChannel = 1;

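// The FIFO between the capture source and the converter is sized to hold
// two buffers of the larger of the source and sink buffer sizes, so a full
// source buffer can always be pushed while a partially consumed chunk is
// still pending.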
const int kMaxNumberOfBuffersInFifo = 2;

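// Returns true only if |key| is present in |constraints| and its value is
// set to true.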
bool GetPropertyFromConstraints(const MediaConstraintsInterface* constraints,
                                const std::string& key) {
  bool value = false;
  return webrtc::FindConstraint(constraints, key, &value, NULL) && value;
}

// Extract all these methods into a helper class.
void EnableEchoCancellation(AudioProcessing* audio_processing) {
  DCHECK(audio_processing);
#if defined(IOS) || defined(ANDROID)
  // Mobile devices use the mobile-optimized echo control (AECM).
  if (audio_processing->echo_control_mobile()->Enable(true))
    NOTREACHED();

  if (audio_processing->echo_control_mobile()->set_routing_mode(
          webrtc::EchoControlMobile::kSpeakerphone))
    NOTREACHED();

  return;
#endif
  if (audio_processing->echo_cancellation()->Enable(true))
    NOTREACHED();
  if (audio_processing->echo_cancellation()->set_suppression_level(
          webrtc::EchoCancellation::kHighSuppression))
    NOTREACHED();

  // Enable the metrics for AEC.
  if (audio_processing->echo_cancellation()->enable_metrics(true))
    NOTREACHED();
  if (audio_processing->echo_cancellation()->enable_delay_logging(true))
    NOTREACHED();
}

void EnableNoiseSuppression(AudioProcessing* audio_processing) {
  DCHECK(audio_processing);
  if (audio_processing->noise_suppression()->set_level(
          webrtc::NoiseSuppression::kHigh))
    NOTREACHED();

  if (audio_processing->noise_suppression()->Enable(true))
    NOTREACHED();
}

void EnableHighPassFilter(AudioProcessing* audio_processing) {
  DCHECK(audio_processing);
  if (audio_processing->high_pass_filter()->Enable(true))
    NOTREACHED();
}

// TODO(xians): Add support for stereo swapping.
void EnableTypingDetection(AudioProcessing* audio_processing) {
  DCHECK(audio_processing);
  if (audio_processing->voice_detection()->Enable(true))
    NOTREACHED();

  if (audio_processing->voice_detection()->set_likelihood(
          webrtc::VoiceDetection::kVeryLowLikelihood))
    NOTREACHED();
}

void EnableExperimentalEchoCancellation(AudioProcessing* audio_processing) {
  DCHECK(audio_processing);
  webrtc::Config config;
  config.Set<webrtc::DelayCorrection>(new webrtc::DelayCorrection(true));
  audio_processing->SetExtraOptions(config);
}

void StartAecDump(AudioProcessing* audio_processing) {
  static const char kAecDumpFilename[] = "/tmp/audio.aecdump";
  if (audio_processing->StartDebugRecording(kAecDumpFilename))
    LOG(ERROR) << "Failed to start AEC debug recording";
}

void StopAecDump(AudioProcessing* audio_processing) {
  if (audio_processing->StopDebugRecording())
    LOG(ERROR) << "Failed to stop AEC debug recording";
}

}  // namespace

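// Buffers capture audio in a FIFO and converts it, 10 ms at a time, from the
// source format to the sink format (resampling and down-mixing as needed),
// producing webrtc::AudioFrames suitable for the APM.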
class WebRtcAudioProcessor::WebRtcAudioConverter
    : public media::AudioConverter::InputCallback {
 public:
  WebRtcAudioConverter(const media::AudioParameters& source_params,
                       const media::AudioParameters& sink_params)
      : source_params_(source_params),
        sink_params_(sink_params) {
    // Create the audio converter which is responsible for down-mixing and
    // resampling.
    audio_converter_.reset(
        new media::AudioConverter(source_params, sink_params_, false));
    audio_converter_->AddInput(this);

    // Create and initialize the audio FIFO and audio bus wrapper.
    // The size of the FIFO should be at least twice the source buffer size
    // or twice the sink buffer size, whichever is larger.
    int buffer_size = std::max(
        kMaxNumberOfBuffersInFifo * source_params.frames_per_buffer(),
        kMaxNumberOfBuffersInFifo * sink_params_.frames_per_buffer());
    fifo_.reset(new media::AudioFifo(source_params.channels(), buffer_size));
    // TODO(xians): Use CreateWrapper to save one memcpy.
    audio_wrapper_ = media::AudioBus::Create(sink_params_.channels(),
                                             sink_params_.frames_per_buffer());
  }

  ~WebRtcAudioConverter() {
    audio_converter_->RemoveInput(this);
  }

  void Push(media::AudioBus* audio_source) {
    DCHECK_LE(fifo_->frames() + audio_source->frames(), fifo_->max_frames());
    fifo_->Push(audio_source);
  }

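  // Pulls one 10 ms chunk from the FIFO through the converter into
  // |audio_frame_|. Returns false when the FIFO does not yet hold 10 ms of
  // source data.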
  bool Convert() {
    // Return false if there is less than 10 ms of data in the FIFO.
    if (fifo_->frames() < (source_params_.sample_rate() / 100))
      return false;

    // Convert 10 ms of data to the output format; this will trigger a
    // callback to ProvideInput().
    audio_converter_->Convert(audio_wrapper_.get());

    // TODO(xians): Figure out a better way to handle the interleaved and
    // deinterleaved format switching.
    audio_wrapper_->ToInterleaved(audio_wrapper_->frames(),
                                  sink_params_.bits_per_sample() / 8,
                                  audio_frame_.data_);

    audio_frame_.samples_per_channel_ = sink_params_.frames_per_buffer();
    audio_frame_.sample_rate_hz_ = sink_params_.sample_rate();
    audio_frame_.speech_type_ = webrtc::AudioFrame::kNormalSpeech;
    audio_frame_.vad_activity_ = webrtc::AudioFrame::kVadUnknown;
    audio_frame_.num_channels_ = sink_params_.channels();

    return true;
  }

  webrtc::AudioFrame* audio_frame() { return &audio_frame_; }
  const media::AudioParameters& source_parameters() const {
    return source_params_;
  }
  const media::AudioParameters& sink_parameters() const {
    return sink_params_;
  }

 private:
  // AudioConverter::InputCallback implementation.
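  // Feeds converter input from the FIFO. The return value acts as a volume
  // scale for the provided buffer; returning 0 on FIFO underrun tells the
  // converter to treat this input as silence.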
  virtual double ProvideInput(media::AudioBus* audio_bus,
                              base::TimeDelta buffer_delay) OVERRIDE {
    // The first Convert() call can trigger ProvideInput() twice.
    // TODO(xians): Use SincResampler to fix the problem.
    if (fifo_->frames() < audio_bus->frames())
      return 0;

    fifo_->Consume(audio_bus, 0, audio_bus->frames());
    return 1.0;
  }

  webrtc::AudioFrame audio_frame_;

  // TODO(xians): Consider using SincResampler to save some memcpy.
  // Handles mixing and resampling between input and output parameters.
  scoped_ptr<media::AudioConverter> audio_converter_;
  scoped_ptr<media::AudioBus> audio_wrapper_;
  scoped_ptr<media::AudioFifo> fifo_;

  media::AudioParameters source_params_;
  media::AudioParameters sink_params_;
};

WebRtcAudioProcessor::WebRtcAudioProcessor(
    const webrtc::MediaConstraintsInterface* constraints)
    : render_delay_ms_(0) {
  InitializeAudioProcessingModule(constraints);
}

WebRtcAudioProcessor::~WebRtcAudioProcessor() {
  StopAudioProcessing();
}

void WebRtcAudioProcessor::SetCaptureFormat(
    const media::AudioParameters& source_params) {
  DCHECK(source_params.IsValid());

  // Create and initialize the audio converter for the source data.
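  // When the APM is active it requires mono audio at its fixed native sample
  // rate; when it is disabled, the source format is passed through unchanged.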
  int sink_sample_rate = audio_processing_.get() ?
      kAudioProcessingSampleRate : source_params.sample_rate();
  media::ChannelLayout sink_channel_layout = audio_processing_.get() ?
      media::CHANNEL_LAYOUT_MONO : source_params.channel_layout();

  // WebRTC uses 10 ms of data as its native packet size.
  media::AudioParameters sink_params(
      media::AudioParameters::AUDIO_PCM_LOW_LATENCY, sink_channel_layout,
      sink_sample_rate, 16, sink_sample_rate / 100);
  capture_converter_.reset(
      new WebRtcAudioConverter(source_params, sink_params));
}

void WebRtcAudioProcessor::PushCaptureData(media::AudioBus* audio_source) {
  DCHECK(capture_converter_.get());
  capture_converter_->Push(audio_source);
}

bool WebRtcAudioProcessor::ProcessAndConsume10MsData(
    int capture_audio_delay_ms, int volume, bool key_pressed,
    int16** out) {
  TRACE_EVENT0("audio",
               "WebRtcAudioProcessor::ProcessAndConsume10MsData");

  if (!capture_converter_->Convert())
    return false;

  ProcessData(capture_audio_delay_ms, volume, key_pressed);
  *out = capture_converter_->audio_frame()->data_;

  return true;
}

const media::AudioParameters& WebRtcAudioProcessor::OutputFormat() const {
  return capture_converter_->sink_parameters();
}

void WebRtcAudioProcessor::ProcessData(int capture_audio_delay_ms,
                                       int volume,
                                       bool key_pressed) {
  if (!audio_processing_.get())
    return;

  TRACE_EVENT0("audio", "WebRtcAudioProcessor::Process10MsData");
  DCHECK_EQ(audio_processing_->sample_rate_hz(),
            capture_converter_->sink_parameters().sample_rate());
  DCHECK_EQ(audio_processing_->num_input_channels(),
            capture_converter_->sink_parameters().channels());
  DCHECK_EQ(audio_processing_->num_output_channels(),
            capture_converter_->sink_parameters().channels());

  // The APM needs the total delay seen by the capture signal: the capture
  // (input) delay plus the latest render (output) delay.
  int total_delay_ms = 0;
  {
    base::AutoLock auto_lock(lock_);
    total_delay_ms = capture_audio_delay_ms + render_delay_ms_;
  }

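  // set_stream_delay_ms() tells the AEC how long after a far-end frame is
  // handed to AnalyzeReverseStream() its echo can show up in the capture
  // stream, so the two signals can be aligned.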
  audio_processing_->set_stream_delay_ms(total_delay_ms);
  webrtc::GainControl* agc = audio_processing_->gain_control();
  if (agc->set_stream_analog_level(volume))
    NOTREACHED();
  int err = audio_processing_->ProcessStream(
      capture_converter_->audio_frame());
  if (err) {
    NOTREACHED() << "ProcessStream() error: " << err;
  }

  // TODO(xians): Fix the AGC, typing detection, audio level calculation and
  // stereo swapping.
}

void WebRtcAudioProcessor::FeedRenderDataToAudioProcessing(
    const int16* render_audio, int sample_rate, int number_of_channels,
    int number_of_frames, int render_delay_ms) {
  // Return immediately if the echo cancellation is off.
  if (!audio_processing_.get() ||
      !audio_processing_->echo_cancellation()->is_enabled())
    return;

  TRACE_EVENT0("audio",
               "WebRtcAudioProcessor::FeedRenderDataToAudioProcessing");
  {
    base::AutoLock auto_lock(lock_);
    render_delay_ms_ = render_delay_ms;
  }

  InitializeRenderConverterIfNeeded(sample_rate, number_of_channels,
                                    number_of_frames);
  DCHECK(render_converter_.get());

  // TODO(xians): Avoid this extra interleave/deinterleave.
  scoped_ptr<media::AudioBus> data_bus = media::AudioBus::Create(
      number_of_channels, number_of_frames);
  data_bus->FromInterleaved(render_audio,
                            data_bus->frames(),
                            sizeof(render_audio[0]));
  render_converter_->Push(data_bus.get());
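  // Drain the render FIFO in 10 ms chunks; AnalyzeReverseStream() gives the
  // APM the far-end (render) signal that the AEC correlates against the
  // microphone capture in order to cancel the echo.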
  while (render_converter_->Convert()) {
    audio_processing_->AnalyzeReverseStream(render_converter_->audio_frame());
  }
}

void WebRtcAudioProcessor::InitializeAudioProcessingModule(
    const webrtc::MediaConstraintsInterface* constraints) {
  const CommandLine& command_line = *CommandLine::ForCurrentProcess();
  if (!command_line.HasSwitch(switches::kEnableWebRtcAudioProcessor))
    return;

  if (!constraints)
    return;

  bool enable_aec = GetPropertyFromConstraints(
      constraints, MediaConstraintsInterface::kEchoCancellation);
  bool enable_experimental_aec = GetPropertyFromConstraints(
      constraints, MediaConstraintsInterface::kExperimentalEchoCancellation);
  bool enable_ns = GetPropertyFromConstraints(
      constraints, MediaConstraintsInterface::kNoiseSuppression);
  bool enable_high_pass_filter = GetPropertyFromConstraints(
      constraints, MediaConstraintsInterface::kHighpassFilter);
  bool enable_typing_detection = GetPropertyFromConstraints(
      constraints, MediaConstraintsInterface::kTypingNoiseDetection);
  // TODO(xians): How to start and stop AEC dump?
  bool start_aec_dump = GetPropertyFromConstraints(
      constraints, MediaConstraintsInterface::kInternalAecDump);
#if defined(IOS) || defined(ANDROID)
  enable_typing_detection = false;
  enable_experimental_aec = false;
#endif

  // Return immediately if no audio processing component is enabled.
  if (!enable_aec && !enable_experimental_aec && !enable_ns &&
      !enable_high_pass_filter && !enable_typing_detection) {
    return;
  }

  // Create and configure the audio processing if it does not exist.
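  // Note: the integer passed to AudioProcessing::Create() is an instance id
  // which, as far as we can tell, is only used for debug tracing.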
  if (!audio_processing_.get())
    audio_processing_.reset(webrtc::AudioProcessing::Create(0));

  // Enable the audio processing components.
  if (enable_aec) {
    EnableEchoCancellation(audio_processing_.get());

    if (enable_experimental_aec)
      EnableExperimentalEchoCancellation(audio_processing_.get());
  }

  if (enable_ns)
    EnableNoiseSuppression(audio_processing_.get());

  if (enable_high_pass_filter)
    EnableHighPassFilter(audio_processing_.get());

  if (enable_typing_detection)
    EnableTypingDetection(audio_processing_.get());

  if (enable_aec && start_aec_dump)
    StartAecDump(audio_processing_.get());

  // Configure the audio format the audio processing is running on. This
  // has to be done after all the needed components are enabled.
  if (audio_processing_->set_sample_rate_hz(kAudioProcessingSampleRate))
    NOTREACHED();
  if (audio_processing_->set_num_channels(kAudioProcessingNumberOfChannel,
                                          kAudioProcessingNumberOfChannel))
    NOTREACHED();
}

void WebRtcAudioProcessor::InitializeRenderConverterIfNeeded(
    int sample_rate, int number_of_channels, int frames_per_buffer) {
  // TODO(xians): Figure out whether we need to handle buffer size changes.
  if (render_converter_.get() &&
      render_converter_->source_parameters().sample_rate() == sample_rate &&
      render_converter_->source_parameters().channels() == number_of_channels) {
    // Do nothing if |render_converter_| has already been set up properly.
    return;
  }

  media::AudioParameters source_params(
      media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
      media::GuessChannelLayout(number_of_channels), sample_rate, 16,
      frames_per_buffer);
  media::AudioParameters sink_params(
      media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
      media::CHANNEL_LAYOUT_MONO, kAudioProcessingSampleRate, 16,
      kAudioProcessingSampleRate / 100);
  render_converter_.reset(
      new WebRtcAudioConverter(source_params, sink_params));
}

void WebRtcAudioProcessor::StopAudioProcessing() {
  if (!audio_processing_.get())
    return;

  // It is safe to stop the AEC dump even if it has not been started.
  StopAecDump(audio_processing_.get());

  audio_processing_.reset();
}

}  // namespace content