| Index: content/renderer/media/speech_recognition_audio_source_provider.cc
|
| diff --git a/content/renderer/media/speech_recognition_audio_source_provider.cc b/content/renderer/media/speech_recognition_audio_source_provider.cc
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..bfc779afa148616062d04ac9026e8ddce51a42c7
|
| --- /dev/null
|
| +++ b/content/renderer/media/speech_recognition_audio_source_provider.cc
|
| @@ -0,0 +1,181 @@
|
| +// Copyright 2014 The Chromium Authors. All rights reserved.
|
| +// Use of this source code is governed by a BSD-style license that can be
|
| +// found in the LICENSE file.
|
| +
|
| +#include "content/renderer/media/speech_recognition_audio_source_provider.h"
|
| +
|
| +#include "base/logging.h"
|
| +#include "base/memory/shared_memory.h"
|
| +#include "base/time/time.h"
|
| +#include "media/audio/audio_parameters.h"
|
| +#include "media/base/audio_fifo.h"
|
| +
|
| +namespace content {
|
| +
|
| +SpeechRecognitionAudioSourceProvider::SpeechRecognitionAudioSourceProvider(
|
| + const blink::WebMediaStreamTrack& track,
|
| + const media::AudioParameters& params, const base::SharedMemoryHandle memory,
|
| + base::SyncSocket* socket, OnStoppedCB on_stopped_cb)
|
| + : track_(track),
|
| + shared_memory_(memory, false),
|
| + socket_(socket),
|
| + output_params_(params),
|
| + track_stopped_(false),
|
| + buffer_index_(0),
|
| + on_stopped_cb_(on_stopped_cb) {
|
| + DCHECK(socket);
|
| + DCHECK(main_render_thread_checker_.CalledOnValidThread());
|
| + DCHECK(params.IsValid());
|
| + DCHECK(IsSupportedTrack(track));
|
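| +  // The shared memory holds a media::AudioInputBuffer: an
|
| +  // AudioInputBufferParameters header followed by the raw audio payload of
|
| +  // a single media::AudioBus.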
| + const size_t memory_length = media::AudioBus::CalculateMemorySize(params) +
|
| + sizeof(media::AudioInputBufferParameters);
|
| + CHECK(shared_memory_.Map(memory_length));
|
| +
|
| +  // The buffer index used for synchronization with the client is stored in
|
| +  // the |params.size| field on the shared memory.
|
| + uint8* ptr = static_cast<uint8*>(shared_memory_.memory());
|
| + media::AudioInputBuffer* buffer =
|
| + reinterpret_cast<media::AudioInputBuffer*>(ptr);
|
| + peer_buffer_index_ = &(buffer->params.size);
|
| +
|
| +  // The client must manage its own counter and have reset it to zero.
|
| + DCHECK_EQ(0U, *peer_buffer_index_);
|
| + output_bus_ = media::AudioBus::WrapMemory(params, buffer->audio);
|
| +
|
| + // Connect the source provider to the track as a sink.
|
| + MediaStreamAudioSink::AddToAudioTrack(this, track_);
|
| +}
|
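| +// A hypothetical caller (names assumed for illustration) would construct the
|
| +// provider with the IPC primitives received from the browser process:
|
| +//   SpeechRecognitionAudioSourceProvider provider(
|
| +//       track, output_params, shared_memory_handle, &sync_socket, stopped_cb);
|
| +// after which audio flows as soon as the track delivers data.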
| +
|
| +SpeechRecognitionAudioSourceProvider::~SpeechRecognitionAudioSourceProvider() {
|
| + DCHECK(main_render_thread_checker_.CalledOnValidThread());
|
| + if (audio_converter_.get())
|
| + audio_converter_->RemoveInput(this);
|
| +
|
| + // Notify the track before this sink goes away.
|
| + if (!track_stopped_)
|
| + MediaStreamAudioSink::RemoveFromAudioTrack(this, track_);
|
| +}
|
| +
|
| +// static
|
| +bool SpeechRecognitionAudioSourceProvider::IsSupportedTrack(
|
| + const blink::WebMediaStreamTrack& track) {
|
| + if (track.source().type() != blink::WebMediaStreamSource::TypeAudio)
|
| + return false;
|
| +
|
| + MediaStreamAudioSource* native_source =
|
| + static_cast<MediaStreamAudioSource*>(track.source().extraData());
|
| + if (!native_source)
|
| + return false;
|
| +
|
| + const StreamDeviceInfo& device_info = native_source->device_info();
|
| +  // Purposely support only tracks from an audio device. Disallow WebAudio.
|
| + return (device_info.device.type == content::MEDIA_DEVICE_AUDIO_CAPTURE);
|
| +}
|
| +
|
| +void SpeechRecognitionAudioSourceProvider::OnSetFormat(
|
| + const media::AudioParameters& input_params) {
|
| + DCHECK(input_params.IsValid());
|
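| +  // One input buffer must not cover a longer duration than one output
|
| +  // buffer; otherwise |fifo_buffer_size_| below would be smaller than a
|
| +  // single input buffer.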
| + DCHECK_LE(
|
| + input_params.frames_per_buffer() * 1000 / input_params.sample_rate(),
|
| + output_params_.frames_per_buffer() * 1000 / output_params_.sample_rate());
|
| +
|
| +  // We need to detach the thread checker here because a new capture thread
|
| +  // will be calling OnSetFormat() and OnData() if the source is restarted.
|
| + capture_thread_checker_.DetachFromThread();
|
| +
|
| + input_params_ = input_params;
|
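| +  // |fifo_buffer_size_| is the number of frames, at the input sample rate,
|
| +  // needed to produce one output buffer after resampling.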
| + fifo_buffer_size_ =
|
| + std::ceil(output_params_.frames_per_buffer() *
|
| + static_cast<double>(input_params_.sample_rate()) /
|
| + output_params_.sample_rate());
|
| + DCHECK_GE(fifo_buffer_size_, input_params_.frames_per_buffer());
|
| +
|
| + // Allows for some delays on the endpoint client.
|
| + static const int kNumberOfBuffersInFifo = 2;
|
| + int frames_in_fifo = kNumberOfBuffersInFifo * fifo_buffer_size_;
|
| + fifo_.reset(new media::AudioFifo(input_params.channels(), frames_in_fifo));
|
| + input_bus_ = media::AudioBus::Create(input_params.channels(),
|
| + input_params.frames_per_buffer());
|
| +
|
| + // Create the audio converter with |disable_fifo| as false so that the
|
| + // converter will request input_params.frames_per_buffer() each time.
|
| +  // This will not increase the complexity, as there is only one client of
|
| +  // the converter.
|
| + audio_converter_.reset(
|
| + new media::AudioConverter(input_params, output_params_, false));
|
| + audio_converter_->AddInput(this);
|
| +}
|
| +
|
| +void SpeechRecognitionAudioSourceProvider::OnReadyStateChanged(
|
| + blink::WebMediaStreamSource::ReadyState state) {
|
| + DCHECK(main_render_thread_checker_.CalledOnValidThread());
|
| + DCHECK(!track_stopped_);
|
| +
|
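| +  // ReadyStateEnded means the track was stopped; let the owner know so it
|
| +  // can tear this provider down.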
| + if (state == blink::WebMediaStreamSource::ReadyStateEnded) {
|
| + track_stopped_ = true;
|
| +
|
| + if (!on_stopped_cb_.is_null())
|
| + on_stopped_cb_.Run();
|
| + }
|
| +}
|
| +
|
| +void SpeechRecognitionAudioSourceProvider::OnData(const int16* audio_data,
|
| + int sample_rate,
|
| + int number_of_channels,
|
| + int number_of_frames) {
|
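| +  // Called on the capture thread. Buffers the input; once enough frames have
|
| +  // accumulated and the peer has consumed the previous buffer, converts a
|
| +  // buffer into shared memory and signals the peer over the socket.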
| + DCHECK(capture_thread_checker_.CalledOnValidThread());
|
| + DCHECK(peer_buffer_index_);
|
| + DCHECK_EQ(input_bus_->frames(), number_of_frames);
|
| + DCHECK_EQ(input_bus_->channels(), number_of_channels);
|
| + if (fifo_->frames() + number_of_frames > fifo_->max_frames()) {
|
| + // This would indicate a serious issue with the browser process or the
|
| + // SyncSocket and/or SharedMemory. We stop delivering any data to the peer.
|
| + NOTREACHED() << "Audio FIFO overflow";
|
| + return;
|
| + }
|
| +  // TODO(xians): Find a better way to handle the interleaved/deinterleaved
|
| +  // format switching; see issue/317710.
|
| + input_bus_->FromInterleaved(audio_data, number_of_frames,
|
| + sizeof(audio_data[0]));
|
| +
|
| + fifo_->Push(input_bus_.get());
|
| +  // Wait for the FIFO to have at least |fifo_buffer_size_| frames ready.
|
| + if (fifo_->frames() < fifo_buffer_size_)
|
| + return;
|
| +
|
| +  // Make sure the previous output buffer was consumed by the client before
|
| +  // we send the next one. |peer_buffer_index_| points into shared memory;
|
| +  // the client must write to it (incrementing by 1) once the buffer has been
|
| +  // consumed. The check is intentionally non-blocking so that this audio
|
| +  // capture thread is never stalled.
|
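| +  // Roughly, a peer is expected to: block on the SyncSocket for the next
|
| +  // |buffer_index|, read the audio out of the shared memory, and then
|
| +  // increment the counter at |peer_buffer_index_| to acknowledge consumption.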
| + if (buffer_index_ != (*peer_buffer_index_)) {
|
| + DLOG(WARNING) << "Buffer synchronization lag";
|
| + return;
|
| + }
|
| +
|
| + audio_converter_->Convert(output_bus_.get());
|
| +
|
| +  // Notify the client to consume buffer |buffer_index_| from |output_bus_|.
|
| + const size_t bytes_sent =
|
| + socket_->Send(&buffer_index_, sizeof(buffer_index_));
|
| + if (bytes_sent != sizeof(buffer_index_)) {
|
| +    // The send usually fails if the user changes the input audio device.
|
| + DVLOG(1) << "Failed sending buffer index to peer";
|
| + // We have discarded this buffer, but could still recover on the next one.
|
| + return;
|
| + }
|
| +
|
| +  // Count the sent buffer. We expect the client to do the same on its end.
|
| + ++buffer_index_;
|
| +}
|
| +
|
| +double SpeechRecognitionAudioSourceProvider::ProvideInput(
|
| + media::AudioBus* audio_bus, base::TimeDelta buffer_delay) {
|
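| +  // Pull callback invoked by |audio_converter_| from inside Convert():
|
| +  // supplies frames from the FIFO, or silence if it underruns.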
| + DCHECK(capture_thread_checker_.CalledOnValidThread());
|
| + if (fifo_->frames() >= audio_bus->frames())
|
| + fifo_->Consume(audio_bus, 0, audio_bus->frames());
|
| + else
|
| + audio_bus->Zero();
|
| +
|
| + return 1.0;
|
| +}
|
| +
|
| +} // namespace content
|