content/renderer/media/speech_recognition_audio_source_provider.cc - Issue 499233003: Binding media stream audio track to speech recognition [renderer]

Side by Side Diff: content/renderer/media/speech_recognition_audio_source_provider.cc

Issue 499233003: Binding media stream audio track to speech recognition [renderer] (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Refactoring, error states, more comments. Created 6 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« content/renderer/media/speech_recognition_audio_source_provider.h ('K') | « content/renderer/media/speech_recognition_audio_source_provider.h ('k') | content/renderer/media/speech_recognition_audio_source_provider_unittest.cc » ('j') | content/renderer/media/speech_recognition_audio_source_provider_unittest.cc » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
(Empty)
	1 // Copyright 2014 The Chromium Authors. All rights reserved.

	2 // Use of this source code is governed by a BSD-style license that can be

	3 // found in the LICENSE file.

	4

	5 #include "content/renderer/media/speech_recognition_audio_source_provider.h"

	6

	7 #include "base/logging.h"

	8 #include "base/memory/shared_memory.h"

	9 #include "base/time/time.h"

	10 #include "media/audio/audio_parameters.h"

	11 #include "media/base/audio_fifo.h"

	12

	13 namespace content {

	14

	15 SpeechRecognitionAudioSourceProvider::SpeechRecognitionAudioSourceProvider(

	16 const blink::WebMediaStreamTrack& track,

	17 const media::AudioParameters& params, const base::SharedMemoryHandle memory,

	18 base::SyncSocket* socket, OnStoppedCB on_stopped_cb)

	19 : track_(track),

	20 shared_memory_(memory, false),

	21 socket_(socket),

	22 output_params_(params),

	23 track_stopped_(false),

	24 buffer_index_(0),

	25 on_stopped_cb_(on_stopped_cb) {

	26 DCHECK(socket);

	27 DCHECK(main_render_thread_checker_.CalledOnValidThread());

	28 DCHECK(params.IsValid());

	29 DCHECK(IsSupportedTrack(track));

	30 const size_t memory_length = media::AudioBus::CalculateMemorySize(params) +

	31 sizeof(media::AudioInputBufferParameters);

	32 CHECK(shared_memory_.Map(memory_length));

	33

	34 // Buffer index for sync with client is \|params.size\| on the shared memory.

	35 uint8* ptr = static_cast<uint8*>(shared_memory_.memory());

	36 media::AudioInputBuffer* buffer =

	37 reinterpret_cast<media::AudioInputBuffer*>(ptr);

	38 peer_buffer_index_ = &(buffer->params.size);

	39

	40 // Client must manage his own counter and reset it.

	41 DCHECK_EQ(0U, *peer_buffer_index_);

	42 output_bus_ = media::AudioBus::WrapMemory(params, buffer->audio);

	43

	44 // Connect the source provider to the track as a sink.

	45 MediaStreamAudioSink::AddToAudioTrack(this, track_);

	46 }

	47

	48 SpeechRecognitionAudioSourceProvider::~SpeechRecognitionAudioSourceProvider() {

	49 DCHECK(main_render_thread_checker_.CalledOnValidThread());

	50 if (audio_converter_.get())

	51 audio_converter_->RemoveInput(this);

	52

	53 // Notify the track before this sink goes away.

	54 if (!track_stopped_)

	55 MediaStreamAudioSink::RemoveFromAudioTrack(this, track_);

	56 }

	57

	58 // static

	59 bool SpeechRecognitionAudioSourceProvider::IsSupportedTrack(

	60 const blink::WebMediaStreamTrack& track) {

	61 if (track.source().type() != blink::WebMediaStreamSource::TypeAudio)

	62 return false;

	63

	64 MediaStreamAudioSource* native_source =

	65 static_cast<MediaStreamAudioSource*>(track.source().extraData());

	66 if (!native_source)

	67 return false;

	68

	69 const StreamDeviceInfo& device_info = native_source->device_info();

	70 // Purposely only support tracks from an audio device. Dissallow WebAudio.

	71 return (device_info.device.type == content::MEDIA_DEVICE_AUDIO_CAPTURE);

	72 }

	73

	74 void SpeechRecognitionAudioSourceProvider::OnSetFormat(

	75 const media::AudioParameters& input_params) {

	76 DCHECK(input_params.IsValid());

	77 DCHECK_LE(

	78 input_params.frames_per_buffer() * 1000 / input_params.sample_rate(),

	79 output_params_.frames_per_buffer() * 1000 / output_params_.sample_rate());

	80

	81 // We need detach the thread here because it will be a new capture thread

	82 // calling OnSetFormat() and OnData() if the source is restarted.

	83 capture_thread_checker_.DetachFromThread();

	84

	85 input_params_ = input_params;

	86 fifo_buffer_size_ =

	87 std::ceil(output_params_.frames_per_buffer() *

	88 static_cast<double>(input_params_.sample_rate()) /

	89 output_params_.sample_rate());

	90 DCHECK_GE(fifo_buffer_size_, input_params_.frames_per_buffer());

	91

	92 // Allows for some delays on the endpoint client.

	93 static const int kNumberOfBuffersInFifo = 2;

	94 int frames_in_fifo = kNumberOfBuffersInFifo * fifo_buffer_size_;

	95 fifo_.reset(new media::AudioFifo(input_params.channels(), frames_in_fifo));

	96 input_bus_ = media::AudioBus::Create(input_params.channels(),

	97 input_params.frames_per_buffer());

	98

	99 // Create the audio converter with \|disable_fifo\| as false so that the

	100 // converter will request input_params.frames_per_buffer() each time.

	101 // This will not increase the complexity as there is only one client to

	102 // the converter.

	103 audio_converter_.reset(

	104 new media::AudioConverter(input_params, output_params_, false));

	105 audio_converter_->AddInput(this);

	106 }

	107

	108 void SpeechRecognitionAudioSourceProvider::OnReadyStateChanged(

	109 blink::WebMediaStreamSource::ReadyState state) {

	110 DCHECK(main_render_thread_checker_.CalledOnValidThread());

	111 DCHECK(!track_stopped_);

	112

	113 if (state == blink::WebMediaStreamSource::ReadyStateEnded) {

	114 track_stopped_ = true;

	115

	116 if (!on_stopped_cb_.is_null())

	117 on_stopped_cb_.Run();

	118 }

	119 }

	120

	121 void SpeechRecognitionAudioSourceProvider::OnData(const int16* audio_data,

	122 int sample_rate,

	123 int number_of_channels,

	124 int number_of_frames) {

	125 DCHECK(capture_thread_checker_.CalledOnValidThread());

	126 DCHECK(peer_buffer_index_);

	127 DCHECK_EQ(input_bus_->frames(), number_of_frames);

	128 DCHECK_EQ(input_bus_->channels(), number_of_channels);

	129 if (fifo_->frames() + number_of_frames > fifo_->max_frames()) {

	130 // This would indicate a serious issue with the browser process or the

	131 // SyncSocket and/or SharedMemory. We stop delivering any data to the peer.

	132 NOTREACHED() << "Audio FIFO overflow";

	133 return;

	134 }

	135 // TODO(xians): A better way to handle the interleaved and deinterleaved

	136 // format switching, see issue/317710.

	137 input_bus_->FromInterleaved(audio_data, number_of_frames,

	138 sizeof(audio_data[0]));

	139

	140 fifo_->Push(input_bus_.get());

	141 // Wait for FIFO to have at least \|fifo_buffer_size_\| frames ready.

	142 if (fifo_->frames() < fifo_buffer_size_)

	143 return;

	144

	145 // Make sure the previous output buffer was consumed by client before we send

	146 // the next buffer. \|peer_buffer_index_\| is pointing to shared memory.

	147 // The client must write to it (incrementing by 1) once the the buffer was

	148 // consumed. This is intentional not to block this audio capturing thread.

	149 if (buffer_index_ != (*peer_buffer_index_)) {

	150 DLOG(WARNING) << "Buffer synchronization lag";

	151 return;

	152 }

	153

	154 audio_converter_->Convert(output_bus_.get());

	155

	156 // Notify client to consume buffer \|buffer_index_\| on \|output_bus_\|.

	157 const size_t bytes_sent =

	158 socket_->Send(&buffer_index_, sizeof(buffer_index_));

	159 if (bytes_sent != sizeof(buffer_index_)) {

	160 // The send ocasionally fails if the user changes his input audio device.

	161 DVLOG(1) << "Failed sending buffer index to peer";

	162 // We have discarded this buffer, but could still recover on the next one.

	163 return;

	164 }

	165

	166 // Count the sent buffer. We expect the client to do the same on his end.

	167 ++buffer_index_;

	168 }

	169

	170 double SpeechRecognitionAudioSourceProvider::ProvideInput(

	171 media::AudioBus* audio_bus, base::TimeDelta buffer_delay) {

	172 DCHECK(capture_thread_checker_.CalledOnValidThread());

	173 if (fifo_->frames() >= audio_bus->frames())

	174 fifo_->Consume(audio_bus, 0, audio_bus->frames());

	175 else

	176 audio_bus->Zero();

	177

	178 return 1.0;
	tommi (sloooow) - chröme 2014/09/24 09:51:59 document what this means? document what this means? burnik 2014/09/24 11:54:22 // Return volume greater than zero to indicate we Show quoted text On 2014/09/24 09:51:59, tommi wrote: > document what this means? // Return volume greater than zero to indicate we have more data. SGTU?
	179 }

	180

	181 } // namespace content

OLD	NEW