Chromium Code Reviews| Index: content/renderer/media/speech_recognition_audio_sink.h |
| diff --git a/content/renderer/media/speech_recognition_audio_sink.h b/content/renderer/media/speech_recognition_audio_sink.h |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..86605c420001391b22e82d90070d08090fddba40 |
| --- /dev/null |
| +++ b/content/renderer/media/speech_recognition_audio_sink.h |
| @@ -0,0 +1,128 @@ |
| +// Copyright 2014 The Chromium Authors. All rights reserved. |
| +// Use of this source code is governed by a BSD-style license that can be |
| +// found in the LICENSE file. |
| + |
| +#ifndef CONTENT_RENDERER_MEDIA_SPEECH_RECOGNITION_AUDIO_SINK_H_ |
| +#define CONTENT_RENDERER_MEDIA_SPEECH_RECOGNITION_AUDIO_SINK_H_ |
| + |
| +#include "base/callback.h" |
| +#include "base/memory/scoped_ptr.h" |
| +#include "base/memory/shared_memory.h" |
| +#include "base/sync_socket.h" |
| +#include "base/threading/thread_checker.h" |
| +#include "content/common/content_export.h" |
| +#include "content/public/renderer/media_stream_audio_sink.h" |
| +#include "content/renderer/media/media_stream_audio_source.h" |
| +#include "media/audio/audio_parameters.h" |
| +#include "media/base/audio_converter.h" |
| +#include "third_party/WebKit/public/platform/WebMediaStreamTrack.h" |
| + |
| +namespace media { |
| +class AudioBus; |
| +class AudioFifo; |
| +} |
| + |
| +namespace content { |
| + |
| +// SpeechRecognitionAudioSink works as an audio sink to the |
| +// WebRtcLocalAudioTrack. It stores the capture data into a FIFO. |
| +// When the FIFO has enough data for resampling, it converts it, |
| +// passes the buffer to the WebSpeechRecognizer via SharedMemory |
| +// and notifies it via SyncSocket followed by incrementing the |buffer_index_|. |
| +// The WebSpeechRecognizer in turn increments its own copy of the buffer index |
| +// once it has consumed the buffer; comparing the two indices keeps the |
| +// producer and consumer in sync.
|
henrika (OOO until Aug 14)
2014/09/29 10:38:41
Can you clarify? What is synchronized, and how does the synchronization work?
burnik
2014/09/29 12:07:31
The buffer indices are synchronized.
Detailed in the description.
|
| +class CONTENT_EXPORT SpeechRecognitionAudioSink |
| + : NON_EXPORTED_BASE(public media::AudioConverter::InputCallback), |
| + NON_EXPORTED_BASE(public MediaStreamAudioSink) { |
| + public: |
| + // Notification callback; run when the source audio track has stopped. |
| + typedef base::Callback<void()> OnStoppedCB; |
| + |
| + SpeechRecognitionAudioSink(/* ExtraData reference is copied from track. */ |
| + const blink::WebMediaStreamTrack& track, |
| + |
| + /* Output audio parameters are copied. */ |
| + const media::AudioParameters& params, |
| + |
| + /* Handle is used to map to existing memory. */ |
| + const base::SharedMemoryHandle memory, |
| + |
| + /* Socket ownership is transferred. */ |
|
henrika (OOO until Aug 14)
2014/09/29 10:38:41
'to here' sounds odd. Can you rewrite?
burnik
2014/09/29 12:07:31
Done:
*Socket ownership is transferred.*
|
| + scoped_ptr<base::SyncSocket> socket, |
| + |
| + /* Callback is stored by copy. */ |
| + const OnStoppedCB& on_stopped_cb); |
| + |
| + virtual ~SpeechRecognitionAudioSink(); |
| + |
| + // Returns whether the provided track is supported. |
| + static bool IsSupportedTrack(const blink::WebMediaStreamTrack& track); |
| + |
| + private: |
| + // content::MediaStreamAudioSink implementation. |
| + virtual void OnReadyStateChanged( |
| + blink::WebMediaStreamSource::ReadyState state) OVERRIDE; |
| + |
| + virtual void OnData(const int16* audio_data, int sample_rate, |
| + int number_of_channels, int number_of_frames) OVERRIDE; |
| + virtual void OnSetFormat(const media::AudioParameters& params) OVERRIDE; |
| + |
| + // media::AudioConverter::InputCallback implementation. |
| + virtual double ProvideInput(media::AudioBus* audio_bus, |
| + base::TimeDelta buffer_delay) OVERRIDE; |
| + |
| + // Number of frames per buffer in FIFO. When the buffer is full we convert and |
| + // consume it on the |output_bus_|. Size of the buffer depends on the |
| + // resampler. Example: for 44.1 to 16.0 conversion, it should be 4100 frames. |
| + int fifo_buffer_size_; |
| + |
| + // Used to DCHECK that some methods are called on the main render thread. |
| + base::ThreadChecker main_render_thread_checker_; |
| + |
| + // Used to DCHECK that some methods are called on the capture audio thread. |
| + base::ThreadChecker capture_thread_checker_; |
| + |
| + // The audio track that this audio sink is connected to. |
| + const blink::WebMediaStreamTrack track_; |
| + |
| + // Shared memory used by audio buses on both browser and renderer processes. |
| + base::SharedMemory shared_memory_; |
| + |
| + // Socket for synchronization of audio bus reads/writes. |
| + // Created on the renderer client and passed here. Accessed on capture thread. |
| + scoped_ptr<base::SyncSocket> socket_; |
| + |
| + // Used as a resampler to deliver appropriate format to speech recognition. |
| + scoped_ptr<media::AudioConverter> audio_converter_; |
| + |
| + // FIFO is used for queuing audio frames before we resample. |
| + scoped_ptr<media::AudioFifo> fifo_; |
| + |
| + // Audio delivered from source. |
| + scoped_ptr<media::AudioBus> input_bus_; |
| + |
| + // Audio bus shared with the browser process via |shared_memory_|. |
| + scoped_ptr<media::AudioBus> output_bus_; |
| + |
| + // Params of the source audio. Can change when |OnSetFormat()| occurs. |
| + media::AudioParameters input_params_; |
| + |
| + // Params used by speech recognition. |
| + const media::AudioParameters output_params_; |
| + |
| + // Whether the track has been stopped. |
| + bool track_stopped_; |
| + |
| + // Local counter of audio buffers for synchronization. |
| + uint32 buffer_index_; |
| + |
| + // Peer's counter of audio buffers for synchronization. |
| + // NOTE(review): presumably read from |shared_memory_| and written by the |
| + // peer process — confirm against the .cc file. |
| + const uint32* peer_buffer_index_; |
| + |
| + // Callback for the renderer client. Called when the audio track was stopped. |
| + const OnStoppedCB on_stopped_cb_; |
| + |
| + DISALLOW_COPY_AND_ASSIGN(SpeechRecognitionAudioSink); |
| +}; |
| + |
| +} // namespace content |
| + |
| +#endif // CONTENT_RENDERER_MEDIA_SPEECH_RECOGNITION_AUDIO_SINK_H_ |