content/renderer/media/speech_recognition_audio_sink.h - Issue 499233003: Binding media stream audio track to speech recognition [renderer]

Side by Side Diff: content/renderer/media/speech_recognition_audio_sink.h

Issue 499233003: Binding media stream audio track to speech recognition [renderer] (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Nits, comments, refactoring, rebasing. Created 6 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 // Copyright 2014 The Chromium Authors. All rights reserved.

	2 // Use of this source code is governed by a BSD-style license that can be

	3 // found in the LICENSE file.

	4

	5 #ifndef CONTENT_RENDERER_MEDIA_SPEECH_RECOGNITION_AUDIO_SINK_H_

	6 #define CONTENT_RENDERER_MEDIA_SPEECH_RECOGNITION_AUDIO_SINK_H_

	7

	8 #include "base/callback.h"

	9 #include "base/memory/scoped_ptr.h"

	10 #include "base/memory/shared_memory.h"

	11 #include "base/sync_socket.h"

	12 #include "base/threading/thread_checker.h"

	13 #include "content/common/content_export.h"

	14 #include "content/public/renderer/media_stream_audio_sink.h"

	15 #include "content/renderer/media/media_stream_audio_source.h"

	16 #include "media/audio/audio_parameters.h"

	17 #include "media/base/audio_converter.h"

	18 #include "third_party/WebKit/public/platform/WebMediaStreamTrack.h"

	19

	20 namespace media {

	21 class AudioBus;

	22 class AudioFifo;

	23 }

	24

	25 namespace content {

	26

	27 // SpeechRecognitionAudioSink works as an audio sink to the

	28 // WebRtcLocalAudioTrack. It stores the capture data into a FIFO.

	29 // When the FIFO has enough data for resampling, it converts it,

	30 // passes the buffer to the WebSpeechRecognizer via SharedMemory

	31 // and notifies it via SyncSocket followed by incrementing the |buffer_index_|.

	32 // WebSpeechRecognizer increments the shared buffer index to synchronize.
	henrika (OOO until Aug 14) 2014/09/29 10:38:41 Can you clarify? What is synchronized and how does it work? burnik 2014/09/29 12:07:31 On 2014/09/29 10:38:41, henrika wrote: > Can you clarify? What is synchronized and how does it work? The buffer indices are synchronized. Detailed in design doc. http://goo.gl/9Ot3PC
	33 class CONTENT_EXPORT SpeechRecognitionAudioSink

	34 : NON_EXPORTED_BASE(public media::AudioConverter::InputCallback),

	35 NON_EXPORTED_BASE(public MediaStreamAudioSink) {

	36 public:

	37 typedef base::Callback<void()> OnStoppedCB;

	38

	39 SpeechRecognitionAudioSink(/* ExtraData reference is copied from track. */

	40 const blink::WebMediaStreamTrack& track,

	41

	42 /* Output audio parameters are copied. */

	43 const media::AudioParameters& params,

	44

	45 /* Handle is used to map to existing memory. */

	46 const base::SharedMemoryHandle memory,

	47

	48 /* Socket ownership is passed to here. */
	henrika (OOO until Aug 14) 2014/09/29 10:38:41 'to here' sounds odd. Can you rewrite? burnik 2014/09/29 12:07:31 On 2014/09/29 10:38:41, henrika wrote: > 'to here' sounds odd. Can you rewrite? Done: Socket ownership is transferred.
	49 scoped_ptr<base::SyncSocket> socket,

	50

	51 /* Callback is stored by copy. */

	52 const OnStoppedCB& on_stopped_cb);

	53

	54 virtual ~SpeechRecognitionAudioSink();

	55

	56 // Returns whether the provided track is supported.

	57 static bool IsSupportedTrack(const blink::WebMediaStreamTrack& track);

	58

	59 private:

	60 // content::MediaStreamAudioSink implementation.

	61 virtual void OnReadyStateChanged(

	62 blink::WebMediaStreamSource::ReadyState state) OVERRIDE;

	63

	64 virtual void OnData(const int16* audio_data, int sample_rate,

	65 int number_of_channels, int number_of_frames) OVERRIDE;

	66 virtual void OnSetFormat(const media::AudioParameters& params) OVERRIDE;

	67

	68 // media::AudioConverter::InputCallback implementation.

	69 virtual double ProvideInput(media::AudioBus* audio_bus,

	70 base::TimeDelta buffer_delay) OVERRIDE;

	71

	72 // Number of frames per buffer in FIFO. When the buffer is full we convert and

	73 // consume it on the |output_bus_|. Size of the buffer depends on the

	74 // resampler. Example: for 44.1 to 16.0 conversion, it should be 4100 frames.

	75 int fifo_buffer_size_;

	76

	77 // Used to DCHECK that some methods are called on the main render thread.

	78 base::ThreadChecker main_render_thread_checker_;

	79

	80 // Used to DCHECK that some methods are called on the capture audio thread.

	81 base::ThreadChecker capture_thread_checker_;

	82

	83 // The audio track that this audio sink is connected to.

	84 const blink::WebMediaStreamTrack track_;

	85

	86 // Shared memory used by audio buses on both browser and renderer processes.

	87 base::SharedMemory shared_memory_;

	88

	89 // Socket for synchronization of audio bus reads/writes.

	90 // Created on the renderer client and passed here. Accessed on capture thread.

	91 scoped_ptr<base::SyncSocket> socket_;

	92

	93 // Used as a resampler to deliver appropriate format to speech recognition.

	94 scoped_ptr<media::AudioConverter> audio_converter_;

	95

	96 // FIFO is used for queuing audio frames before we resample.

	97 scoped_ptr<media::AudioFifo> fifo_;

	98

	99 // Audio delivered from source.

	100 scoped_ptr<media::AudioBus> input_bus_;

	101

	102 // Audio bus shared with the browser process via |shared_memory_|.

	103 scoped_ptr<media::AudioBus> output_bus_;

	104

	105 // Params of the source audio. Can change when |OnSetFormat()| occurs.

	106 media::AudioParameters input_params_;

	107

	108 // Params used by speech recognition.

	109 const media::AudioParameters output_params_;

	110

	111 // Whether the track has been stopped.

	112 bool track_stopped_;

	113

	114 // Local counter of audio buffers for synchronization.

	115 uint32 buffer_index_;

	116

	117 // Peer's counter of audio buffers for synchronization.

	118 const uint32* peer_buffer_index_;

	119

	120 // Callback for the renderer client. Called when the audio track was stopped.

	121 const OnStoppedCB on_stopped_cb_;

	122

	123 DISALLOW_COPY_AND_ASSIGN(SpeechRecognitionAudioSink);

	124 };

	125

	126 } // namespace content

	127

	128 #endif // CONTENT_RENDERER_MEDIA_SPEECH_RECOGNITION_AUDIO_SINK_H_

OLD	NEW