// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "content/renderer/media/speech_recognition_audio_source_provider.h"

#include <map>
#include <string>

#include "base/bind.h"
#include "base/memory/scoped_ptr.h"
#include "base/memory/shared_memory.h"
#include "base/strings/utf_string_conversions.h"
#include "base/sync_socket.h"
#include "content/renderer/media/media_stream_audio_source.h"
#include "content/renderer/media/mock_media_constraint_factory.h"
#include "content/renderer/media/webrtc/webrtc_local_audio_track_adapter.h"
#include "content/renderer/media/webrtc_audio_capturer.h"
#include "content/renderer/media/webrtc_local_audio_track.h"
#include "media/audio/audio_parameters.h"
#include "media/base/audio_bus.h"
#include "testing/gmock/include/gmock/gmock.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "third_party/WebKit/public/platform/WebMediaStreamSource.h"
#include "third_party/WebKit/public/platform/WebMediaStreamTrack.h"
#include "third_party/WebKit/public/platform/WebString.h"

namespace {

// Input audio format.
const media::AudioParameters::Format kInputFormat =
    media::AudioParameters::AUDIO_PCM_LOW_LATENCY;
const media::ChannelLayout kInputChannelLayout = media::CHANNEL_LAYOUT_MONO;
const int kInputChannels = 1;
const int kInputSampleRate = 44100;
const int kInputBitsPerSample = 16;
const int kInputFramesPerBuffer = 441;

// Output audio format.
const media::AudioParameters::Format kOutputFormat =
    media::AudioParameters::AUDIO_PCM_LOW_LATENCY;
const media::ChannelLayout kOutputChannelLayout = media::CHANNEL_LAYOUT_STEREO;
const int kOutputChannels = 2;
const int kOutputSampleRate = 16000;
const int kOutputBitsPerSample = 16;
const int kOutputFramesPerBuffer = 1600;

// Minimum number of input buffers required to trigger a single SyncSocket
// transfer.
const size_t kBuffersPerNotification =
    (kOutputFramesPerBuffer * kInputSampleRate) /
    (kInputFramesPerBuffer * kOutputSampleRate);
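// With the parameters above: (1600 * 44100) / (441 * 16000) = 10 buffers.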

// Length of the source audio data in samples (frames * channels).
const size_t kSourceDataLength = kInputFramesPerBuffer * kInputChannels;

}  // namespace

////////////////////////////////////////////////////////////////////////////////

namespace content {

// Mocked out sockets used for Send/Receive.
// Data is written to and read from a shared buffer used as a FIFO; there is
// no blocking. |OnSendCB| is used to trigger a |Receive| on the other socket.
class MockSyncSocket : public base::SyncSocket {
 public:
  // This allows for two requests in the queue between the |MockSyncSocket|s.
  static const int kSharedBufferSize = 8;

  // Buffer to be shared between two |MockSyncSocket|s. Allocated on the heap.
  struct SharedBuffer {
    SharedBuffer() : start(0), length(0) {}

    uint8 data[kSharedBufferSize];
    size_t start;
    size_t length;
  };

  // Callback used for pairing an A.Send() with B.Receive() without blocking.
  typedef base::Callback<void()> OnSendCB;

  explicit MockSyncSocket(SharedBuffer* shared_buffer);
  MockSyncSocket(SharedBuffer* shared_buffer, const OnSendCB& on_send_cb);

  virtual size_t Send(const void* buffer, size_t length) OVERRIDE;
  virtual size_t Receive(void* buffer, size_t length) OVERRIDE;

  // When |in_failure_mode_| == true, the socket fails to send.
  void SetFailureMode(bool in_failure_mode) {
    in_failure_mode_ = in_failure_mode;
  }

 private:
  SharedBuffer* buffer_;
  const OnSendCB on_send_cb_;
  bool in_failure_mode_;
};
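
// A minimal usage sketch (the free function |OnDataSent()| is hypothetical,
// shown only for illustration): constructing the sending socket with a
// callback makes every Send() immediately drive a Receive() on its pair.
//
//   MockSyncSocket::SharedBuffer buffer;
//   MockSyncSocket receiver(&buffer);
//   MockSyncSocket sender(&buffer, base::Bind(&OnDataSent));
//   uint32 index = 42;
//   sender.Send(&index, sizeof(index));  // OnDataSent() would call
//                                        // receiver.Receive(...).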

MockSyncSocket::MockSyncSocket(SharedBuffer* buffer)
    : buffer_(buffer), in_failure_mode_(false) {}

MockSyncSocket::MockSyncSocket(SharedBuffer* buffer,
                               const OnSendCB& on_send_cb)
    : buffer_(buffer), on_send_cb_(on_send_cb), in_failure_mode_(false) {}

size_t MockSyncSocket::Send(const void* buffer, size_t length) {
  if (in_failure_mode_)
    return 0;

  const uint8* b = static_cast<const uint8*>(buffer);
  for (size_t i = 0; i < length; ++i, ++buffer_->length)
    buffer_->data[buffer_->start + buffer_->length] = b[i];

  // Only the socket constructed with a callback (the foreign socket) sends,
  // but guard against running a null callback anyway.
  if (!on_send_cb_.is_null())
    on_send_cb_.Run();
  return length;
}

size_t MockSyncSocket::Receive(void* buffer, size_t length) {
  uint8* b = static_cast<uint8*>(buffer);
  for (size_t i = buffer_->start; i < buffer_->length; ++i, ++buffer_->start)
    b[i] = buffer_->data[buffer_->start];

  // Since each Send() is drained by a full Receive(), the buffer indices can
  // be reset here.
  buffer_->start = buffer_->length = 0;
  return length;
}

////////////////////////////////////////////////////////////////////////////////

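// Fake speech recognizer (the consumer). The producer writes resampled audio
// into the shared AudioBus and signals over its socket; the foreign socket's
// send callback drives EmulateReceiveThreadLoopIteration(), which receives
// the signal on the local socket and bumps the shared buffer index.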
class FakeSpeechRecognizer {
 public:
  FakeSpeechRecognizer() : is_responsive_(true) {}
  ~FakeSpeechRecognizer() {}

  void Initialize(
      const blink::WebMediaStreamTrack& track,
      const media::AudioParameters& sink_params,
      const SpeechRecognitionAudioSourceProvider::OnStoppedCB& on_stopped_cb);

  // TODO(burnik): Move from the recognizer to the test.
  SpeechRecognitionAudioSourceProvider* SourceProvider() {
    return audio_source_provider_.get();
  }

  // Emulates a single iteration of the receiving thread's loop; this would
  // normally run on the browser side.
  void EmulateReceiveThreadLoopIteration() {
    // When not responsive, do nothing, as if the process were busy.
    if (!is_responsive_)
      return;

    local_socket_->Receive(shared_buffer_index_,
                           sizeof(*shared_buffer_index_));
    // Notify the producer that the audio buffer has been consumed.
    (*shared_buffer_index_)++;
  }

  // Used to simulate unresponsive behavior of the consumer.
  void SimulateResponsiveness(bool is_responsive) {
    is_responsive_ = is_responsive;
  }

  // Used to simulate a problem with the sockets.
  void SetFailureModeOnForeignSocket(bool in_failure_mode) {
    foreign_socket_->SetFailureMode(in_failure_mode);
  }

  uint32 buffer_index() const { return *shared_buffer_index_; }
  media::AudioBus* audio_bus() const { return audio_track_bus_.get(); }

 private:
  bool is_responsive_;

  // Shared memory for the audio and synchronization.
  scoped_ptr<base::SharedMemory> shared_memory_;

  // Shared buffer used by the fake sockets.
  scoped_ptr<MockSyncSocket::SharedBuffer> shared_buffer_;
  scoped_ptr<MockSyncSocket> local_socket_;
  scoped_ptr<MockSyncSocket> foreign_socket_;

  // Audio bus wrapping the shared memory from the renderer.
  scoped_ptr<media::AudioBus> audio_track_bus_;

  // Points at the synchronization counter in the shared memory.
  uint32* shared_buffer_index_;

  // Producer. TODO(burnik): This should be outside the recognizer.
  scoped_ptr<SpeechRecognitionAudioSourceProvider> audio_source_provider_;
};

void FakeSpeechRecognizer::Initialize(
    const blink::WebMediaStreamTrack& track,
    const media::AudioParameters& sink_params,
    const SpeechRecognitionAudioSourceProvider::OnStoppedCB& on_stopped_cb) {
  // Allocate, map and share the memory used for audio and synchronization.
  const uint32 shared_memory_size =
      sizeof(media::AudioInputBufferParameters) +
      media::AudioBus::CalculateMemorySize(sink_params);
  shared_memory_.reset(new base::SharedMemory());
  ASSERT_TRUE(shared_memory_->CreateAndMapAnonymous(shared_memory_size));

  base::SharedMemoryHandle foreign_memory_handle;
  ASSERT_TRUE(shared_memory_->ShareToProcess(base::GetCurrentProcessHandle(),
                                             &foreign_memory_handle));

  media::AudioInputBuffer* buffer =
      static_cast<media::AudioInputBuffer*>(shared_memory_->memory());
  audio_track_bus_ = media::AudioBus::WrapMemory(sink_params, buffer->audio);

  // The |params.size| field of the shared |AudioInputBuffer| is repurposed as
  // the counter used for synchronization.
  shared_buffer_index_ = &(buffer->params.size);
  *shared_buffer_index_ = 0U;

  // Create a shared buffer for the |MockSyncSocket|s.
  shared_buffer_.reset(new MockSyncSocket::SharedBuffer());

  // The local socket receives signals from the producer.
  local_socket_.reset(new MockSyncSocket(shared_buffer_.get()));

  // A Receive on the local socket is triggered automatically whenever data is
  // sent over the foreign socket.
  foreign_socket_.reset(new MockSyncSocket(
      shared_buffer_.get(),
      base::Bind(&FakeSpeechRecognizer::EmulateReceiveThreadLoopIteration,
                 base::Unretained(this))));

  // Pairing the sockets is normally required; since the mocks override
  // Send/Receive, it has no effect here.
  base::SyncSocket::CreatePair(local_socket_.get(), foreign_socket_.get());

  // Create the producer. TODO(burnik): Move out of the recognizer.
  audio_source_provider_.reset(new SpeechRecognitionAudioSourceProvider(
      track, sink_params, foreign_memory_handle, foreign_socket_.get(),
      on_stopped_cb));
}

////////////////////////////////////////////////////////////////////////////////

class SpeechRecognitionAudioSourceProviderTest : public testing::Test {
 public:
  SpeechRecognitionAudioSourceProviderTest() {
    // Audio environment setup.
    source_params_.Reset(kInputFormat, kInputChannelLayout, kInputChannels,
                         kInputSampleRate, kInputBitsPerSample,
                         kInputFramesPerBuffer);
    sink_params_.Reset(kOutputFormat, kOutputChannelLayout, kOutputChannels,
                       kOutputSampleRate, kOutputBitsPerSample,
                       kOutputFramesPerBuffer);
    source_data_.reset(new int16[kSourceDataLength]);

    // Prepare the track and audio source.
    blink::WebMediaStreamTrack blink_track;
    PrepareTrackWithMediaStreamType(MEDIA_DEVICE_AUDIO_CAPTURE, &blink_track);

    // Get the native track from the blink track and initialize it.
    native_track_ =
        static_cast<WebRtcLocalAudioTrack*>(blink_track.extraData());
    native_track_->OnSetFormat(source_params_);

    // Create and initialize the consumer.
    recognizer_.reset(new FakeSpeechRecognizer());
    recognizer_->Initialize(
        blink_track, sink_params_,
        base::Bind(&SpeechRecognitionAudioSourceProviderTest::StoppedCallback,
                   base::Unretained(this)));

    // The producer is created and owned by the recognizer.
    audio_source_provider_ = recognizer_->SourceProvider();
  }

  // Mock callback for when the track is stopped.
  MOCK_METHOD0(StoppedCallback, void());

 protected:
  static void PrepareTrackWithMediaStreamType(
      const MediaStreamType device_type,
      blink::WebMediaStreamTrack* blink_track) {
    // Device info.
    StreamDeviceInfo device_info(device_type, "Mock audio device",
                                 "mock_audio_device_id");

    // Constraints.
    MockMediaConstraintFactory constraint_factory;
    const blink::WebMediaConstraints constraints =
        constraint_factory.CreateWebMediaConstraints();

    // Capturer.
    scoped_refptr<WebRtcAudioCapturer> capturer(
        WebRtcAudioCapturer::CreateCapturer(-1, device_info, constraints, NULL,
                                            NULL));

    // Adapter.
    scoped_refptr<WebRtcLocalAudioTrackAdapter> adapter(
        WebRtcLocalAudioTrackAdapter::Create(std::string(), NULL));

    // Native track.
    scoped_ptr<WebRtcLocalAudioTrack> native_track(
        new WebRtcLocalAudioTrack(adapter.get(), capturer, NULL));

    // Blink audio source.
    blink::WebMediaStreamSource blink_audio_source;
    blink_audio_source.initialize(base::UTF8ToUTF16("dummy_source_id"),
                                  blink::WebMediaStreamSource::TypeAudio,
                                  base::UTF8ToUTF16("dummy_source_name"));
    MediaStreamSource::SourceStoppedCallback cb;
    blink_audio_source.setExtraData(
        new MediaStreamAudioSource(-1, device_info, cb, NULL));

    // Blink track.
    blink_track->initialize(blink::WebString::fromUTF8("audio_track"),
                            blink_audio_source);
    blink_track->setExtraData(native_track.release());
  }

  // Emulates an audio capture device capturing data from the source.
  void CaptureAudio(const size_t buffers) {
    for (size_t i = 0; i < buffers; ++i)
      native_track_->Capture(source_data_.get(),
                             base::TimeDelta::FromMilliseconds(0), 1, false,
                             false);
  }

  // Helper method to verify that captured audio data has been consumed.
  void AssertConsumedBuffers(const size_t buffer_index) {
    ASSERT_EQ(buffer_index, recognizer_->buffer_index());
  }

  // Helper method to push audio data to the producer and verify that it was
  // consumed.
  void CaptureAudioAndAssertConsumedBuffers(const size_t buffers,
                                            const size_t buffer_index) {
    CaptureAudio(buffers);
    AssertConsumedBuffers(buffer_index);
  }

  // Producer, owned by |recognizer_|.
  SpeechRecognitionAudioSourceProvider* audio_source_provider_;
  // Consumer.
  scoped_ptr<FakeSpeechRecognizer> recognizer_;
  // Audio-related members.
  scoped_ptr<int16[]> source_data_;
  media::AudioParameters source_params_;
  media::AudioParameters sink_params_;
  WebRtcLocalAudioTrack* native_track_;
};

////////////////////////////////////////////////////////////////////////////////

TEST_F(SpeechRecognitionAudioSourceProviderTest, CheckIsSupportedTrackType) {
  typedef std::map<MediaStreamType, bool> SupportedTrackPolicy;
  // This test must be kept in sync with the policy of supported tracks.
  SupportedTrackPolicy p;
  p[MEDIA_NO_SERVICE] = false;
  p[MEDIA_DEVICE_AUDIO_CAPTURE] = true;  // The only type supported for now.
  p[MEDIA_DEVICE_VIDEO_CAPTURE] = false;
  p[MEDIA_TAB_AUDIO_CAPTURE] = false;
  p[MEDIA_TAB_VIDEO_CAPTURE] = false;
  p[MEDIA_DESKTOP_VIDEO_CAPTURE] = false;
  p[MEDIA_LOOPBACK_AUDIO_CAPTURE] = false;
  p[MEDIA_DEVICE_AUDIO_OUTPUT] = false;
  // Ensure this test gets updated along with the |content::MediaStreamType|
  // enum.
  EXPECT_EQ(static_cast<size_t>(NUM_MEDIA_TYPES), p.size());
  // Check the entire policy.
  for (SupportedTrackPolicy::iterator it = p.begin(); it != p.end(); ++it) {
    blink::WebMediaStreamTrack blink_track;
    PrepareTrackWithMediaStreamType(it->first, &blink_track);
    ASSERT_EQ(
        it->second,
        SpeechRecognitionAudioSourceProvider::IsSupportedTrack(blink_track));
  }
}

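// Each batch of |kBuffersPerNotification| captured input buffers fills one
// output buffer, which triggers exactly one socket transfer and advances the
// shared buffer index by one.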
TEST_F(SpeechRecognitionAudioSourceProviderTest, RecognizerNotifiedOnSocket) {
  AssertConsumedBuffers(0U);
  CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 1U);
  CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 2U);
  CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 3U);
}

TEST_F(SpeechRecognitionAudioSourceProviderTest, AudioDataIsResampledOnSink) {
  // Fill the audio input frames with 0, 1, 2, ..., 440.
  for (size_t i = 0; i < kSourceDataLength; ++i)
    source_data_[i] = i;

  const size_t num_frames_to_test = 12;
  const size_t sink_data_length = kOutputFramesPerBuffer * kOutputChannels;
  int16 sink_data[sink_data_length];
  media::AudioBus* sink_bus = recognizer_->audio_bus();

  // Render the audio data from the recognizer.
  sink_bus->ToInterleaved(sink_bus->frames(),
                          sink_params_.bits_per_sample() / 8, sink_data);

  // Verify that both channels are zeroed out before resampling is triggered.
  for (size_t i = 0; i < num_frames_to_test; ++i) {
    ASSERT_EQ(0, sink_data[i * 2]);
    ASSERT_EQ(0, sink_data[i * 2 + 1]);
  }

  // Trigger the source provider to resample the input data.
  AssertConsumedBuffers(0U);
  CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 1U);

  // Render the audio data from the recognizer again.
  sink_bus->ToInterleaved(sink_bus->frames(),
                          sink_params_.bits_per_sample() / 8, sink_data);

  // Expected frames of resampled data, based on |source_data_|.
  // Note: these values also depend on the input/output audio parameters.
  const int16 expected_data[num_frames_to_test] = {0,  2,  5,  8,  11, 13,
                                                   16, 19, 22, 24, 27, 30};
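  // Rough sanity check on the values above: the resampling ratio is
  // 44100 / 16000 = 2.75625 input frames per output frame, so with the linear
  // ramp as input the i-th output frame is approximately 2.75625 * i
  // (0, 2.76, 5.51, 8.27, ...), modulo the resampler's filtering.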

  // Verify that both channels carry the same resampled data.
  for (size_t i = 0; i < num_frames_to_test; ++i) {
    ASSERT_EQ(expected_data[i], sink_data[i * 2]);
    ASSERT_EQ(expected_data[i], sink_data[i * 2 + 1]);
  }
}

TEST_F(SpeechRecognitionAudioSourceProviderTest, SyncSocketFailsSendingData) {
  // (1) Start out with no problems.
  AssertConsumedBuffers(0U);
  CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 1U);

  // (2) A failure occurs (the socket fails to send), so no further buffers
  // are consumed.
  recognizer_->SetFailureModeOnForeignSocket(true);
  CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 1U);
}

TEST_F(SpeechRecognitionAudioSourceProviderTest, OnReadyStateChangedOccurred) {
  AssertConsumedBuffers(0U);
  CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 1U);
  EXPECT_CALL(*this, StoppedCallback()).Times(1);

  native_track_->Stop();
  CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 1U);
}

}  // namespace content