Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1340)

Side by Side Diff: content/renderer/speech_recognition_audio_source_provider_unittest.cc

Issue 499233003: Binding media stream audio track to speech recognition [renderer] (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Add unit test and refactor Created 6 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
#include "content/renderer/speech_recognition_audio_source_provider.h"

#include <map>
#include <string>

#include "base/logging.h"
#include "base/strings/utf_string_conversions.h"
#include "content/renderer/media/media_stream_audio_source.h"
#include "content/renderer/media/mock_media_constraint_factory.h"
#include "content/renderer/media/rtc_media_constraints.h"
#include "content/renderer/media/webrtc/mock_peer_connection_dependency_factory.h"
#include "content/renderer/media/webrtc/webrtc_local_audio_track_adapter.h"
#include "content/renderer/media/webrtc_audio_capturer.h"
#include "content/renderer/media/webrtc_audio_device_impl.h"
#include "content/renderer/media/webrtc_local_audio_source_provider.h"
#include "content/renderer/media/webrtc_local_audio_track.h"
#include "media/audio/audio_parameters.h"
#include "media/base/audio_bus.h"
#include "media/base/audio_capturer_source.h"
#include "testing/gmock/include/gmock/gmock.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "third_party/WebKit/public/platform/WebMediaStreamTrack.h"
24
25 namespace content {
26
27 ////////////////////////////////////////////////////////////////////////////////
28
// Buffer to be shared between the two mock sockets.
//
// |data| is a flat FIFO backing store: |start| is the absolute read offset of
// the next byte to consume and |length| is the absolute end offset of valid
// data. The struct is heap-allocated with value-initialization
// (|new SharedBuffer()| in FakeSpeechRecognizer::Initialize), so all fields
// start out zeroed — Initialize() asserts this. NOTE(review): the ~100 KB
// array lives on the heap (owned by the FakeSpeechRecognizer), not on the
// stack.
struct SharedBuffer {
  uint8 data[100000];
  size_t start;
  size_t length;
};
35
36 ////////////////////////////////////////////////////////////////////////////////
37
// Mock socket used for Send/Receive.
// Data is written and read from a shared buffer used as a FIFO and there is
// no blocking. |OnSendCB| is used to trigger a |Receive| on the other socket.
class MockSyncSocket : public base::SyncSocket {
 public:
  // Callback fired after Send(); the test uses it to emulate the remote end
  // servicing the socket synchronously.
  typedef base::Callback<void()> OnSendCB;

  // |shared_buffer| is not owned and must outlive this socket.
  explicit MockSyncSocket(SharedBuffer* shared_buffer);
  MockSyncSocket(SharedBuffer* shared_buffer, const OnSendCB& on_send_cb);

  virtual size_t Send(const void* buffer, size_t length) OVERRIDE;
  virtual size_t Receive(void* buffer, size_t length) OVERRIDE;

  // When |in_failure_mode_| == true, the socket fails to send.
  void SetFailureMode(bool in_failure_mode) {
    in_failure_mode_ = in_failure_mode;
  }

 private:
  SharedBuffer* buffer_;       // Not owned.
  const OnSendCB on_send_cb_;  // Null when the single-argument ctor was used.
  bool in_failure_mode_;
};
61
// Receiver-side socket: no send callback attached (|on_send_cb_| is null).
MockSyncSocket::MockSyncSocket(SharedBuffer* buffer)
    : buffer_(buffer), in_failure_mode_(false) {}

// Sender-side socket: |on_send_cb| is run after every successful Send().
MockSyncSocket::MockSyncSocket(SharedBuffer* buffer, const OnSendCB& on_send_cb)
    : buffer_(buffer), on_send_cb_(on_send_cb), in_failure_mode_(false) {}
67
68 size_t MockSyncSocket::Send(const void* buffer, size_t length) {
69 if (in_failure_mode_) return 0;
70 uint8* b = static_cast<uint8*>(const_cast<void*>(buffer));
71 for (size_t i = 0; i < length; i++, buffer_->length++)
72 buffer_->data[buffer_->start + buffer_->length] = b[i];
73 on_send_cb_.Run();
74 return length;
75 }
76
77 size_t MockSyncSocket::Receive(void* buffer, size_t length) {
78 uint8* b = static_cast<uint8*>(const_cast<void*>(buffer));
79 for (size_t i = buffer_->start; i < buffer_->length; i++, buffer_->start++)
80 b[i] = buffer_->data[buffer_->start];
81 return length;
82 }
83
84 ////////////////////////////////////////////////////////////////////////////////
85
// Fake consumer side of the speech recognition transport: maps the shared
// memory the producer writes into, receives buffer-index notifications on
// |local_socket_| and acknowledges consumption by bumping the shared index.
class FakeSpeechRecognizer {
 public:
  FakeSpeechRecognizer() : is_responsive_(true) {}
  ~FakeSpeechRecognizer() {}

  // Allocates and shares memory, wires up the mock socket pair and creates
  // the producer (SpeechRecognitionAudioSourceProvider) bound to |track|.
  void Initialize(
      const blink::WebMediaStreamTrack& track,
      const media::AudioParameters& sink_params,
      const SpeechRecognitionAudioSourceProvider::OnErrorCB& on_error_cb);

  // Returns an unowned pointer; this recognizer retains ownership.
  // TODO(burnik): Move from the recognizer to test.
  SpeechRecognitionAudioSourceProvider* SourceProvider();

  // Emulates a single iteration of a thread receiving on the socket.
  virtual void EmulateReceiveThreadLoopIteration();

  // Used to simulate an unresponsive behaviour of the consumer.
  void SimulateResponsiveness(bool is_responsive) {
    is_responsive_ = is_responsive;
  }
  // Used to simulate a problem with sockets.
  void SetFailureModeOnForeignSocket(bool in_failure_mode) {
    DCHECK(foreign_socket_.get());
    foreign_socket_->SetFailureMode(in_failure_mode);
  }

  // Index of the last consumed audio buffer (lives in the shared memory).
  uint32 buffer_index() { return *shared_buffer_index_; }
  // Audio bus wrapping the shared-memory audio area written by the producer.
  media::AudioBus* audio_bus() const { return audio_track_bus_.get(); }

 private:
  // When false, EmulateReceiveThreadLoopIteration() is a no-op.
  bool is_responsive_;
  // Shared memory for the audio and synchronization.
  scoped_ptr<base::SharedMemory> shared_memory_;

  // Fake sockets shared buffer.
  scoped_ptr<SharedBuffer> shared_buffer_;
  scoped_ptr<MockSyncSocket> local_socket_;
  scoped_ptr<MockSyncSocket> foreign_socket_;

  // Audio bus wrapping the shared memory from the renderer.
  scoped_ptr<media::AudioBus> audio_track_bus_;

  // Points into shared memory (|AudioInputBufferParameters::size|); not owned.
  uint32* shared_buffer_index_;
  // Producer. TODO(burnik): this should be outside the recognizer.
  scoped_ptr<SpeechRecognitionAudioSourceProvider> audio_source_provider_;
};
132
// Sets up the full consumer side: shared memory (an |AudioInputBuffer|:
// parameters followed by the raw audio area), the mock socket pair, and the
// producer under test.
void FakeSpeechRecognizer::Initialize(
    const blink::WebMediaStreamTrack& track,
    const media::AudioParameters& sink_params,
    const SpeechRecognitionAudioSourceProvider::OnErrorCB& on_error_cb) {
  // Shared memory is allocated, mapped and shared.
  uint32 shared_memory_size = sizeof(media::AudioInputBufferParameters) +
      media::AudioBus::CalculateMemorySize(sink_params);
  shared_memory_.reset(new base::SharedMemory());

  ASSERT_TRUE(shared_memory_->CreateAndMapAnonymous(shared_memory_size))
      << "Failed to create shared memory";

  base::SharedMemoryHandle foreign_memory_handle;
  ASSERT_TRUE(shared_memory_->ShareToProcess(base::GetCurrentProcessHandle(),
                                             &foreign_memory_handle))
      << "Failed to share memory";

  media::AudioInputBuffer* buffer =
      static_cast<media::AudioInputBuffer*>(shared_memory_->memory());
  audio_track_bus_ = media::AudioBus::WrapMemory(sink_params, buffer->audio);

  // Reference to the counter used to synchronize (|params.size| doubles as
  // the consumed-buffer index in this transport).
  shared_buffer_index_ = &(buffer->params.size);
  *shared_buffer_index_ = 0U;

  // Create a shared buffer for the |MockSyncSocket|s. The parentheses in
  // |new SharedBuffer()| value-initialize it, which guarantees the zeroed
  // fields asserted below.
  shared_buffer_.reset(new SharedBuffer());
  ASSERT_EQ(shared_buffer_->start, 0U);
  ASSERT_EQ(shared_buffer_->length, 0U);

  // Local socket will receive signals from the producer.
  local_socket_.reset(new MockSyncSocket(shared_buffer_.get()));

  // We automatically trigger a Receive when data is sent over the socket.
  foreign_socket_.reset(new MockSyncSocket(
      shared_buffer_.get(),
      base::Bind(&FakeSpeechRecognizer::EmulateReceiveThreadLoopIteration,
                 base::Unretained(this))));

  // This is usually done to pair the sockets. Here it's not effective since
  // the mock sockets exchange data through |shared_buffer_| instead.
  base::SyncSocket::CreatePair(local_socket_.get(), foreign_socket_.get());

  // Create the producer. TODO(burnik): move out of the recognizer.
  audio_source_provider_.reset(new SpeechRecognitionAudioSourceProvider(
      track, sink_params, foreign_memory_handle, foreign_socket_.get(),
      on_error_cb));
}
180
// Returns an UNOWNED pointer to the producer; the recognizer's scoped_ptr
// keeps ownership, so callers must not take ownership of the result.
// TODO(burnik): Remove from the recognizer.
SpeechRecognitionAudioSourceProvider* FakeSpeechRecognizer::SourceProvider() {
  return audio_source_provider_.get();
}
185
186 // Emulates the receive on the socket. This would normally be done on a
187 // receiving thread's loop on the browser.
188 void FakeSpeechRecognizer::EmulateReceiveThreadLoopIteration() {
189 // When not responsive do nothing as if the process is busy.
190 if (!is_responsive_) return;
191 local_socket_->Receive(shared_buffer_index_, sizeof(*shared_buffer_index_));
192 // Notify the producer that the audio buffer has been consumed.
193 (*shared_buffer_index_)++;
194 }
195
196 ////////////////////////////////////////////////////////////////////////////////
197
// Input audio format (the emulated capture device: 44.1 kHz mono,
// 441 frames = 10 ms per buffer).
static const media::AudioParameters::Format kInputFormat =
    media::AudioParameters::AUDIO_PCM_LOW_LATENCY;
const media::ChannelLayout kInputChannelLayout = media::CHANNEL_LAYOUT_MONO;
const int kInputChannels = 1;
const int kInputSampleRate = 44100;
const int kInputBitsPerSample = 16;
const int kInputFramesPerBuffer = 441;

// Output audio format (what the consumer reads: 16 kHz stereo,
// 1600 frames = 100 ms per buffer).
const media::AudioParameters::Format kOutputFormat =
    media::AudioParameters::AUDIO_PCM_LOW_LATENCY;
const media::ChannelLayout kOutputChannelLayout = media::CHANNEL_LAYOUT_STEREO;
const int kOutputChannels = 2;
const int kOutputSampleRate = 16000;
const int kOutputBitsPerSample = 16;
const int kOutputFramesPerBuffer = 1600;

// Minimal number of buffers which trigger a single socket transfer.
// With the values above: (1600 * 44100) / (441 * 16000) = 10 input buffers.
const size_t kBuffersPerNotification =
    (kOutputFramesPerBuffer * kInputSampleRate) /
    (kInputFramesPerBuffer * kOutputSampleRate);

// Number of buffers which make the FIFO ready for consumption.
// NOTE(review): this evaluates to 4410, which is the number of input *frames*
// per output buffer; used as a buffer count it over-fills the FIFO by ~441x —
// presumably intentional to force an overflow in the unresponsive-peer test,
// but TODO confirm.
const size_t kBuffersForReadyFifo =
    (kOutputFramesPerBuffer * kInputSampleRate) / kOutputSampleRate;
224
225 ////////////////////////////////////////////////////////////////////////////////
226
227 class SpeechRecognitionAudioSourceProviderTest : public testing::Test {
228 public:
229 SpeechRecognitionAudioSourceProviderTest() {}
230
231 // Mock for error callback.
232 MOCK_METHOD1(ErrorCallback,
233 void(SpeechRecognitionAudioSourceProvider::ErrorState));
234
235 // testing::Test methods.
236 virtual void SetUp() OVERRIDE {
237 // Audio Environment setup.
238 source_params_.Reset(kInputFormat, kInputChannelLayout, kInputChannels,
239 kInputSampleRate, kInputBitsPerSample,
240 kInputFramesPerBuffer);
241
242 sink_params_.Reset(kOutputFormat, kOutputChannelLayout, kOutputChannels,
243 kOutputSampleRate, kOutputBitsPerSample,
244 kOutputFramesPerBuffer);
245
246 source_data_length_ =
247 source_params_.frames_per_buffer() * source_params_.channels();
248 source_data_.reset(new int16[source_data_length_]);
249
250 MockMediaConstraintFactory constraint_factory;
251 scoped_refptr<WebRtcAudioCapturer> capturer(
252 WebRtcAudioCapturer::CreateCapturer(
253 -1, StreamDeviceInfo(),
254 constraint_factory.CreateWebMediaConstraints(), NULL, NULL));
255 scoped_refptr<WebRtcLocalAudioTrackAdapter> adapter(
256 WebRtcLocalAudioTrackAdapter::Create(std::string(), NULL));
257 native_track_ = new WebRtcLocalAudioTrack(adapter.get(), capturer, NULL);
258 native_track_->OnSetFormat(source_params_);
259
260 blink::WebMediaStreamSource audio_source;
261 audio_source.initialize(base::UTF8ToUTF16("dummy_source_id"),
262 blink::WebMediaStreamSource::TypeAudio,
263 base::UTF8ToUTF16("dummy_source_name"));
264 blink_track_.initialize(blink::WebString::fromUTF8("audio_track"),
265 audio_source);
266 blink_track_.setExtraData(native_track_);
267
268 // Create the consumer.
269 recognizer_ = new FakeSpeechRecognizer();
270 recognizer_->Initialize(
271 blink_track_, sink_params_,
272 base::Bind(&SpeechRecognitionAudioSourceProviderTest::ErrorCallback,
273 base::Unretained(this)));
274
275 // Init the producer.
276 audio_source_provider_.reset(recognizer_->SourceProvider());
277 }
278
279 virtual void TearDown() OVERRIDE { blink_track_.reset(); }
280
281 protected:
282 // TODO(burnik): Recheck steps and simplify method. Try reusing in |SetUp()|.
283 static blink::WebMediaStreamTrack CreateBlinkTrackWithMediaStreamType(
284 const MediaStreamType device_type) {
285 MockMediaConstraintFactory constraint_factory;
286
287 MediaStreamSource::SourceStoppedCallback cb;
288
289 StreamDeviceInfo device_info(device_type, "Mock audio device",
290 "mock_audio_device_id");
291 WebRtcAudioDeviceImpl* device = new WebRtcAudioDeviceImpl();
292 scoped_ptr<MediaStreamAudioSource> stream_audio_source(
293 new MediaStreamAudioSource(-1, device_info, cb, NULL));
294 const blink::WebMediaConstraints constraints =
295 constraint_factory.CreateWebMediaConstraints();
296 MockPeerConnectionDependencyFactory* factory =
297 new MockPeerConnectionDependencyFactory();
298 scoped_refptr<webrtc::AudioSourceInterface> audio_source =
299 factory->CreateLocalAudioSource(new RTCMediaConstraints(constraints));
300 scoped_refptr<WebRtcAudioCapturer> capturer(
301 WebRtcAudioCapturer::CreateCapturer(-1, device_info, constraints,
302 device, stream_audio_source.get()));
303 scoped_refptr<WebRtcLocalAudioTrackAdapter> adapter(
304 WebRtcLocalAudioTrackAdapter::Create(std::string(),
305 audio_source.get()));
306 scoped_ptr<WebRtcLocalAudioTrack> native_track(
307 new WebRtcLocalAudioTrack(adapter.get(), capturer, NULL));
308
309 blink::WebMediaStreamSource blink_audio_source;
310 blink_audio_source.initialize(base::UTF8ToUTF16("dummy_source_id"),
311 blink::WebMediaStreamSource::TypeAudio,
312 base::UTF8ToUTF16("dummy_source_name"));
313 blink_audio_source.setExtraData(stream_audio_source.release());
314
315 blink::WebMediaStreamTrack blink_track;
316 blink_track.initialize(blink::WebString::fromUTF8("audio_track"),
317 blink_audio_source);
318 blink_track.setExtraData(native_track.release());
319
320 return blink_track;
321 }
322
323 // Emulates an audio capture device capturing data from the source.
324 inline void CaptureAudio(const size_t buffers) {
325 DCHECK(native_track_);
326 for (size_t i = 0; i < buffers; ++i)
327 native_track_->Capture(source_data_.get(),
328 base::TimeDelta::FromMilliseconds(0), 1, false,
329 false);
330 }
331
332 // Helper method to verify captured audio data has been consumed.
333 inline void AssertConsumedBuffers(const size_t buffer_index) {
334 ASSERT_EQ(buffer_index, recognizer_->buffer_index());
335 }
336
337 // Helper method to push audio data to producer and verify it was consumed.
338 inline void CaptureAudioAndAssertConsumedBuffers(const size_t buffers,
339 const size_t buffer_index) {
340 CaptureAudio(buffers);
341 AssertConsumedBuffers(buffer_index);
342 }
343
344 protected:
345 // Producer.
346 scoped_ptr<SpeechRecognitionAudioSourceProvider> audio_source_provider_;
347 // Consumer.
348 FakeSpeechRecognizer* recognizer_;
349 // Audio related members.
350 size_t source_data_length_;
351 media::AudioParameters source_params_;
352 scoped_ptr<int16[]> source_data_;
353 size_t sink_data_length_;
354 media::AudioParameters sink_params_;
355 blink::WebMediaStreamTrack blink_track_;
356 WebRtcLocalAudioTrack* native_track_;
357 };
358
359 ////////////////////////////////////////////////////////////////////////////////
360 ////////////////////////////////////////////////////////////////////////////////
361
362 TEST_F(SpeechRecognitionAudioSourceProviderTest, CheckAllowedAudioTrackType) {
363 typedef std::map<MediaStreamType, bool> AllowedAudioTrackSourceTypePolicy;
364 // This test must be aligned with the policy of allowed tracks.
365 AllowedAudioTrackSourceTypePolicy p;
366 p[MEDIA_NO_SERVICE] = false;
367 p[MEDIA_DEVICE_AUDIO_CAPTURE] = true; // Only one allowed for now.
368 p[MEDIA_DEVICE_VIDEO_CAPTURE] = false;
369 p[MEDIA_TAB_AUDIO_CAPTURE] = false;
370 p[MEDIA_TAB_VIDEO_CAPTURE] = false;
371 p[MEDIA_DESKTOP_VIDEO_CAPTURE] = false;
372 p[MEDIA_LOOPBACK_AUDIO_CAPTURE] = false;
373 p[MEDIA_DEVICE_AUDIO_OUTPUT] = false;
374 // Ensure this test gets updated along with |content::MediaStreamType| enum.
375 EXPECT_EQ(NUM_MEDIA_TYPES, p.size());
376 // Check the the entire policy.
377 for (AllowedAudioTrackSourceTypePolicy::iterator it = p.begin();
378 it != p.end(); ++it) {
379 ASSERT_EQ(it->second,
380 SpeechRecognitionAudioSourceProvider::IsAllowedAudioTrack(
381 CreateBlinkTrackWithMediaStreamType(it->first)));
382 }
383 }
384
385 TEST_F(SpeechRecognitionAudioSourceProviderTest, RecognizerNotifiedOnSocket) {
386 AssertConsumedBuffers(0U);
387 CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 1U);
388 CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 2U);
389 CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 3U);
390 }
391
// Verifies that captured mono 44.1 kHz input is resampled and upmixed into
// the stereo 16 kHz sink bus that lives in shared memory.
TEST_F(SpeechRecognitionAudioSourceProviderTest, AudioDataIsResampledOnSink) {
  // Fill the audio input frames with a ramp: 0, 1, 2, 3, ..., 440.
  for (size_t i = 0; i < source_data_length_; ++i) source_data_[i] = i;

  const size_t num_frames_to_test = 12;
  int16 sink_data[kOutputFramesPerBuffer * kOutputChannels];
  media::AudioBus* sink_bus = recognizer_->audio_bus();

  // Render the audio data from the recognizer.
  sink_bus->ToInterleaved(sink_bus->frames(),
                          sink_params_.bits_per_sample() / 8, sink_data);

  // Test both channels are zeroed out before we trigger resampling.
  for (size_t i = 0; i < num_frames_to_test; ++i) {
    ASSERT_EQ(0, sink_data[i * 2]);
    ASSERT_EQ(0, sink_data[i * 2 + 1]);
  }

  // Trigger the source provider to resample the input data.
  AssertConsumedBuffers(0U);
  CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 1U);

  // Render the audio data from the recognizer.
  sink_bus->ToInterleaved(sink_bus->frames(),
                          sink_params_.bits_per_sample() / 8, sink_data);

  // Resampled data expected frames - based on the ramp in |source_data_|.
  // Note: these values also depend on the input/output audio params above;
  // changing either format invalidates this table.
  const int16 expected_data[num_frames_to_test] = {0, 2, 5, 8, 11, 13,
                                                   16, 19, 22, 24, 27, 30};

  // Test both channels have same resampled data (mono upmixed to stereo).
  for (size_t i = 0; i < num_frames_to_test; ++i) {
    ASSERT_EQ(expected_data[i], sink_data[i * 2]);
    ASSERT_EQ(expected_data[i], sink_data[i * 2 + 1]);
  }
}
429
// A send failure on the socket must surface as a SEND_FAILED error and must
// not advance the consumed-buffer index; recovery resumes normal operation.
TEST_F(SpeechRecognitionAudioSourceProviderTest, SyncSocketFailsSendingData) {
  // (1) Start out with no problems.
  AssertConsumedBuffers(0U);
  CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 1U);

  // (2) A failure occurs (socket cannot send); the index stays at 1.
  recognizer_->SetFailureModeOnForeignSocket(true);
  EXPECT_CALL(*this,
              ErrorCallback(SpeechRecognitionAudioSourceProvider::SEND_FAILED))
      .Times(1);
  CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 1U);

  // (3) Miraculously recovered from the socket failure.
  recognizer_->SetFailureModeOnForeignSocket(false);
  CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 2U);
}
446
// When the consumer stops servicing the socket, the producer must first
// report a buffer sync lag and eventually a FIFO overflow, while the
// consumed-buffer index stops advancing.
TEST_F(SpeechRecognitionAudioSourceProviderTest, PeerProcessGotUnresponsive) {
  EXPECT_GT(kBuffersForReadyFifo, kBuffersPerNotification);
  AssertConsumedBuffers(0U);

  // (1) We respond to audio packets as expected.
  recognizer_->SimulateResponsiveness(true);
  CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 1U);

  // (2) The recognizer on the browser becomes unresponsive; the consumed
  // index stays at 1 and the producer reports a sync lag.
  recognizer_->SimulateResponsiveness(false);
  EXPECT_CALL(*this, ErrorCallback(
                  SpeechRecognitionAudioSourceProvider::BUFFER_SYNC_LAG))
      .Times(testing::AtLeast(1));
  CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 1U);

  // (3) The producer gets an overflow.
  EXPECT_CALL(
      *this,
      ErrorCallback(SpeechRecognitionAudioSourceProvider::AUDIO_FIFO_OVERFLOW))
      .Times(testing::AtLeast(1));
  CaptureAudioAndAssertConsumedBuffers(kBuffersForReadyFifo, 1U);
}
471
// Stopping the native track must fire TRACK_STOPPED exactly once, after which
// further captures no longer advance the consumed-buffer index.
// TODO(burnik): Rename the test — "Occured" should be spelled "Occurred"
// (kept as-is here to avoid changing the test's external name).
TEST_F(SpeechRecognitionAudioSourceProviderTest, OnReadyStateChangedOccured) {
  AssertConsumedBuffers(0U);
  CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 1U);
  EXPECT_CALL(
      *this, ErrorCallback(SpeechRecognitionAudioSourceProvider::TRACK_STOPPED))
      .Times(1);

  native_track_->Stop();
  // The index remains at 1: nothing is consumed after the track stops.
  CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 1U);
}
482
483 } // namespace content
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698