Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(239)

Side by Side Diff: content/renderer/media/speech_recognition_audio_source_provider_unittest.cc

Issue 499233003: Binding media stream audio track to speech recognition [renderer] (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Refactoring unit test and source provider, moved to media Created 6 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "content/renderer/media/speech_recognition_audio_source_provider.h"
6
7 #include "base/strings/utf_string_conversions.h"
8 #include "content/renderer/media/mock_media_constraint_factory.h"
9 #include "content/renderer/media/webrtc/webrtc_local_audio_track_adapter.h"
10 #include "content/renderer/media/webrtc_local_audio_track.h"
11 #include "media/audio/audio_parameters.h"
12 #include "media/base/audio_bus.h"
13 #include "testing/gmock/include/gmock/gmock.h"
14 #include "testing/gtest/include/gtest/gtest.h"
15 #include "third_party/WebKit/public/platform/WebMediaStreamTrack.h"
16
namespace {

// Input audio format: what the (mock) capture device produces — mono,
// 44.1 kHz, 10 ms (441-frame) buffers.
static const media::AudioParameters::Format kInputFormat =
    media::AudioParameters::AUDIO_PCM_LOW_LATENCY;
const media::ChannelLayout kInputChannelLayout = media::CHANNEL_LAYOUT_MONO;
const int kInputChannels = 1;
const int kInputSampleRate = 44100;
const int kInputBitsPerSample = 16;
const int kInputFramesPerBuffer = 441;

// Output audio format: what the speech-recognition sink consumes — stereo,
// 16 kHz, 100 ms (1600-frame) buffers.
const media::AudioParameters::Format kOutputFormat =
    media::AudioParameters::AUDIO_PCM_LOW_LATENCY;
const media::ChannelLayout kOutputChannelLayout = media::CHANNEL_LAYOUT_STEREO;
const int kOutputChannels = 2;
const int kOutputSampleRate = 16000;
const int kOutputBitsPerSample = 16;
const int kOutputFramesPerBuffer = 1600;

// Minimal number of input buffers which trigger a single SyncSocket transfer.
// With the parameters above this evaluates to 10 (one 100 ms output buffer
// per ten 10 ms input buffers).
const size_t kBuffersPerNotification =
    (kOutputFramesPerBuffer * kInputSampleRate) /
    (kInputFramesPerBuffer * kOutputSampleRate);

// Length of the source audio data buffer in samples (frames * channels).
// With mono input this equals the number of frames per input buffer.
const size_t kSourceDataLength = kInputFramesPerBuffer * kInputChannels;

}  // namespace
46
47 ////////////////////////////////////////////////////////////////////////////////
48
49 namespace content {
50
51 // Mocked out sockets used for Send/Receive.
52 // Data is written and read from a shared buffer used as a FIFO and there is
53 // no blocking. |OnSendCB| is used to trigger a |Receive| on the other socket.
// Mocked out sockets used for Send/Receive.
// Data is written and read from a shared buffer used as a FIFO and there is
// no blocking. |OnSendCB| is used to trigger a |Receive| on the other socket.
class MockSyncSocket : public base::SyncSocket {
 public:
  // This allows for 2 requests in queue between the |MockSyncSocket|s
  // (each request is a 4-byte buffer index).
  static const int kSharedBufferSize = 8;

  // Buffer to be shared between two |MockSyncSocket|s. Allocated on heap.
  // |start| is the read position, |start| + |length| the write position;
  // both are reset to zero after each |Receive|.
  struct SharedBuffer {
    SharedBuffer() : start(0), length(0) {}

    uint8 data[kSharedBufferSize];
    size_t start;
    size_t length;
  };

  // Callback used for pairing an A.Send() with B.Receive() without blocking.
  typedef base::Callback<void()> OnSendCB;

  // The first form builds a socket with a no-op send callback (receiver
  // side); the second wires |on_send_cb| so a Send on this socket triggers
  // the peer's Receive (sender side).
  explicit MockSyncSocket(SharedBuffer* shared_buffer);
  MockSyncSocket(SharedBuffer* shared_buffer, const OnSendCB& on_send_cb);

  virtual size_t Send(const void* buffer, size_t length) OVERRIDE;
  virtual size_t Receive(void* buffer, size_t length) OVERRIDE;

  // When |in_failure_mode_| == true, the socket fails to send.
  void SetFailureMode(bool in_failure_mode) {
    in_failure_mode_ = in_failure_mode;
  }

 private:
  SharedBuffer* buffer_;   // Not owned; shared with the peer socket.
  const OnSendCB on_send_cb_;
  bool in_failure_mode_;
};
86
87 MockSyncSocket::MockSyncSocket(SharedBuffer* buffer)
88 : buffer_(buffer), in_failure_mode_(false) { }
89
90 MockSyncSocket::MockSyncSocket(SharedBuffer* buffer, const OnSendCB& on_send_cb)
91 : buffer_(buffer), on_send_cb_(on_send_cb), in_failure_mode_(false) {}
92
93 size_t MockSyncSocket::Send(const void* buffer, size_t length) {
94 if (in_failure_mode_)
95 return 0;
96
97 uint8* b = static_cast<uint8*>(const_cast<void*>(buffer));
98 for (size_t i = 0; i < length; i++, buffer_->length++)
99 buffer_->data[buffer_->start + buffer_->length] = b[i];
100
101 on_send_cb_.Run();
102 return length;
103 }
104
105 size_t MockSyncSocket::Receive(void* buffer, size_t length) {
106 uint8* b = static_cast<uint8*>(const_cast<void*>(buffer));
107 for (size_t i = buffer_->start; i < buffer_->length; i++, buffer_->start++)
108 b[i] = buffer_->data[buffer_->start];
109
110 // Since buffer is used atomically, we can reset the buffer indices here.
111 buffer_->start = buffer_->length = 0;
112 return length;
113 }
114
115 ////////////////////////////////////////////////////////////////////////////////
116
// Consumer-side test double for the speech recognizer living in the browser
// process. It maps the shared memory produced by the source provider and
// acknowledges each audio buffer through the mock socket pair.
class FakeSpeechRecognizer {
 public:
  FakeSpeechRecognizer() : is_responsive_(true) {}
  ~FakeSpeechRecognizer() {}

  // Allocates the shared memory and socket pair and constructs the
  // |SpeechRecognitionAudioSourceProvider| (producer) bound to |track|.
  // Declared here, defined below; uses ASSERT_* so it must stay void.
  void Initialize(
      const blink::WebMediaStreamTrack& track,
      const media::AudioParameters& sink_params,
      const SpeechRecognitionAudioSourceProvider::OnStoppedCB& on_stopped_cb);

  // Returns the producer created in |Initialize|; ownership is NOT
  // transferred (it stays with |audio_source_provider_|).
  // TODO(burnik): Move from the recognizer to the test.
  SpeechRecognitionAudioSourceProvider* SourceProvider() {
    return audio_source_provider_.get();
  }

  // Emulates a single iteraton of a thread receiving on the socket.
  // Emulates the receive on the socket. This would normally be done on a
  // receiving thread's loop on the browser.
  void EmulateReceiveThreadLoopIteration() {
    // When not responsive do nothing as if the process is busy.
    if (!is_responsive_)
      return;

    local_socket_->Receive(shared_buffer_index_, sizeof(*shared_buffer_index_));
    // Notify the producer that the audio buffer has been consumed.
    (*shared_buffer_index_)++;
  }

  // Used to simulate an unresponsive behaviour of the consumer.
  void SimulateResponsiveness(bool is_responsive) {
    is_responsive_ = is_responsive;
  }

  // Used to simulate a problem with sockets.
  void SetFailureModeOnForeignSocket(bool in_failure_mode) {
    foreign_socket_->SetFailureMode(in_failure_mode);
  }

  // Number of buffers consumed so far (the synchronization counter living
  // in shared memory).
  uint32 buffer_index() { return *shared_buffer_index_; }
  // Audio bus wrapping the shared memory; lets tests inspect rendered data.
  media::AudioBus* audio_bus() const { return audio_track_bus_.get(); }

 private:
  bool is_responsive_;
  // Shared memory for the audio and synchronization.
  scoped_ptr<base::SharedMemory> shared_memory_;

  // Fake sockets shared buffer.
  scoped_ptr<MockSyncSocket::SharedBuffer> shared_buffer_;
  scoped_ptr<MockSyncSocket> local_socket_;
  scoped_ptr<MockSyncSocket> foreign_socket_;

  // Audio bus wrapping the shared memory from the renderer.
  scoped_ptr<media::AudioBus> audio_track_bus_;

  // Points into |shared_memory_|; valid only after |Initialize|.
  uint32* shared_buffer_index_;
  // Producer. TODO(burnik): this should be outside the recognizer.
  scoped_ptr<SpeechRecognitionAudioSourceProvider> audio_source_provider_;
};
175
// Wires up the full producer/consumer pipeline for one test:
// shared memory -> audio bus -> socket pair -> source provider.
// Uses ASSERT_* macros, hence the void return type.
void FakeSpeechRecognizer::Initialize(
    const blink::WebMediaStreamTrack& track,
    const media::AudioParameters& sink_params,
    const SpeechRecognitionAudioSourceProvider::OnStoppedCB& on_stopped_cb) {
  // Shared memory is allocated, mapped and shared. It holds the
  // AudioInputBufferParameters header followed by the raw audio data.
  uint32 shared_memory_size = sizeof(media::AudioInputBufferParameters) +
                              media::AudioBus::CalculateMemorySize(sink_params);
  shared_memory_.reset(new base::SharedMemory());
  ASSERT_TRUE(shared_memory_->CreateAndMapAnonymous(shared_memory_size));

  // "Foreign" handle as the producer would receive it; here both ends are
  // the same (current) process.
  base::SharedMemoryHandle foreign_memory_handle;
  ASSERT_TRUE(shared_memory_->ShareToProcess(base::GetCurrentProcessHandle(),
                                             &foreign_memory_handle));

  media::AudioInputBuffer* buffer =
      static_cast<media::AudioInputBuffer*>(shared_memory_->memory());
  audio_track_bus_ = media::AudioBus::WrapMemory(sink_params, buffer->audio);

  // Reference to the counter used to synchronize.
  shared_buffer_index_ = &(buffer->params.size);
  *shared_buffer_index_ = 0U;

  // Create a shared buffer for the |MockSyncSocket|s.
  shared_buffer_.reset(new MockSyncSocket::SharedBuffer());

  // Local socket will receive signals from the producer.
  local_socket_.reset(new MockSyncSocket(shared_buffer_.get()));

  // We automatically trigger a Receive when data is sent over the socket.
  foreign_socket_.reset(new MockSyncSocket(
      shared_buffer_.get(),
      base::Bind(&FakeSpeechRecognizer::EmulateReceiveThreadLoopIteration,
                 base::Unretained(this))));

  // This is usually done to pair the sockets. Here it's not effective.
  base::SyncSocket::CreatePair(local_socket_.get(), foreign_socket_.get());

  // Create the producer. TODO(burnik): move out of the recognizer.
  audio_source_provider_.reset(new SpeechRecognitionAudioSourceProvider(
      track, sink_params, foreign_memory_handle, foreign_socket_.get(),
      on_stopped_cb));
}
218
219 ////////////////////////////////////////////////////////////////////////////////
220
221 class SpeechRecognitionAudioSourceProviderTest : public testing::Test {
222 public:
223 SpeechRecognitionAudioSourceProviderTest() {
224 // Audio Environment setup.
225 source_params_.Reset(kInputFormat, kInputChannelLayout, kInputChannels,
226 kInputSampleRate, kInputBitsPerSample,
227 kInputFramesPerBuffer);
228
229 sink_params_.Reset(kOutputFormat, kOutputChannelLayout, kOutputChannels,
230 kOutputSampleRate, kOutputBitsPerSample,
231 kOutputFramesPerBuffer);
232
233 source_data_.reset(new int16[kSourceDataLength]);
234
235 // Prepare the track and audio source.
236 blink::WebMediaStreamTrack blink_track;
237 PrepareTrackWithMediaStreamType(MEDIA_DEVICE_AUDIO_CAPTURE, &blink_track);
238
239 // Get the native track from the blink track and initialize.
240 native_track_ =
241 static_cast<WebRtcLocalAudioTrack*>(blink_track.extraData());
242 native_track_->OnSetFormat(source_params_);
243
244 // Create and initialize the consumer.
245 recognizer_ = new FakeSpeechRecognizer();
246 recognizer_->Initialize(
247 blink_track, sink_params_,
248 base::Bind(&SpeechRecognitionAudioSourceProviderTest::StoppedCallback,
249 base::Unretained(this)));
250
251 // Init the producer.
252 audio_source_provider_.reset(recognizer_->SourceProvider());
253 }
254
255 // Mock callback for when the track is stopped.
256 MOCK_METHOD0(StoppedCallback, void());
257
258 protected:
259 static void PrepareTrackWithMediaStreamType(
260 const MediaStreamType device_type,
261 blink::WebMediaStreamTrack* blink_track) {
262 // Device info.
263 StreamDeviceInfo device_info(device_type, "Mock audio device",
264 "mock_audio_device_id");
265
266 // Constraints.
267 MockMediaConstraintFactory constraint_factory;
268 const blink::WebMediaConstraints constraints =
269 constraint_factory.CreateWebMediaConstraints();
270
271 // Capturer.
272 scoped_refptr<WebRtcAudioCapturer> capturer(
273 WebRtcAudioCapturer::CreateCapturer(-1, device_info, constraints, NULL,
274 NULL));
275
276 // Adapter.
277 scoped_refptr<WebRtcLocalAudioTrackAdapter> adapter(
278 WebRtcLocalAudioTrackAdapter::Create(std::string(), NULL));
279
280 // Native track.
281 scoped_ptr<WebRtcLocalAudioTrack> native_track(
282 new WebRtcLocalAudioTrack(adapter.get(), capturer, NULL));
283
284 // Blink audio source.
285 blink::WebMediaStreamSource blink_audio_source;
286 blink_audio_source.initialize(base::UTF8ToUTF16("dummy_source_id"),
287 blink::WebMediaStreamSource::TypeAudio,
288 base::UTF8ToUTF16("dummy_source_name"));
289 MediaStreamSource::SourceStoppedCallback cb;
290 blink_audio_source.setExtraData(
291 new MediaStreamAudioSource(-1, device_info, cb, NULL));
292
293 // Blink track.
294 blink_track->initialize(blink::WebString::fromUTF8("audio_track"),
295 blink_audio_source);
296 blink_track->setExtraData(native_track.release());
297 }
298
299 // Emulates an audio capture device capturing data from the source.
300 inline void CaptureAudio(const size_t buffers) {
301 for (size_t i = 0; i < buffers; ++i)
302 native_track_->Capture(source_data_.get(),
303 base::TimeDelta::FromMilliseconds(0), 1, false,
304 false);
305 }
306
307 // Helper method to verify captured audio data has been consumed.
308 inline void AssertConsumedBuffers(const size_t buffer_index) {
309 ASSERT_EQ(buffer_index, recognizer_->buffer_index());
310 }
311
312 // Helper method to push audio data to producer and verify it was consumed.
313 inline void CaptureAudioAndAssertConsumedBuffers(const size_t buffers,
314 const size_t buffer_index) {
315 CaptureAudio(buffers);
316 AssertConsumedBuffers(buffer_index);
317 }
318
319 // Producer.
320 scoped_ptr<SpeechRecognitionAudioSourceProvider> audio_source_provider_;
321 // Consumer.
322 FakeSpeechRecognizer* recognizer_;
323 // Audio related members.
324 scoped_ptr<int16[]> source_data_;
325 media::AudioParameters source_params_;
326 media::AudioParameters sink_params_;
327 WebRtcLocalAudioTrack* native_track_;
328 };
329
330 ////////////////////////////////////////////////////////////////////////////////
331
TEST_F(SpeechRecognitionAudioSourceProviderTest, CheckIsSupportedTrackType) {
  typedef std::map<MediaStreamType, bool> SupportedTrackPolicy;
  // This test must be aligned with the policy of supported tracks.
  SupportedTrackPolicy p;
  p[MEDIA_NO_SERVICE] = false;
  p[MEDIA_DEVICE_AUDIO_CAPTURE] = true;  // Only one supported for now.
  p[MEDIA_DEVICE_VIDEO_CAPTURE] = false;
  p[MEDIA_TAB_AUDIO_CAPTURE] = false;
  p[MEDIA_TAB_VIDEO_CAPTURE] = false;
  p[MEDIA_DESKTOP_VIDEO_CAPTURE] = false;
  p[MEDIA_LOOPBACK_AUDIO_CAPTURE] = false;
  p[MEDIA_DEVICE_AUDIO_OUTPUT] = false;
  // Ensure this test gets updated along with |content::MediaStreamType| enum.
  EXPECT_EQ(NUM_MEDIA_TYPES, p.size());
  // Check the entire policy: build a real track of each type and ask the
  // provider whether it is supported.
  for (SupportedTrackPolicy::iterator it = p.begin(); it != p.end(); ++it) {
    blink::WebMediaStreamTrack blink_track;
    PrepareTrackWithMediaStreamType(it->first, &blink_track);
    ASSERT_EQ(
        it->second,
        SpeechRecognitionAudioSourceProvider::IsSupportedTrack(blink_track));
  }
}
355
356 TEST_F(SpeechRecognitionAudioSourceProviderTest, RecognizerNotifiedOnSocket) {
357 AssertConsumedBuffers(0U);
358 CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 1U);
359 CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 2U);
360 CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 3U);
361 }
362
TEST_F(SpeechRecognitionAudioSourceProviderTest, AudioDataIsResampledOnSink) {
  // Fill audio input frames with 0, 1, 2, 3, ..., 440.
  for (size_t i = 0; i < kSourceDataLength; ++i)
    source_data_[i] = i;

  const size_t num_frames_to_test = 12;
  const size_t sink_data_length = kOutputFramesPerBuffer * kOutputChannels;
  int16 sink_data[sink_data_length];
  media::AudioBus* sink_bus = recognizer_->audio_bus();

  // Render the audio data from the recognizer.
  sink_bus->ToInterleaved(sink_bus->frames(),
                          sink_params_.bits_per_sample() / 8, sink_data);

  // Test both channels are zeroed out before we trigger resampling.
  // (|sink_data| is interleaved: even indices = left, odd = right.)
  for (size_t i = 0; i < num_frames_to_test; ++i) {
    ASSERT_EQ(0, sink_data[i * 2]);
    ASSERT_EQ(0, sink_data[i * 2 + 1]);
  }

  // Trigger the source provider to resample the input data.
  AssertConsumedBuffers(0U);
  CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 1U);

  // Render the audio data from the recognizer.
  sink_bus->ToInterleaved(sink_bus->frames(),
                          sink_params_.bits_per_sample() / 8, sink_data);

  // Resampled data expected frames - based on |source_data_|.
  // Note: these values also depend on input/output audio params
  // (44.1 kHz mono -> 16 kHz stereo); update them if those change.
  const int16 expected_data[num_frames_to_test] = {0,  2,  5,  8,  11, 13,
                                                   16, 19, 22, 24, 27, 30};

  // Test both channels have same resampled data (mono upmixed to stereo).
  for (size_t i = 0; i < num_frames_to_test; ++i) {
    ASSERT_EQ(expected_data[i], sink_data[i * 2]);
    ASSERT_EQ(expected_data[i], sink_data[i * 2 + 1]);
  }
}
402
TEST_F(SpeechRecognitionAudioSourceProviderTest, SyncSocketFailsSendingData) {
  // (1) Start out with no problems: one notification goes through.
  AssertConsumedBuffers(0U);
  CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 1U);

  // (2) A failure occurs (socket cannot send); the consumed-buffer count
  // must not advance even though capture continues.
  recognizer_->SetFailureModeOnForeignSocket(true);
  CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 1U);
}
412
// Stopping the native track must fire the provider's stopped callback
// exactly once, after which no further buffers are consumed.
// NOTE(review): test name has a typo ("Occured" -> "Occurred"); renaming is
// safe for gtest but left as-is to keep this change comment-only.
TEST_F(SpeechRecognitionAudioSourceProviderTest, OnReadyStateChangedOccured) {
  AssertConsumedBuffers(0U);
  CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 1U);
  // Expectation is set before Stop() so the mock sees the call.
  EXPECT_CALL(*this, StoppedCallback()).Times(1);

  native_track_->Stop();
  // Capturing after Stop() must not advance the consumed-buffer count.
  CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 1U);
}
421
422 } // namespace content
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698