Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(206)

Side by Side Diff: content/renderer/media/speech_recognition_audio_source_provider_unittest.cc

Issue 499233003: Binding media stream audio track to speech recognition [renderer] (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: SyncSocket leak and FIFO fixes. Test 8-192KHz for input. Created 6 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "content/renderer/media/speech_recognition_audio_source_provider.h"
6
7 #include "base/strings/utf_string_conversions.h"
8 #include "content/renderer/media/mock_media_constraint_factory.h"
9 #include "content/renderer/media/webrtc/webrtc_local_audio_track_adapter.h"
10 #include "content/renderer/media/webrtc_local_audio_track.h"
11 #include "media/audio/audio_parameters.h"
12 #include "media/base/audio_bus.h"
13 #include "testing/gmock/include/gmock/gmock.h"
14 #include "testing/gtest/include/gtest/gtest.h"
15 #include "third_party/WebKit/public/platform/WebMediaStreamTrack.h"
16
17 namespace content {
18
19 // Mocked out sockets used for Send/Receive.
20 // Data is written and read from a shared buffer used as a FIFO and there is
21 // no blocking. |OnSendCB| is used to trigger a |Receive| on the other socket.
22 class MockSyncSocket : public base::SyncSocket {
23 public:
24 // This allows for 2 requests in queue between the |MockSyncSocket|s.
25 static const int kSharedBufferSize = 8;
26
27 // Buffer to be shared between two |MockSyncSocket|s. Allocated on heap.
28 struct SharedBuffer {
29 SharedBuffer() : start(0), length(0) {}
30
31 uint8 data[kSharedBufferSize];
32 size_t start;
33 size_t length;
34 };
35
36 // Callback used for pairing an A.Send() with B.Receieve() without blocking.
37 typedef base::Callback<void()> OnSendCB;
38
39 explicit MockSyncSocket(SharedBuffer* shared_buffer)
40 : buffer_(shared_buffer),
41 in_failure_mode_(false) { }
42
43 MockSyncSocket(SharedBuffer* shared_buffer, const OnSendCB& on_send_cb)
44 : buffer_(shared_buffer),
45 on_send_cb_(on_send_cb),
46 in_failure_mode_(false) { }
47
48 virtual size_t Send(const void* buffer, size_t length) OVERRIDE;
49 virtual size_t Receive(void* buffer, size_t length) OVERRIDE;
50
51 // When |in_failure_mode_| == true, the socket fails to send.
52 void SetFailureMode(bool in_failure_mode) {
53 in_failure_mode_ = in_failure_mode;
54 }
55
56 private:
57 SharedBuffer* buffer_;
58 const OnSendCB on_send_cb_;
59 bool in_failure_mode_;
60 };
61
62 size_t MockSyncSocket::Send(const void* buffer, size_t length) {
63 if (in_failure_mode_)
64 return 0;
65
66 uint8* b = static_cast<uint8*>(const_cast<void*>(buffer));
67 for (size_t i = 0; i < length; i++, buffer_->length++)
burnik 2014/09/23 12:39:21 Changed to prefixed increment.
68 buffer_->data[buffer_->start + buffer_->length] = b[i];
69
70 on_send_cb_.Run();
71 return length;
72 }
73
74 size_t MockSyncSocket::Receive(void* buffer, size_t length) {
75 uint8* b = static_cast<uint8*>(const_cast<void*>(buffer));
76 for (size_t i = buffer_->start; i < buffer_->length; i++, buffer_->start++)
burnik 2014/09/23 12:39:20 Changed to prefixed increment.
77 b[i] = buffer_->data[buffer_->start];
78
79 // Since buffer is used sequentially, we can reset the buffer indices here.
80 buffer_->start = buffer_->length = 0;
81 return length;
82 }
83
84 ////////////////////////////////////////////////////////////////////////////////
henrika (OOO until Aug 14) 2014/09/22 08:02:19 Please remove these non-standard separators.
burnik 2014/09/22 09:17:36 Done.
no longer working on chromium 2014/09/23 10:09:13 Not done yet.
burnik 2014/09/23 12:39:21 Yes, done for next patchset as advertised.
85
86 class FakeSpeechRecognizer {
henrika (OOO until Aug 14) 2014/09/22 08:02:19 This looks like a very complex helper which is now
burnik 2014/09/22 09:17:36 This is the mock consumer. Unit tests focus on the
henrika (OOO until Aug 14) 2014/09/23 10:45:33 I am not saying it will fail but that is a large h
burnik 2014/09/23 12:39:21 Ok. I'll revisit existing unit tests to see if any
87 public:
88 FakeSpeechRecognizer() : is_responsive_(true) { }
89
90 void Initialize(
91 const blink::WebMediaStreamTrack& track,
92 const media::AudioParameters& sink_params,
93 base::SharedMemoryHandle* foreign_memory_handle) {
94 // Shared memory is allocated, mapped and shared.
95 uint32 shared_memory_size =
96 sizeof(media::AudioInputBufferParameters) +
97 media::AudioBus::CalculateMemorySize(sink_params);
98 shared_memory_.reset(new base::SharedMemory());
99 ASSERT_TRUE(shared_memory_->CreateAndMapAnonymous(shared_memory_size));
100 ASSERT_TRUE(shared_memory_->ShareToProcess(base::GetCurrentProcessHandle(),
101 foreign_memory_handle));
102
103 // Wrap the shared memory for the audio bus.
104 media::AudioInputBuffer* buffer =
105 static_cast<media::AudioInputBuffer*>(shared_memory_->memory());
106 audio_track_bus_ = media::AudioBus::WrapMemory(sink_params, buffer->audio);
107
108 // Reference to the counter used to synchronize.
109 buffer_index_ = &(buffer->params.size);
110 *buffer_index_ = 0U;
111
112 // Create a shared buffer for the |MockSyncSocket|s.
113 shared_buffer_.reset(new MockSyncSocket::SharedBuffer());
114
115 // Local socket will receive signals from the producer.
116 local_socket_.reset(new MockSyncSocket(shared_buffer_.get()));
117
118 // We automatically trigger a Receive when data is sent over the socket.
119 foreign_socket_ = new MockSyncSocket(
120 shared_buffer_.get(),
121 base::Bind(&FakeSpeechRecognizer::EmulateReceiveThreadLoopIteration,
122 base::Unretained(this)));
123
124 // This is usually done to pair the sockets. Here it's not effective.
125 base::SyncSocket::CreatePair(local_socket_.get(), foreign_socket_);
126 }
127
128 // Emulates a single iteraton of a thread receiving on the socket.
129 // This would normally be done on a receiving thread's task on the browser.
130 void EmulateReceiveThreadLoopIteration() {
131 // When not responsive do nothing as if the process is busy.
132 if (!is_responsive_)
133 return;
134
135 local_socket_->Receive(buffer_index_, sizeof(*buffer_index_));
136 // Notify the producer that the audio buffer has been consumed.
137 (*buffer_index_)++;
no longer working on chromium 2014/09/23 10:09:13 ++(*buffer_index_)
burnik 2014/09/23 12:39:21 Done.
138 }
139
140 // Used to simulate an unresponsive behaviour of the consumer.
141 void SimulateResponsiveness(bool is_responsive) {
142 is_responsive_ = is_responsive;
143 }
144
145 MockSyncSocket* foreign_socket() { return foreign_socket_; }
146 media::AudioBus* audio_bus() const { return audio_track_bus_.get(); }
147 uint32 buffer_index() { return *buffer_index_; }
148
149 private:
150 bool is_responsive_;
151
152 // Shared memory for the audio and synchronization.
153 scoped_ptr<base::SharedMemory> shared_memory_;
154
155 // Fake sockets shared buffer.
156 scoped_ptr<MockSyncSocket::SharedBuffer> shared_buffer_;
157 scoped_ptr<MockSyncSocket> local_socket_;
158 MockSyncSocket* foreign_socket_;
no longer working on chromium 2014/09/23 10:09:13 why this is a raw pointer?
burnik 2014/09/23 12:39:21 It is owned by the recognizer and destroyed there.
159
160 // Audio bus wrapping the shared memory from the renderer.
161 scoped_ptr<media::AudioBus> audio_track_bus_;
162
163 // Used for synchronization of sent/received buffers.
164 uint32* buffer_index_;
165 };
166
167 ////////////////////////////////////////////////////////////////////////////////
henrika (OOO until Aug 14) 2014/09/22 08:02:19 remove
burnik 2014/09/22 09:17:36 Done.
168
169 namespace {
170
171 // Supported speech recognition audio parameters.
172 const int kSpeechRecognitionSampleRate = 16000;
173 const int kSpeechRecognitionFramesPerBuffer = 1600;
174
175 // Input audio format.
176 const media::AudioParameters::Format kInputFormat =
177 media::AudioParameters::AUDIO_PCM_LOW_LATENCY;
178 const media::ChannelLayout kInputChannelLayout = media::CHANNEL_LAYOUT_MONO;
179 const int kInputChannels = 1;
180 const int kInputBitsPerSample = 16;
181
182 // Output audio format.
183 const media::AudioParameters::Format kOutputFormat =
184 media::AudioParameters::AUDIO_PCM_LOW_LATENCY;
185 const media::ChannelLayout kOutputChannelLayout = media::CHANNEL_LAYOUT_STEREO;
186 const int kOutputChannels = 2;
187 const int kOutputBitsPerSample = 16;
188
189 } // namespace
190
191 ////////////////////////////////////////////////////////////////////////////////
192
193 class SpeechRecognitionAudioSourceProviderTest : public testing::Test {
194 public:
195 SpeechRecognitionAudioSourceProviderTest() { }
196
197 // Initializes the producer and consumer with specified audio parameters.
henrika (OOO until Aug 14) 2014/09/22 08:02:18 Can you elaborate on what a producer and consumer
burnik 2014/09/22 09:17:36 Yes. It's explained on lines 228 - 238.
198 // Returns the minimal number of input audio buffers which need to be captured
199 // before they get sent to the consumer.
200 uint32 Initialize(int input_sample_rate,
201 int input_frames_per_buffer,
202 int output_sample_rate,
203 int output_frames_per_buffer) {
204 // Audio Environment setup.
205 source_params_.Reset(kInputFormat,
206 kInputChannelLayout,
207 kInputChannels,
208 input_sample_rate,
209 kInputBitsPerSample,
210 input_frames_per_buffer);
211 sink_params_.Reset(kOutputFormat,
212 kOutputChannelLayout,
213 kOutputChannels,
214 output_sample_rate,
215 kOutputBitsPerSample,
216 output_frames_per_buffer);
217 source_data_.reset(new int16[input_frames_per_buffer * kInputChannels]);
218
219 // Prepare the track and audio source.
220 blink::WebMediaStreamTrack blink_track;
221 PrepareBlinkTrackOfType(MEDIA_DEVICE_AUDIO_CAPTURE, &blink_track);
222
223 // Get the native track from the blink track and initialize.
224 native_track_ =
225 static_cast<WebRtcLocalAudioTrack*>(blink_track.extraData());
226 native_track_->OnSetFormat(source_params_);
227
228 // Create and initialize the consumer.
229 recognizer_.reset(new FakeSpeechRecognizer());
230 base::SharedMemoryHandle foreign_memory_handle;
231 recognizer_->Initialize(blink_track, sink_params_, &foreign_memory_handle);
232
233 // Create the producer.
234 audio_source_provider_.reset(new SpeechRecognitionAudioSourceProvider(
235 blink_track, sink_params_, foreign_memory_handle,
236 recognizer_->foreign_socket(),
237 base::Bind(&SpeechRecognitionAudioSourceProviderTest::StoppedCallback,
238 base::Unretained(this))));
239
240 // Return number of buffers needed to trigger resampling and consumption.
241 return static_cast<uint32>(std::ceil(
242 static_cast<double>(output_frames_per_buffer * input_sample_rate) /
243 (input_frames_per_buffer * output_sample_rate)));
244 }
245
246 // Mock callback expected to be called when the track is stopped.
247 MOCK_METHOD0(StoppedCallback, void());
248
249 protected:
250 static void PrepareBlinkTrackOfType(
251 const MediaStreamType device_type,
252 blink::WebMediaStreamTrack* blink_track) {
253 // Device info.
254 StreamDeviceInfo device_info(device_type, "Mock audio device",
255 "mock_audio_device_id");
256
257 // Constraints.
258 MockMediaConstraintFactory constraint_factory;
259 const blink::WebMediaConstraints constraints =
260 constraint_factory.CreateWebMediaConstraints();
261
262 // Capturer.
henrika (OOO until Aug 14) 2014/09/22 08:02:18 These comments does not add much. Please explain w
burnik 2014/09/22 09:17:36 All these comments are now removed.
263 scoped_refptr<WebRtcAudioCapturer> capturer(
264 WebRtcAudioCapturer::CreateCapturer(-1, device_info, constraints, NULL,
265 NULL));
266
267 // Adapter.
268 scoped_refptr<WebRtcLocalAudioTrackAdapter> adapter(
269 WebRtcLocalAudioTrackAdapter::Create(std::string(), NULL));
270
271 // Native track.
272 scoped_ptr<WebRtcLocalAudioTrack> native_track(
273 new WebRtcLocalAudioTrack(adapter.get(), capturer, NULL));
274
275 // Blink audio source.
276 blink::WebMediaStreamSource blink_audio_source;
277 blink_audio_source.initialize(base::UTF8ToUTF16("dummy_source_id"),
278 blink::WebMediaStreamSource::TypeAudio,
279 base::UTF8ToUTF16("dummy_source_name"));
280 MediaStreamSource::SourceStoppedCallback cb;
281 blink_audio_source.setExtraData(
282 new MediaStreamAudioSource(-1, device_info, cb, NULL));
283
284 // Blink track.
285 blink_track->initialize(blink::WebString::fromUTF8("dummy_audio_track"),
286 blink_audio_source);
287 blink_track->setExtraData(native_track.release());
288 }
289
290 // Emulates an audio capture device capturing data from the source.
291 inline void CaptureAudio(const uint32 buffers) {
292 for (uint32 i = 0; i < buffers; ++i)
293 native_track_->Capture(source_data_.get(),
294 base::TimeDelta::FromMilliseconds(0), 1, false,
henrika (OOO until Aug 14) 2014/09/22 08:02:18 FromMilliseconds(0)?
burnik 2014/09/22 09:17:36 Yes, no delay is required in the unit test.
295 false);
296 }
297
298 // Used to simulate a problem with sockets.
299 void SetFailureModeOnForeignSocket(bool in_failure_mode) {
300 recognizer_->foreign_socket()->SetFailureMode(in_failure_mode);
301 }
302
303 // Helper method for verifying captured audio data has been consumed.
304 inline void AssertConsumedBuffers(const uint32 buffer_index) {
305 ASSERT_EQ(buffer_index, recognizer_->buffer_index());
306 }
307
308 // Helper method for providing audio data to producer and verifying it was
309 // consumed on the recognizer.
310 inline void CaptureAudioAndAssertConsumedBuffers(const uint32 buffers,
311 const uint32 buffer_index) {
312 CaptureAudio(buffers);
313 AssertConsumedBuffers(buffer_index);
314 }
315
316 // Helper method to capture and assert consumption at different sample rates
317 // and audio buffer sizes.
318 inline void AssertConsumptionForAudioParameters(
319 const int input_sample_rate,
320 const int input_frames_per_buffer,
321 const int output_sample_rate,
322 const int output_frames_per_buffer,
323 const uint32 consumptions) {
324 const uint32 kBuffersPerNotification =
325 Initialize(input_sample_rate, input_frames_per_buffer,
326 output_sample_rate, output_frames_per_buffer);
327 AssertConsumedBuffers(0U);
328
329 for (uint32 i = 1U; i <= consumptions; ++i) {
330 CaptureAudio(kBuffersPerNotification);
331 ASSERT_EQ(i, recognizer_->buffer_index())
332 << "Tested at rates: "
333 << "In(" << input_sample_rate << ", " << input_frames_per_buffer
334 << ") "
335 << "Out(" << output_sample_rate << ", " << output_frames_per_buffer
336 << ")";
337 }
338 }
339
340 // Producer.
341 scoped_ptr<SpeechRecognitionAudioSourceProvider> audio_source_provider_;
342
343 // Consumer.
344 scoped_ptr<FakeSpeechRecognizer> recognizer_;
345
346 // Audio related members.
347 scoped_ptr<int16[]> source_data_;
348 media::AudioParameters source_params_;
349 media::AudioParameters sink_params_;
350 WebRtcLocalAudioTrack* native_track_;
351 };
352
353 ////////////////////////////////////////////////////////////////////////////////
354
355 TEST_F(SpeechRecognitionAudioSourceProviderTest, CheckIsSupportedAudioTrack) {
henrika (OOO until Aug 14) 2014/09/22 08:02:18 Could you make the name more clear? CheckIsSupport
burnik 2014/09/22 09:17:37 Added comment above test.
356 typedef std::map<MediaStreamType, bool> SupportedTrackPolicy;
357
358 // This test must be aligned with the policy of supported tracks.
359 SupportedTrackPolicy p;
360 p[MEDIA_NO_SERVICE] = false;
361 p[MEDIA_DEVICE_AUDIO_CAPTURE] = true; // The only one supported for now.
362 p[MEDIA_DEVICE_VIDEO_CAPTURE] = false;
363 p[MEDIA_TAB_AUDIO_CAPTURE] = false;
364 p[MEDIA_TAB_VIDEO_CAPTURE] = false;
365 p[MEDIA_DESKTOP_VIDEO_CAPTURE] = false;
366 p[MEDIA_LOOPBACK_AUDIO_CAPTURE] = false;
367 p[MEDIA_DEVICE_AUDIO_OUTPUT] = false;
368
369 // Ensure this test gets updated along with |content::MediaStreamType| enum.
370 EXPECT_EQ(NUM_MEDIA_TYPES, p.size());
371
372 // Check the the entire policy.
373 for (SupportedTrackPolicy::iterator it = p.begin(); it != p.end(); ++it) {
374 blink::WebMediaStreamTrack blink_track;
375 PrepareBlinkTrackOfType(it->first, &blink_track);
376 ASSERT_EQ(
377 it->second,
378 SpeechRecognitionAudioSourceProvider::IsSupportedTrack(blink_track));
379 }
380 }
381
382 TEST_F(SpeechRecognitionAudioSourceProviderTest, RecognizerNotifiedOnSocket) {
henrika (OOO until Aug 14) 2014/09/22 08:02:18 Please add some lines of comments above each test
burnik 2014/09/22 09:17:37 Done.
383 const size_t kNumAudioParamTuples = 22;
384 const int kAudioParams[kNumAudioParamTuples][2] = {
385 {8000, 80}, {8000, 800}, {16000, 160}, {16000, 1600},
386 {32000, 320}, {32000, 3200}, {44100, 441}, {44100, 4410},
387 {48000, 480}, {48000, 4800}, {96000, 960}, {96000, 9600},
388 {11025, 111}, {11025, 1103}, {22050, 221}, {22050, 2205},
389 {88200, 882}, {88200, 8820}, {176400, 1764}, {176400, 17640},
390 {192000, 1920}, {192000, 19200}};
391
392 // Check all listed tuples of input sample rates and buffers sizes.
393 for (size_t i = 0; i < kNumAudioParamTuples; ++i) {
394 AssertConsumptionForAudioParameters(
395 kAudioParams[i][0], kAudioParams[i][1],
396 kSpeechRecognitionSampleRate, kSpeechRecognitionFramesPerBuffer, 3U);
397 }
398 }
399
400 TEST_F(SpeechRecognitionAudioSourceProviderTest, AudioDataIsResampledOnSink) {
henrika (OOO until Aug 14) 2014/09/22 08:02:19 Lots of hardcoded values in this test. Makes it di
burnik 2014/09/22 09:17:37 Added more comments. I don't test that the resampl
401 const uint32 kBuffersPerNotification = Initialize(44100, 441, 16000, 1600);
402
403 // Fill audio input frames with 0, 1, 2, 3, ..., 440.
404 const uint32 source_data_length = 441 * kInputChannels;
405 for (uint32 i = 0; i < source_data_length; ++i)
406 source_data_[i] = i;
407
408 const uint32 num_frames_to_test = 12;
409 const uint32 sink_data_length = 1600 * kOutputChannels;
410 int16 sink_data[sink_data_length];
411 media::AudioBus* sink_bus = recognizer_->audio_bus();
412
413 // Render the audio data from the recognizer.
414 sink_bus->ToInterleaved(sink_bus->frames(),
415 sink_params_.bits_per_sample() / 8, sink_data);
416
417 // Test both channels are zeroed out before we trigger resampling.
418 for (uint32 i = 0; i < num_frames_to_test; ++i) {
419 ASSERT_EQ(0, sink_data[i * 2]);
420 ASSERT_EQ(0, sink_data[i * 2 + 1]);
421 }
422
423 // Trigger the source provider to resample the input data.
424 AssertConsumedBuffers(0U);
425 CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 1U);
426
427 // Render the audio data from the recognizer.
428 sink_bus->ToInterleaved(sink_bus->frames(),
429 sink_params_.bits_per_sample() / 8, sink_data);
430
431 // Resampled data expected frames - based on |source_data_|.
432 const int16 expected_data[num_frames_to_test] = {0, 2, 5, 8, 11, 13,
433 16, 19, 22, 24, 27, 30};
434
435 // Test both channels have same resampled data.
436 for (uint32 i = 0; i < num_frames_to_test; ++i) {
437 ASSERT_EQ(expected_data[i], sink_data[i * 2]);
438 ASSERT_EQ(expected_data[i], sink_data[i * 2 + 1]);
439 }
440 }
441
442 TEST_F(SpeechRecognitionAudioSourceProviderTest, SyncSocketFailsSendingData) {
443 const uint32 kBuffersPerNotification = Initialize(44100, 441, 16000, 1600);
444 // (1) Start with no problems on the socket.
henrika (OOO until Aug 14) 2014/09/22 08:02:18 Remove (1) and (2)
burnik 2014/09/22 09:17:37 Done.
445 AssertConsumedBuffers(0U);
446 CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 1U);
447
448 // (2) A failure occurs (socket cannot to send).
449 SetFailureModeOnForeignSocket(true);
450 CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 1U);
451 }
452
453 TEST_F(SpeechRecognitionAudioSourceProviderTest, OnReadyStateChangedOccured) {
454 const uint32 kBuffersPerNotification = Initialize(44100, 441, 16000, 1600);
455 AssertConsumedBuffers(0U);
456 CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 1U);
457 EXPECT_CALL(*this, StoppedCallback()).Times(1);
458
459 native_track_->Stop();
460 CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 1U);
461 }
462
463 } // namespace content
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698