OLD | NEW |
---|---|
(Empty) | |
1 // Copyright 2014 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "content/renderer/media/speech_recognition_audio_sink.h" | |
6 | |
7 #include "base/strings/utf_string_conversions.h" | |
8 #include "content/renderer/media/mock_media_constraint_factory.h" | |
9 #include "content/renderer/media/webrtc/webrtc_local_audio_track_adapter.h" | |
10 #include "content/renderer/media/webrtc_local_audio_track.h" | |
11 #include "media/audio/audio_parameters.h" | |
12 #include "media/base/audio_bus.h" | |
13 #include "testing/gmock/include/gmock/gmock.h" | |
14 #include "testing/gtest/include/gtest/gtest.h" | |
15 #include "third_party/WebKit/public/platform/WebMediaStreamTrack.h" | |
16 | |
17 namespace content { | |
18 | |
19 // Mocked out sockets used for Send/Receive. | |
no longer working on chromium
2014/09/29 09:28:57
nit, put all these helper class into anonymous nam
burnik
2014/09/29 10:24:32
Done.
| |
20 // Data is written and read from a shared buffer used as a FIFO and there is | |
21 // no blocking. |OnSendCB| is used to trigger a |Receive| on the other socket. | |
22 class MockSyncSocket : public base::SyncSocket { | |
23 public: | |
24 // This allows for 2 requests in queue between the |MockSyncSocket|s. | |
25 static const int kSharedBufferSize = 8; | |
26 | |
27 // Buffer to be shared between two |MockSyncSocket|s. Allocated on heap. | |
28 struct SharedBuffer { | |
29 SharedBuffer() : data(), start(0), length(0) {} | |
30 | |
31 uint8 data[kSharedBufferSize]; | |
32 size_t start; | |
33 size_t length; | |
34 }; | |
35 | |
36 // Callback used for pairing an A.Send() with B.Receieve() without blocking. | |
37 typedef base::Callback<void()> OnSendCB; | |
38 | |
39 explicit MockSyncSocket(SharedBuffer* shared_buffer) | |
40 : buffer_(shared_buffer), | |
41 in_failure_mode_(false) {} | |
42 | |
43 MockSyncSocket(SharedBuffer* shared_buffer, const OnSendCB& on_send_cb) | |
44 : buffer_(shared_buffer), | |
45 on_send_cb_(on_send_cb), | |
46 in_failure_mode_(false) {} | |
47 | |
48 virtual size_t Send(const void* buffer, size_t length) OVERRIDE; | |
49 virtual size_t Receive(void* buffer, size_t length) OVERRIDE; | |
50 | |
51 // When |in_failure_mode_| == true, the socket fails to send. | |
52 void SetFailureMode(bool in_failure_mode) { | |
53 in_failure_mode_ = in_failure_mode; | |
54 } | |
55 | |
56 private: | |
57 SharedBuffer* buffer_; | |
58 const OnSendCB on_send_cb_; | |
59 bool in_failure_mode_; | |
60 }; | |
61 | |
62 size_t MockSyncSocket::Send(const void* buffer, size_t length) { | |
63 if (in_failure_mode_) | |
64 return 0; | |
65 | |
66 const uint8* b = static_cast<const uint8*>(buffer); | |
67 for (size_t i = 0; i < length; ++i, ++buffer_->length) | |
68 buffer_->data[buffer_->start + buffer_->length] = b[i]; | |
69 | |
70 on_send_cb_.Run(); | |
71 return length; | |
72 } | |
73 | |
74 size_t MockSyncSocket::Receive(void* buffer, size_t length) { | |
75 uint8* b = static_cast<uint8*>(buffer); | |
76 for (size_t i = buffer_->start; i < buffer_->length; ++i, ++buffer_->start) | |
77 b[i] = buffer_->data[buffer_->start]; | |
78 | |
79 // Since buffer is used sequentially, we can reset the buffer indices here. | |
80 buffer_->start = buffer_->length = 0; | |
81 return length; | |
82 } | |
83 | |
84 // This fake class is the consumer used to verify behaviour of the producer. | |
85 // The |Initialize()| method shows what the consumer should be responsible for | |
86 // in the production code (minus the mocks). | |
87 class FakeSpeechRecognizer { | |
88 public: | |
89 FakeSpeechRecognizer() : is_responsive_(true) { } | |
90 | |
91 void Initialize( | |
92 const blink::WebMediaStreamTrack& track, | |
93 const media::AudioParameters& sink_params, | |
94 base::SharedMemoryHandle* foreign_memory_handle) { | |
95 // Shared memory is allocated, mapped and shared. | |
96 uint32 shared_memory_size = | |
97 sizeof(media::AudioInputBufferParameters) + | |
98 media::AudioBus::CalculateMemorySize(sink_params); | |
99 shared_memory_.reset(new base::SharedMemory()); | |
100 ASSERT_TRUE(shared_memory_->CreateAndMapAnonymous(shared_memory_size)); | |
101 ASSERT_TRUE(shared_memory_->ShareToProcess(base::GetCurrentProcessHandle(), | |
102 foreign_memory_handle)); | |
103 | |
104 // Wrap the shared memory for the audio bus. | |
105 media::AudioInputBuffer* buffer = | |
106 static_cast<media::AudioInputBuffer*>(shared_memory_->memory()); | |
107 audio_track_bus_ = media::AudioBus::WrapMemory(sink_params, buffer->audio); | |
108 | |
109 // Reference to the counter used to synchronize. | |
110 buffer_index_ = &(buffer->params.size); | |
111 *buffer_index_ = 0U; | |
112 | |
113 // Create a shared buffer for the |MockSyncSocket|s. | |
114 shared_buffer_.reset(new MockSyncSocket::SharedBuffer()); | |
115 | |
116 // Local socket will receive signals from the producer. | |
117 local_socket_.reset(new MockSyncSocket(shared_buffer_.get())); | |
118 | |
119 // We automatically trigger a Receive when data is sent over the socket. | |
120 foreign_socket_ = new MockSyncSocket( | |
121 shared_buffer_.get(), | |
122 base::Bind(&FakeSpeechRecognizer::EmulateReceiveThreadLoopIteration, | |
123 base::Unretained(this))); | |
124 | |
125 // This is usually done to pair the sockets. Here it's not effective. | |
126 base::SyncSocket::CreatePair(local_socket_.get(), foreign_socket_); | |
127 } | |
128 | |
129 // Emulates a single iteraton of a thread receiving on the socket. | |
130 // This would normally be done on a receiving thread's task on the browser. | |
131 void EmulateReceiveThreadLoopIteration() { | |
132 // When not responsive do nothing as if the process is busy. | |
133 if (!is_responsive_) | |
134 return; | |
135 | |
136 local_socket_->Receive(buffer_index_, sizeof(*buffer_index_)); | |
137 // Notify the producer that the audio buffer has been consumed. | |
138 ++(*buffer_index_); | |
139 } | |
140 | |
141 // Used to simulate an unresponsive behaviour of the consumer. | |
142 void SimulateResponsiveness(bool is_responsive) { | |
143 is_responsive_ = is_responsive; | |
144 } | |
145 | |
146 MockSyncSocket* foreign_socket() { return foreign_socket_; } | |
147 media::AudioBus* audio_bus() const { return audio_track_bus_.get(); } | |
148 uint32 buffer_index() { return *buffer_index_; } | |
149 | |
150 private: | |
151 bool is_responsive_; | |
152 | |
153 // Shared memory for the audio and synchronization. | |
154 scoped_ptr<base::SharedMemory> shared_memory_; | |
155 | |
156 // Fake sockets and their shared buffer. | |
157 scoped_ptr<MockSyncSocket::SharedBuffer> shared_buffer_; | |
158 scoped_ptr<MockSyncSocket> local_socket_; | |
159 MockSyncSocket* foreign_socket_; | |
160 | |
161 // Audio bus wrapping the shared memory from the renderer. | |
162 scoped_ptr<media::AudioBus> audio_track_bus_; | |
163 | |
164 // Used for synchronization of sent/received buffers. | |
165 uint32* buffer_index_; | |
166 }; | |
167 | |
168 namespace { | |
169 | |
170 // Supported speech recognition audio parameters. | |
171 const int kSpeechRecognitionSampleRate = 16000; | |
172 const int kSpeechRecognitionFramesPerBuffer = 1600; | |
173 | |
174 // Input audio format. | |
175 const media::AudioParameters::Format kInputFormat = | |
176 media::AudioParameters::AUDIO_PCM_LOW_LATENCY; | |
177 const media::ChannelLayout kInputChannelLayout = media::CHANNEL_LAYOUT_MONO; | |
178 const int kInputChannels = 1; | |
179 const int kInputBitsPerSample = 16; | |
180 | |
181 // Output audio format. | |
182 const media::AudioParameters::Format kOutputFormat = | |
183 media::AudioParameters::AUDIO_PCM_LOW_LATENCY; | |
184 const media::ChannelLayout kOutputChannelLayout = media::CHANNEL_LAYOUT_STEREO; | |
185 const int kOutputChannels = 2; | |
186 const int kOutputBitsPerSample = 16; | |
no longer working on chromium
2014/09/29 09:28:57
move all these variable on top of MockSyncSocket,
burnik
2014/09/29 10:24:31
Done.
| |
187 | |
188 } // namespace | |
189 | |
190 class SpeechRecognitionAudioSinkTest : public testing::Test { | |
191 public: | |
192 SpeechRecognitionAudioSinkTest() { } | |
193 | |
194 // Initializes the producer and consumer with specified audio parameters. | |
195 // Returns the minimal number of input audio buffers which need to be captured | |
196 // before they get sent to the consumer. | |
197 uint32 Initialize(int input_sample_rate, | |
198 int input_frames_per_buffer, | |
199 int output_sample_rate, | |
200 int output_frames_per_buffer) { | |
201 // Audio Environment setup. | |
202 source_params_.Reset(kInputFormat, | |
203 kInputChannelLayout, | |
204 kInputChannels, | |
205 input_sample_rate, | |
206 kInputBitsPerSample, | |
207 input_frames_per_buffer); | |
208 sink_params_.Reset(kOutputFormat, | |
209 kOutputChannelLayout, | |
210 kOutputChannels, | |
211 output_sample_rate, | |
212 kOutputBitsPerSample, | |
213 output_frames_per_buffer); | |
214 source_data_.reset(new int16[input_frames_per_buffer * kInputChannels]); | |
215 | |
216 // Prepare the track and audio source. | |
217 blink::WebMediaStreamTrack blink_track; | |
218 PrepareBlinkTrackOfType(MEDIA_DEVICE_AUDIO_CAPTURE, &blink_track); | |
219 | |
220 // Get the native track from the blink track and initialize. | |
221 native_track_ = | |
222 static_cast<WebRtcLocalAudioTrack*>(blink_track.extraData()); | |
223 native_track_->OnSetFormat(source_params_); | |
224 | |
225 // Create and initialize the consumer. | |
226 recognizer_.reset(new FakeSpeechRecognizer()); | |
227 base::SharedMemoryHandle foreign_memory_handle; | |
228 recognizer_->Initialize(blink_track, sink_params_, &foreign_memory_handle); | |
229 | |
230 // Create the producer. | |
231 scoped_ptr<base::SyncSocket> foreign_socket(recognizer_->foreign_socket()); | |
232 speech_audio_sink_.reset(new SpeechRecognitionAudioSink( | |
233 blink_track, sink_params_, foreign_memory_handle, | |
234 foreign_socket.Pass(), | |
235 base::Bind(&SpeechRecognitionAudioSinkTest::StoppedCallback, | |
236 base::Unretained(this)))); | |
237 | |
238 // Return number of buffers needed to trigger resampling and consumption. | |
239 return static_cast<uint32>(std::ceil( | |
240 static_cast<double>(output_frames_per_buffer * input_sample_rate) / | |
241 (input_frames_per_buffer * output_sample_rate))); | |
242 } | |
243 | |
244 // Mock callback expected to be called when the track is stopped. | |
245 MOCK_METHOD0(StoppedCallback, void()); | |
246 | |
247 protected: | |
248 // Prepares a blink track of a given MediaStreamType and attaches the native | |
249 // track which can be used to capture audio data and pass it to the producer. | |
250 static void PrepareBlinkTrackOfType( | |
251 const MediaStreamType device_type, | |
252 blink::WebMediaStreamTrack* blink_track) { | |
253 StreamDeviceInfo device_info(device_type, "Mock device", | |
254 "mock_device_id"); | |
255 MockMediaConstraintFactory constraint_factory; | |
256 const blink::WebMediaConstraints constraints = | |
257 constraint_factory.CreateWebMediaConstraints(); | |
258 scoped_refptr<WebRtcAudioCapturer> capturer( | |
259 WebRtcAudioCapturer::CreateCapturer(-1, device_info, constraints, NULL, | |
260 NULL)); | |
261 scoped_refptr<WebRtcLocalAudioTrackAdapter> adapter( | |
262 WebRtcLocalAudioTrackAdapter::Create(std::string(), NULL)); | |
263 scoped_ptr<WebRtcLocalAudioTrack> native_track( | |
264 new WebRtcLocalAudioTrack(adapter.get(), capturer, NULL)); | |
265 blink::WebMediaStreamSource blink_audio_source; | |
266 blink_audio_source.initialize(base::UTF8ToUTF16("dummy_source_id"), | |
267 blink::WebMediaStreamSource::TypeAudio, | |
268 base::UTF8ToUTF16("dummy_source_name")); | |
269 MediaStreamSource::SourceStoppedCallback cb; | |
270 blink_audio_source.setExtraData( | |
271 new MediaStreamAudioSource(-1, device_info, cb, NULL)); | |
272 blink_track->initialize(blink::WebString::fromUTF8("dummy_track"), | |
273 blink_audio_source); | |
274 blink_track->setExtraData(native_track.release()); | |
275 } | |
276 | |
277 // Emulates an audio capture device capturing data from the source. | |
278 inline void CaptureAudio(const uint32 buffers) { | |
279 for (uint32 i = 0; i < buffers; ++i) | |
280 native_track_->Capture(source_data_.get(), | |
281 base::TimeDelta::FromMilliseconds(0), 1, false, | |
282 false); | |
283 } | |
284 | |
285 // Used to simulate a problem with sockets. | |
286 void SetFailureModeOnForeignSocket(bool in_failure_mode) { | |
287 recognizer_->foreign_socket()->SetFailureMode(in_failure_mode); | |
288 } | |
289 | |
290 // Helper method for verifying captured audio data has been consumed. | |
291 inline void AssertConsumedBuffers(const uint32 buffer_index) { | |
292 ASSERT_EQ(buffer_index, recognizer_->buffer_index()); | |
293 } | |
294 | |
295 // Helper method for providing audio data to producer and verifying it was | |
296 // consumed on the recognizer. | |
297 inline void CaptureAudioAndAssertConsumedBuffers(const uint32 buffers, | |
298 const uint32 buffer_index) { | |
299 CaptureAudio(buffers); | |
300 AssertConsumedBuffers(buffer_index); | |
301 } | |
302 | |
303 // Helper method to capture and assert consumption at different sample rates | |
304 // and audio buffer sizes. | |
305 inline void AssertConsumptionForAudioParameters( | |
306 const int input_sample_rate, | |
307 const int input_frames_per_buffer, | |
308 const int output_sample_rate, | |
309 const int output_frames_per_buffer, | |
310 const uint32 consumptions) { | |
311 const uint32 kBuffersPerNotification = | |
312 Initialize(input_sample_rate, input_frames_per_buffer, | |
313 output_sample_rate, output_frames_per_buffer); | |
314 AssertConsumedBuffers(0U); | |
315 | |
316 for (uint32 i = 1U; i <= consumptions; ++i) { | |
317 CaptureAudio(kBuffersPerNotification); | |
318 ASSERT_EQ(i, recognizer_->buffer_index()) | |
319 << "Tested at rates: " | |
320 << "In(" << input_sample_rate << ", " << input_frames_per_buffer | |
321 << ") " | |
322 << "Out(" << output_sample_rate << ", " << output_frames_per_buffer | |
323 << ")"; | |
324 } | |
325 } | |
326 | |
327 // Producer. | |
328 scoped_ptr<SpeechRecognitionAudioSink> speech_audio_sink_; | |
329 | |
330 // Consumer. | |
331 scoped_ptr<FakeSpeechRecognizer> recognizer_; | |
332 | |
333 // Audio related members. | |
334 scoped_ptr<int16[]> source_data_; | |
335 media::AudioParameters source_params_; | |
336 media::AudioParameters sink_params_; | |
337 WebRtcLocalAudioTrack* native_track_; | |
338 }; | |
339 | |
340 // Not all types of tracks are supported. This test checks if that policy is | |
341 // implemented correctly. | |
342 TEST_F(SpeechRecognitionAudioSinkTest, CheckIsSupportedAudioTrack) { | |
343 typedef std::map<MediaStreamType, bool> SupportedTrackPolicy; | |
344 | |
345 // This test must be aligned with the policy of supported tracks. | |
346 SupportedTrackPolicy p; | |
347 p[MEDIA_NO_SERVICE] = false; | |
348 p[MEDIA_DEVICE_AUDIO_CAPTURE] = true; // The only one supported for now. | |
349 p[MEDIA_DEVICE_VIDEO_CAPTURE] = false; | |
350 p[MEDIA_TAB_AUDIO_CAPTURE] = false; | |
351 p[MEDIA_TAB_VIDEO_CAPTURE] = false; | |
352 p[MEDIA_DESKTOP_VIDEO_CAPTURE] = false; | |
353 p[MEDIA_LOOPBACK_AUDIO_CAPTURE] = false; | |
354 p[MEDIA_DEVICE_AUDIO_OUTPUT] = false; | |
355 | |
356 // Ensure this test gets updated along with |content::MediaStreamType| enum. | |
357 EXPECT_EQ(NUM_MEDIA_TYPES, p.size()); | |
358 | |
359 // Check the the entire policy. | |
360 for (SupportedTrackPolicy::iterator it = p.begin(); it != p.end(); ++it) { | |
361 blink::WebMediaStreamTrack blink_track; | |
362 PrepareBlinkTrackOfType(it->first, &blink_track); | |
363 ASSERT_EQ( | |
364 it->second, | |
365 SpeechRecognitionAudioSink::IsSupportedTrack(blink_track)); | |
366 } | |
367 } | |
368 | |
369 // Checks if the producer can support the listed range of input sample rates | |
370 // and associated buffer sizes. | |
371 TEST_F(SpeechRecognitionAudioSinkTest, RecognizerNotifiedOnSocket) { | |
372 const size_t kNumAudioParamTuples = 24; | |
373 const int kAudioParams[kNumAudioParamTuples][2] = { | |
374 {8000, 80}, {8000, 800}, {16000, 160}, {16000, 1600}, | |
375 {24000, 240}, {24000, 2400}, {32000, 320}, {32000, 3200}, | |
376 {44100, 441}, {44100, 4410}, {48000, 480}, {48000, 4800}, | |
377 {96000, 960}, {96000, 9600}, {11025, 111}, {11025, 1103}, | |
378 {22050, 221}, {22050, 2205}, {88200, 882}, {88200, 8820}, | |
379 {176400, 1764}, {176400, 17640}, {192000, 1920}, {192000, 19200}}; | |
380 | |
381 // Check all listed tuples of input sample rates and buffers sizes. | |
382 for (size_t i = 0; i < kNumAudioParamTuples; ++i) { | |
383 AssertConsumptionForAudioParameters( | |
384 kAudioParams[i][0], kAudioParams[i][1], | |
385 kSpeechRecognitionSampleRate, kSpeechRecognitionFramesPerBuffer, 3U); | |
386 } | |
387 } | |
388 | |
389 // Checks that the input data is getting resampled to the target sample rate. | |
390 TEST_F(SpeechRecognitionAudioSinkTest, AudioDataIsResampledOnSink) { | |
391 EXPECT_GE(kInputChannels, 1); | |
392 EXPECT_GE(kOutputChannels, 1); | |
393 | |
394 // Input audio is sampled at 44.1 KHz with data chunks of 10ms. Desired output | |
395 // is corresponding to the speech recognition engine requirements: 16 KHz with | |
396 // 100 ms chunks (1600 frames per buffer). | |
397 const uint32 kBuffersPerNotification = Initialize(44100, 441, 16000, 1600); | |
398 | |
399 // Fill audio input frames with 0, 1, 2, 3, ..., 440. | |
400 const uint32 kSourceDataLength = 441 * kInputChannels; | |
401 for (uint32 i = 0; i < kSourceDataLength; ++i) { | |
402 for (int c = 0; c < kInputChannels; ++c) | |
403 source_data_[i * kInputChannels + c] = i; | |
404 } | |
405 | |
406 // Prepare sink audio bus and data for rendering. | |
407 media::AudioBus* sink_bus = recognizer_->audio_bus(); | |
408 const uint32 kSinkDataLength = 1600 * kOutputChannels; | |
409 int16 sink_data[kSinkDataLength] = {0}; | |
410 | |
411 // Render the audio data from the recognizer. | |
412 sink_bus->ToInterleaved(sink_bus->frames(), | |
413 sink_params_.bits_per_sample() / 8, sink_data); | |
414 | |
415 // Checking only a fraction of the sink frames. | |
416 const uint32 kNumFramesToTest = 12; | |
417 | |
418 // Check all channels are zeroed out before we trigger resampling. | |
419 for (uint32 i = 0; i < kNumFramesToTest; ++i) { | |
420 for (int c = 0; c < kOutputChannels; ++c) | |
421 EXPECT_EQ(0, sink_data[i * kOutputChannels + c]); | |
422 } | |
423 | |
424 // Trigger the speech sink to resample the input data. | |
425 AssertConsumedBuffers(0U); | |
426 CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 1U); | |
427 | |
428 // Render the audio data from the recognizer. | |
429 sink_bus->ToInterleaved(sink_bus->frames(), | |
430 sink_params_.bits_per_sample() / 8, sink_data); | |
431 | |
432 // Resampled data expected frames. Extracted based on |source_data_|. | |
433 const int16 kExpectedData[kNumFramesToTest] = {0, 2, 5, 8, 11, 13, | |
434 16, 19, 22, 24, 27, 30}; | |
435 | |
436 // Check all channels have the same resampled data. | |
437 for (uint32 i = 0; i < kNumFramesToTest; ++i) { | |
438 for (int c = 0; c < kOutputChannels; ++c) | |
439 EXPECT_EQ(kExpectedData[i], sink_data[i * kOutputChannels + c]); | |
440 } | |
441 } | |
442 | |
443 // Checks that the producer does not misbehave when a socket failure occurs. | |
444 TEST_F(SpeechRecognitionAudioSinkTest, SyncSocketFailsSendingData) { | |
445 const uint32 kBuffersPerNotification = Initialize(44100, 441, 16000, 1600); | |
446 // Start with no problems on the socket. | |
447 AssertConsumedBuffers(0U); | |
448 CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 1U); | |
449 | |
450 // A failure occurs (socket cannot send). | |
451 SetFailureModeOnForeignSocket(true); | |
452 CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 1U); | |
453 } | |
454 | |
455 // Checks that an OnStoppedCallback is issued when the track is stopped. | |
456 TEST_F(SpeechRecognitionAudioSinkTest, OnReadyStateChangedOccured) { | |
457 const uint32 kBuffersPerNotification = Initialize(44100, 441, 16000, 1600); | |
458 AssertConsumedBuffers(0U); | |
459 CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 1U); | |
460 EXPECT_CALL(*this, StoppedCallback()).Times(1); | |
461 | |
462 native_track_->Stop(); | |
463 CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 1U); | |
464 } | |
465 | |
466 } // namespace content | |
OLD | NEW |