// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "content/renderer/media/speech_recognition_audio_source_provider.h"

#include <map>
#include <string>

#include "base/bind.h"
#include "base/memory/scoped_ptr.h"
#include "base/memory/shared_memory.h"
#include "base/strings/utf_string_conversions.h"
#include "base/sync_socket.h"
#include "content/renderer/media/media_stream_audio_source.h"
#include "content/renderer/media/mock_media_constraint_factory.h"
#include "content/renderer/media/webrtc/webrtc_local_audio_track_adapter.h"
#include "content/renderer/media/webrtc_audio_capturer.h"
#include "content/renderer/media/webrtc_local_audio_track.h"
#include "media/audio/audio_parameters.h"
#include "media/base/audio_bus.h"
#include "testing/gmock/include/gmock/gmock.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "third_party/WebKit/public/platform/WebMediaStreamSource.h"
#include "third_party/WebKit/public/platform/WebMediaStreamTrack.h"
#include "third_party/WebKit/public/platform/WebString.h"

namespace {

// Input audio format.
const media::AudioParameters::Format kInputFormat =
    media::AudioParameters::AUDIO_PCM_LOW_LATENCY;
const media::ChannelLayout kInputChannelLayout = media::CHANNEL_LAYOUT_MONO;
const int kInputChannels = 1;
const int kInputSampleRate = 44100;
const int kInputBitsPerSample = 16;
const int kInputFramesPerBuffer = 441;

// Output audio format.
const media::AudioParameters::Format kOutputFormat =
    media::AudioParameters::AUDIO_PCM_LOW_LATENCY;
const media::ChannelLayout kOutputChannelLayout = media::CHANNEL_LAYOUT_STEREO;
const int kOutputChannels = 2;
const int kOutputSampleRate = 16000;
const int kOutputBitsPerSample = 16;
const int kOutputFramesPerBuffer = 1600;

// Minimum number of input buffers required to trigger a single SyncSocket
// transfer.
const size_t kBuffersPerNotification =
    (kOutputFramesPerBuffer * kInputSampleRate) /
    (kInputFramesPerBuffer * kOutputSampleRate);
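// With the parameters above: (1600 * 44100) / (441 * 16000) = 10 buffers.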

// Length of the source audio data in samples (frames * channels).
const size_t kSourceDataLength = kInputFramesPerBuffer * kInputChannels;

}  // namespace

////////////////////////////////////////////////////////////////////////////////

namespace content {

// Mocked out sockets used for Send/Receive.
// Data is written to and read from a shared buffer used as a FIFO; there is
// no blocking. |OnSendCB| is used to trigger a |Receive| on the other socket.
class MockSyncSocket : public base::SyncSocket {
 public:
  // This allows for two requests in the queue between the |MockSyncSocket|s.
  static const int kSharedBufferSize = 8;

  // Buffer to be shared between two |MockSyncSocket|s. Allocated on the heap.
  struct SharedBuffer {
    SharedBuffer() : start(0), length(0) {}

    uint8 data[kSharedBufferSize];
    size_t start;
    size_t length;
  };

  // Callback used for pairing an A.Send() with B.Receive() without blocking.
  typedef base::Callback<void()> OnSendCB;

  explicit MockSyncSocket(SharedBuffer* shared_buffer);
  MockSyncSocket(SharedBuffer* shared_buffer, const OnSendCB& on_send_cb);

  virtual size_t Send(const void* buffer, size_t length) OVERRIDE;
  virtual size_t Receive(void* buffer, size_t length) OVERRIDE;

  // When |in_failure_mode_| == true, the socket fails to send.
  void SetFailureMode(bool in_failure_mode) {
    in_failure_mode_ = in_failure_mode;
  }

 private:
  SharedBuffer* buffer_;
  const OnSendCB on_send_cb_;
  bool in_failure_mode_;
};
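
// A minimal usage sketch (the free function |OnDataSent()| is hypothetical,
// shown only for illustration): constructing the sending socket with a
// callback makes every Send() immediately drive a Receive() on its pair.
//
//   MockSyncSocket::SharedBuffer buffer;
//   MockSyncSocket receiver(&buffer);
//   MockSyncSocket sender(&buffer, base::Bind(&OnDataSent));
//   uint32 index = 42;
//   sender.Send(&index, sizeof(index));  // OnDataSent() would call
//                                        // receiver.Receive(...).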

MockSyncSocket::MockSyncSocket(SharedBuffer* buffer)
    : buffer_(buffer), in_failure_mode_(false) {}

MockSyncSocket::MockSyncSocket(SharedBuffer* buffer,
                               const OnSendCB& on_send_cb)
    : buffer_(buffer), on_send_cb_(on_send_cb), in_failure_mode_(false) {}

size_t MockSyncSocket::Send(const void* buffer, size_t length) {
  if (in_failure_mode_)
    return 0;

  const uint8* b = static_cast<const uint8*>(buffer);
  for (size_t i = 0; i < length; ++i, ++buffer_->length)
    buffer_->data[buffer_->start + buffer_->length] = b[i];

  // Only the socket constructed with a callback (the foreign socket) sends,
  // but guard against running a null callback anyway.
  if (!on_send_cb_.is_null())
    on_send_cb_.Run();
  return length;
}

size_t MockSyncSocket::Receive(void* buffer, size_t length) {
  uint8* b = static_cast<uint8*>(buffer);
  for (size_t i = buffer_->start; i < buffer_->length; ++i, ++buffer_->start)
    b[i] = buffer_->data[buffer_->start];

  // Since each Send() is drained by a full Receive(), the buffer indices can
  // be reset here.
  buffer_->start = buffer_->length = 0;
  return length;
}

////////////////////////////////////////////////////////////////////////////////

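// Fake speech recognizer (the consumer). The producer writes resampled audio
// into the shared AudioBus and signals over its socket; the foreign socket's
// send callback drives EmulateReceiveThreadLoopIteration(), which receives
// the signal on the local socket and bumps the shared buffer index.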
class FakeSpeechRecognizer {
 public:
  FakeSpeechRecognizer() : is_responsive_(true) {}
  ~FakeSpeechRecognizer() {}

  void Initialize(
      const blink::WebMediaStreamTrack& track,
      const media::AudioParameters& sink_params,
      const SpeechRecognitionAudioSourceProvider::OnStoppedCB& on_stopped_cb);

  // TODO(burnik): Move from the recognizer to the test.
  SpeechRecognitionAudioSourceProvider* SourceProvider() {
    return audio_source_provider_.get();
  }

  // Emulates a single iteration of the receiving thread's loop; this would
  // normally run on the browser side.
  void EmulateReceiveThreadLoopIteration() {
    // When not responsive, do nothing, as if the process were busy.
    if (!is_responsive_)
      return;

    local_socket_->Receive(shared_buffer_index_,
                           sizeof(*shared_buffer_index_));
    // Notify the producer that the audio buffer has been consumed.
    (*shared_buffer_index_)++;
  }

  // Used to simulate unresponsive behavior of the consumer.
  void SimulateResponsiveness(bool is_responsive) {
    is_responsive_ = is_responsive;
  }

  // Used to simulate a problem with the sockets.
  void SetFailureModeOnForeignSocket(bool in_failure_mode) {
    foreign_socket_->SetFailureMode(in_failure_mode);
  }

  uint32 buffer_index() const { return *shared_buffer_index_; }
  media::AudioBus* audio_bus() const { return audio_track_bus_.get(); }

 private:
  bool is_responsive_;

  // Shared memory for the audio and synchronization.
  scoped_ptr<base::SharedMemory> shared_memory_;

  // Shared buffer used by the fake sockets.
  scoped_ptr<MockSyncSocket::SharedBuffer> shared_buffer_;
  scoped_ptr<MockSyncSocket> local_socket_;
  scoped_ptr<MockSyncSocket> foreign_socket_;

  // Audio bus wrapping the shared memory from the renderer.
  scoped_ptr<media::AudioBus> audio_track_bus_;

  // Points at the synchronization counter in the shared memory.
  uint32* shared_buffer_index_;

  // Producer. TODO(burnik): This should be outside the recognizer.
  scoped_ptr<SpeechRecognitionAudioSourceProvider> audio_source_provider_;
};

void FakeSpeechRecognizer::Initialize(
    const blink::WebMediaStreamTrack& track,
    const media::AudioParameters& sink_params,
    const SpeechRecognitionAudioSourceProvider::OnStoppedCB& on_stopped_cb) {
  // Allocate, map and share the memory used for audio and synchronization.
  const uint32 shared_memory_size =
      sizeof(media::AudioInputBufferParameters) +
      media::AudioBus::CalculateMemorySize(sink_params);
  shared_memory_.reset(new base::SharedMemory());
  ASSERT_TRUE(shared_memory_->CreateAndMapAnonymous(shared_memory_size));

  base::SharedMemoryHandle foreign_memory_handle;
  ASSERT_TRUE(shared_memory_->ShareToProcess(base::GetCurrentProcessHandle(),
                                             &foreign_memory_handle));

  media::AudioInputBuffer* buffer =
      static_cast<media::AudioInputBuffer*>(shared_memory_->memory());
  audio_track_bus_ = media::AudioBus::WrapMemory(sink_params, buffer->audio);

  // The |params.size| field of the shared |AudioInputBuffer| is repurposed as
  // the counter used for synchronization.
  shared_buffer_index_ = &(buffer->params.size);
  *shared_buffer_index_ = 0U;

  // Create a shared buffer for the |MockSyncSocket|s.
  shared_buffer_.reset(new MockSyncSocket::SharedBuffer());

  // The local socket receives signals from the producer.
  local_socket_.reset(new MockSyncSocket(shared_buffer_.get()));

  // A Receive on the local socket is triggered automatically whenever data is
  // sent over the foreign socket.
  foreign_socket_.reset(new MockSyncSocket(
      shared_buffer_.get(),
      base::Bind(&FakeSpeechRecognizer::EmulateReceiveThreadLoopIteration,
                 base::Unretained(this))));

  // Pairing the sockets is normally required; since the mocks override
  // Send/Receive, it has no effect here.
  base::SyncSocket::CreatePair(local_socket_.get(), foreign_socket_.get());

  // Create the producer. TODO(burnik): Move out of the recognizer.
  audio_source_provider_.reset(new SpeechRecognitionAudioSourceProvider(
      track, sink_params, foreign_memory_handle, foreign_socket_.get(),
      on_stopped_cb));
}

////////////////////////////////////////////////////////////////////////////////

class SpeechRecognitionAudioSourceProviderTest : public testing::Test {
 public:
  SpeechRecognitionAudioSourceProviderTest() {
    // Audio environment setup.
    source_params_.Reset(kInputFormat, kInputChannelLayout, kInputChannels,
                         kInputSampleRate, kInputBitsPerSample,
                         kInputFramesPerBuffer);
    sink_params_.Reset(kOutputFormat, kOutputChannelLayout, kOutputChannels,
                       kOutputSampleRate, kOutputBitsPerSample,
                       kOutputFramesPerBuffer);
    source_data_.reset(new int16[kSourceDataLength]);

    // Prepare the track and audio source.
    blink::WebMediaStreamTrack blink_track;
    PrepareTrackWithMediaStreamType(MEDIA_DEVICE_AUDIO_CAPTURE, &blink_track);

    // Get the native track from the blink track and initialize it.
    native_track_ =
        static_cast<WebRtcLocalAudioTrack*>(blink_track.extraData());
    native_track_->OnSetFormat(source_params_);

    // Create and initialize the consumer.
    recognizer_.reset(new FakeSpeechRecognizer());
    recognizer_->Initialize(
        blink_track, sink_params_,
        base::Bind(&SpeechRecognitionAudioSourceProviderTest::StoppedCallback,
                   base::Unretained(this)));

    // The producer is created and owned by the recognizer.
    audio_source_provider_ = recognizer_->SourceProvider();
  }

  // Mock callback for when the track is stopped.
  MOCK_METHOD0(StoppedCallback, void());

 protected:
  static void PrepareTrackWithMediaStreamType(
      const MediaStreamType device_type,
      blink::WebMediaStreamTrack* blink_track) {
    // Device info.
    StreamDeviceInfo device_info(device_type, "Mock audio device",
                                 "mock_audio_device_id");

    // Constraints.
    MockMediaConstraintFactory constraint_factory;
    const blink::WebMediaConstraints constraints =
        constraint_factory.CreateWebMediaConstraints();

    // Capturer.
    scoped_refptr<WebRtcAudioCapturer> capturer(
        WebRtcAudioCapturer::CreateCapturer(-1, device_info, constraints, NULL,
                                            NULL));

    // Adapter.
    scoped_refptr<WebRtcLocalAudioTrackAdapter> adapter(
        WebRtcLocalAudioTrackAdapter::Create(std::string(), NULL));

    // Native track.
    scoped_ptr<WebRtcLocalAudioTrack> native_track(
        new WebRtcLocalAudioTrack(adapter.get(), capturer, NULL));

    // Blink audio source.
    blink::WebMediaStreamSource blink_audio_source;
    blink_audio_source.initialize(base::UTF8ToUTF16("dummy_source_id"),
                                  blink::WebMediaStreamSource::TypeAudio,
                                  base::UTF8ToUTF16("dummy_source_name"));
    MediaStreamSource::SourceStoppedCallback cb;
    blink_audio_source.setExtraData(
        new MediaStreamAudioSource(-1, device_info, cb, NULL));

    // Blink track.
    blink_track->initialize(blink::WebString::fromUTF8("audio_track"),
                            blink_audio_source);
    blink_track->setExtraData(native_track.release());
  }

  // Emulates an audio capture device capturing data from the source.
  void CaptureAudio(const size_t buffers) {
    for (size_t i = 0; i < buffers; ++i)
      native_track_->Capture(source_data_.get(),
                             base::TimeDelta::FromMilliseconds(0), 1, false,
                             false);
  }

  // Helper method to verify that captured audio data has been consumed.
  void AssertConsumedBuffers(const size_t buffer_index) {
    ASSERT_EQ(buffer_index, recognizer_->buffer_index());
  }

  // Helper method to push audio data to the producer and verify that it was
  // consumed.
  void CaptureAudioAndAssertConsumedBuffers(const size_t buffers,
                                            const size_t buffer_index) {
    CaptureAudio(buffers);
    AssertConsumedBuffers(buffer_index);
  }

  // Producer, owned by |recognizer_|.
  SpeechRecognitionAudioSourceProvider* audio_source_provider_;
  // Consumer.
  scoped_ptr<FakeSpeechRecognizer> recognizer_;
  // Audio-related members.
  scoped_ptr<int16[]> source_data_;
  media::AudioParameters source_params_;
  media::AudioParameters sink_params_;
  WebRtcLocalAudioTrack* native_track_;
};

////////////////////////////////////////////////////////////////////////////////

TEST_F(SpeechRecognitionAudioSourceProviderTest, CheckIsSupportedTrackType) {
  typedef std::map<MediaStreamType, bool> SupportedTrackPolicy;
  // This test must be kept in sync with the policy of supported tracks.
  SupportedTrackPolicy p;
  p[MEDIA_NO_SERVICE] = false;
  p[MEDIA_DEVICE_AUDIO_CAPTURE] = true;  // The only type supported for now.
  p[MEDIA_DEVICE_VIDEO_CAPTURE] = false;
  p[MEDIA_TAB_AUDIO_CAPTURE] = false;
  p[MEDIA_TAB_VIDEO_CAPTURE] = false;
  p[MEDIA_DESKTOP_VIDEO_CAPTURE] = false;
  p[MEDIA_LOOPBACK_AUDIO_CAPTURE] = false;
  p[MEDIA_DEVICE_AUDIO_OUTPUT] = false;
  // Ensure this test gets updated along with the |content::MediaStreamType|
  // enum.
  EXPECT_EQ(static_cast<size_t>(NUM_MEDIA_TYPES), p.size());
  // Check the entire policy.
  for (SupportedTrackPolicy::iterator it = p.begin(); it != p.end(); ++it) {
    blink::WebMediaStreamTrack blink_track;
    PrepareTrackWithMediaStreamType(it->first, &blink_track);
    ASSERT_EQ(
        it->second,
        SpeechRecognitionAudioSourceProvider::IsSupportedTrack(blink_track));
  }
}

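// Each batch of |kBuffersPerNotification| captured input buffers fills one
// output buffer, which triggers exactly one socket transfer and advances the
// shared buffer index by one.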
TEST_F(SpeechRecognitionAudioSourceProviderTest, RecognizerNotifiedOnSocket) {
  AssertConsumedBuffers(0U);
  CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 1U);
  CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 2U);
  CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 3U);
}

TEST_F(SpeechRecognitionAudioSourceProviderTest, AudioDataIsResampledOnSink) {
  // Fill the audio input frames with 0, 1, 2, ..., 440.
  for (size_t i = 0; i < kSourceDataLength; ++i)
    source_data_[i] = i;

  const size_t num_frames_to_test = 12;
  const size_t sink_data_length = kOutputFramesPerBuffer * kOutputChannels;
  int16 sink_data[sink_data_length];
  media::AudioBus* sink_bus = recognizer_->audio_bus();

  // Render the audio data from the recognizer.
  sink_bus->ToInterleaved(sink_bus->frames(),
                          sink_params_.bits_per_sample() / 8, sink_data);

  // Verify that both channels are zeroed out before resampling is triggered.
  for (size_t i = 0; i < num_frames_to_test; ++i) {
    ASSERT_EQ(0, sink_data[i * 2]);
    ASSERT_EQ(0, sink_data[i * 2 + 1]);
  }

  // Trigger the source provider to resample the input data.
  AssertConsumedBuffers(0U);
  CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 1U);

  // Render the audio data from the recognizer again.
  sink_bus->ToInterleaved(sink_bus->frames(),
                          sink_params_.bits_per_sample() / 8, sink_data);

  // Expected frames of resampled data, based on |source_data_|.
  // Note: these values also depend on the input/output audio parameters.
  const int16 expected_data[num_frames_to_test] = {0,  2,  5,  8,  11, 13,
                                                   16, 19, 22, 24, 27, 30};
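  // Rough sanity check on the values above: the resampling ratio is
  // 44100 / 16000 = 2.75625 input frames per output frame, so with the linear
  // ramp as input the i-th output frame is approximately 2.75625 * i
  // (0, 2.76, 5.51, 8.27, ...), modulo the resampler's filtering.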

  // Verify that both channels carry the same resampled data.
  for (size_t i = 0; i < num_frames_to_test; ++i) {
    ASSERT_EQ(expected_data[i], sink_data[i * 2]);
    ASSERT_EQ(expected_data[i], sink_data[i * 2 + 1]);
  }
}

TEST_F(SpeechRecognitionAudioSourceProviderTest, SyncSocketFailsSendingData) {
  // (1) Start out with no problems.
  AssertConsumedBuffers(0U);
  CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 1U);

  // (2) A failure occurs (the socket fails to send), so no further buffers
  // are consumed.
  recognizer_->SetFailureModeOnForeignSocket(true);
  CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 1U);
}

TEST_F(SpeechRecognitionAudioSourceProviderTest, OnReadyStateChangedOccurred) {
  AssertConsumedBuffers(0U);
  CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 1U);
  EXPECT_CALL(*this, StoppedCallback()).Times(1);

  native_track_->Stop();
  CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 1U);
}

}  // namespace content