OLD | NEW |
1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "content/renderer/media/speech_recognition_audio_sink.h" | 5 #include "content/renderer/media/speech_recognition_audio_sink.h" |
6 | 6 |
7 #include <stddef.h> | 7 #include <stddef.h> |
8 #include <stdint.h> | 8 #include <stdint.h> |
9 #include <string.h> | 9 #include <string.h> |
10 #include <utility> | 10 #include <utility> |
11 | 11 |
12 #include "base/bind.h" | 12 #include "base/bind.h" |
13 #include "base/macros.h" | 13 #include "base/macros.h" |
| 14 #include "base/strings/utf_string_conversions.h" |
14 #include "content/renderer/media/media_stream_audio_source.h" | 15 #include "content/renderer/media/media_stream_audio_source.h" |
15 #include "content/renderer/media/media_stream_audio_track.h" | 16 #include "content/renderer/media/webrtc/webrtc_local_audio_track_adapter.h" |
16 #include "content/renderer/media/webrtc/mock_peer_connection_dependency_factory.
h" | 17 #include "content/renderer/media/webrtc_local_audio_track.h" |
17 #include "media/base/audio_bus.h" | 18 #include "media/base/audio_bus.h" |
18 #include "media/base/audio_parameters.h" | 19 #include "media/base/audio_parameters.h" |
19 #include "testing/gmock/include/gmock/gmock.h" | 20 #include "testing/gmock/include/gmock/gmock.h" |
20 #include "testing/gtest/include/gtest/gtest.h" | 21 #include "testing/gtest/include/gtest/gtest.h" |
21 #include "third_party/WebKit/public/platform/WebMediaStreamTrack.h" | 22 #include "third_party/WebKit/public/platform/WebMediaStreamTrack.h" |
22 #include "third_party/WebKit/public/platform/WebString.h" | |
23 #include "third_party/WebKit/public/web/WebHeap.h" | 23 #include "third_party/WebKit/public/web/WebHeap.h" |
24 | 24 |
25 namespace { | 25 namespace { |
26 | 26 |
27 // Supported speech recognition audio parameters. | 27 // Supported speech recognition audio parameters. |
28 const int kSpeechRecognitionSampleRate = 16000; | 28 const int kSpeechRecognitionSampleRate = 16000; |
29 const int kSpeechRecognitionFramesPerBuffer = 1600; | 29 const int kSpeechRecognitionFramesPerBuffer = 1600; |
30 | 30 |
31 // Input audio format. | 31 // Input audio format. |
32 const media::AudioParameters::Format kInputFormat = | 32 const media::AudioParameters::Format kInputFormat = |
(...skipping 164 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
197 // Audio bus wrapping the shared memory from the renderer. | 197 // Audio bus wrapping the shared memory from the renderer. |
198 std::unique_ptr<media::AudioBus> audio_track_bus_; | 198 std::unique_ptr<media::AudioBus> audio_track_bus_; |
199 | 199 |
200 DISALLOW_COPY_AND_ASSIGN(FakeSpeechRecognizer); | 200 DISALLOW_COPY_AND_ASSIGN(FakeSpeechRecognizer); |
201 }; | 201 }; |
202 | 202 |
203 } // namespace | 203 } // namespace |
204 | 204 |
205 namespace content { | 205 namespace content { |
206 | 206 |
207 namespace { | |
208 | |
209 class TestDrivenAudioSource : public MediaStreamAudioSource { | |
210 public: | |
211 TestDrivenAudioSource() : MediaStreamAudioSource(true) {} | |
212 ~TestDrivenAudioSource() final {} | |
213 | |
214 // Expose protected methods as public for testing. | |
215 using MediaStreamAudioSource::SetFormat; | |
216 using MediaStreamAudioSource::DeliverDataToTracks; | |
217 }; | |
218 | |
219 } // namespace | |
220 | |
221 class SpeechRecognitionAudioSinkTest : public testing::Test { | 207 class SpeechRecognitionAudioSinkTest : public testing::Test { |
222 public: | 208 public: |
223 SpeechRecognitionAudioSinkTest() {} | 209 SpeechRecognitionAudioSinkTest() {} |
224 | 210 |
225 ~SpeechRecognitionAudioSinkTest() { | 211 ~SpeechRecognitionAudioSinkTest() { |
226 blink_source_.reset(); | |
227 blink_track_.reset(); | |
228 speech_audio_sink_.reset(); | 212 speech_audio_sink_.reset(); |
229 blink::WebHeap::collectAllGarbageForTesting(); | 213 blink::WebHeap::collectAllGarbageForTesting(); |
230 } | 214 } |
231 | 215 |
232 // Initializes the producer and consumer with specified audio parameters. | 216 // Initializes the producer and consumer with specified audio parameters. |
233 // Returns the minimal number of input audio buffers which need to be captured | 217 // Returns the minimal number of input audio buffers which need to be captured |
234 // before they get sent to the consumer. | 218 // before they get sent to the consumer. |
235 uint32_t Initialize(int input_sample_rate, | 219 uint32_t Initialize(int input_sample_rate, |
236 int input_frames_per_buffer, | 220 int input_frames_per_buffer, |
237 int output_sample_rate, | 221 int output_sample_rate, |
238 int output_frames_per_buffer) { | 222 int output_frames_per_buffer) { |
239 // Audio Environment setup. | 223 // Audio Environment setup. |
240 source_params_.Reset(kInputFormat, | 224 source_params_.Reset(kInputFormat, |
241 kInputChannelLayout, | 225 kInputChannelLayout, |
242 input_sample_rate, | 226 input_sample_rate, |
243 kInputBitsPerSample, | 227 kInputBitsPerSample, |
244 input_frames_per_buffer); | 228 input_frames_per_buffer); |
245 sink_params_.Reset(kOutputFormat, | 229 sink_params_.Reset(kOutputFormat, |
246 kOutputChannelLayout, | 230 kOutputChannelLayout, |
247 output_sample_rate, | 231 output_sample_rate, |
248 kOutputBitsPerSample, | 232 kOutputBitsPerSample, |
249 output_frames_per_buffer); | 233 output_frames_per_buffer); |
250 source_bus_ = | 234 source_bus_ = |
251 media::AudioBus::Create(kInputChannels, input_frames_per_buffer); | 235 media::AudioBus::Create(kInputChannels, input_frames_per_buffer); |
252 source_bus_->Zero(); | 236 source_bus_->Zero(); |
253 first_frame_capture_time_ = base::TimeTicks::Now(); | 237 first_frame_capture_time_ = base::TimeTicks::Now(); |
254 sample_frames_captured_ = 0; | 238 sample_frames_captured_ = 0; |
255 | 239 |
256 // Prepare the track and audio source. | 240 // Prepare the track and audio source. |
257 PrepareBlinkTrackOfType(MEDIA_DEVICE_AUDIO_CAPTURE, &blink_track_); | 241 blink::WebMediaStreamTrack blink_track; |
258 blink_source_ = blink_track_.source(); | 242 PrepareBlinkTrackOfType(MEDIA_DEVICE_AUDIO_CAPTURE, &blink_track); |
259 static_cast<TestDrivenAudioSource*>( | 243 |
260 MediaStreamAudioSource::From(blink_source_))->SetFormat(source_params_); | 244 // Get the native track from the blink track and initialize. |
| 245 native_track_ = |
| 246 static_cast<WebRtcLocalAudioTrack*>(blink_track.getExtraData()); |
| 247 native_track_->OnSetFormat(source_params_); |
261 | 248 |
262 // Create and initialize the consumer. | 249 // Create and initialize the consumer. |
263 recognizer_.reset(new FakeSpeechRecognizer()); | 250 recognizer_.reset(new FakeSpeechRecognizer()); |
264 base::SharedMemoryHandle foreign_memory_handle; | 251 base::SharedMemoryHandle foreign_memory_handle; |
265 recognizer_->Initialize(blink_track_, sink_params_, &foreign_memory_handle); | 252 recognizer_->Initialize(blink_track, sink_params_, &foreign_memory_handle); |
266 | 253 |
267 // Create the producer. | 254 // Create the producer. |
268 std::unique_ptr<base::SyncSocket> sending_socket( | 255 std::unique_ptr<base::SyncSocket> sending_socket( |
269 recognizer_->sending_socket()); | 256 recognizer_->sending_socket()); |
270 speech_audio_sink_.reset(new SpeechRecognitionAudioSink( | 257 speech_audio_sink_.reset(new SpeechRecognitionAudioSink( |
271 blink_track_, sink_params_, foreign_memory_handle, | 258 blink_track, sink_params_, foreign_memory_handle, |
272 std::move(sending_socket), | 259 std::move(sending_socket), |
273 base::Bind(&SpeechRecognitionAudioSinkTest::StoppedCallback, | 260 base::Bind(&SpeechRecognitionAudioSinkTest::StoppedCallback, |
274 base::Unretained(this)))); | 261 base::Unretained(this)))); |
275 | 262 |
276 // Return number of buffers needed to trigger resampling and consumption. | 263 // Return number of buffers needed to trigger resampling and consumption. |
277 return static_cast<uint32_t>(std::ceil( | 264 return static_cast<uint32_t>(std::ceil( |
278 static_cast<double>(output_frames_per_buffer * input_sample_rate) / | 265 static_cast<double>(output_frames_per_buffer * input_sample_rate) / |
279 (input_frames_per_buffer * output_sample_rate))); | 266 (input_frames_per_buffer * output_sample_rate))); |
280 } | 267 } |
281 | 268 |
282 // Mock callback expected to be called when the track is stopped. | 269 // Mock callback expected to be called when the track is stopped. |
283 MOCK_METHOD0(StoppedCallback, void()); | 270 MOCK_METHOD0(StoppedCallback, void()); |
284 | 271 |
285 protected: | 272 protected: |
286 // Prepares a blink track of a given MediaStreamType and attaches the native | 273 // Prepares a blink track of a given MediaStreamType and attaches the native |
287 // track which can be used to capture audio data and pass it to the producer. | 274 // track which can be used to capture audio data and pass it to the producer. |
288 void PrepareBlinkTrackOfType(const MediaStreamType device_type, | 275 static void PrepareBlinkTrackOfType( |
289 blink::WebMediaStreamTrack* blink_track) { | 276 const MediaStreamType device_type, |
290 blink::WebMediaStreamSource blink_source; | 277 blink::WebMediaStreamTrack* blink_track) { |
291 blink_source.initialize(blink::WebString::fromUTF8("dummy_source_id"), | 278 scoped_refptr<WebRtcLocalAudioTrackAdapter> adapter( |
292 blink::WebMediaStreamSource::TypeAudio, | 279 WebRtcLocalAudioTrackAdapter::Create(std::string(), NULL)); |
293 blink::WebString::fromUTF8("dummy_source_name"), | 280 std::unique_ptr<WebRtcLocalAudioTrack> native_track( |
294 false /* remote */); | 281 new WebRtcLocalAudioTrack(adapter.get())); |
295 TestDrivenAudioSource* const audio_source = new TestDrivenAudioSource(); | 282 blink::WebMediaStreamSource blink_audio_source; |
296 audio_source->SetDeviceInfo( | 283 blink_audio_source.initialize(base::UTF8ToUTF16("dummy_source_id"), |
297 StreamDeviceInfo(device_type, "Mock device", "mock_device_id")); | 284 blink::WebMediaStreamSource::TypeAudio, |
298 blink_source.setExtraData(audio_source); // Takes ownership. | 285 base::UTF8ToUTF16("dummy_source_name"), |
299 | 286 false /* remote */); |
| 287 MediaStreamSource::SourceStoppedCallback cb; |
| 288 blink_audio_source.setExtraData(new MediaStreamAudioSource( |
| 289 -1, StreamDeviceInfo(device_type, "Mock device", "mock_device_id"), cb, |
| 290 nullptr)); |
300 blink_track->initialize(blink::WebString::fromUTF8("dummy_track"), | 291 blink_track->initialize(blink::WebString::fromUTF8("dummy_track"), |
301 blink_source); | 292 blink_audio_source); |
302 ASSERT_TRUE(audio_source->ConnectToTrack(*blink_track)); | 293 blink_track->setExtraData(native_track.release()); |
303 } | 294 } |
304 | 295 |
305 // Emulates an audio capture device capturing data from the source. | 296 // Emulates an audio capture device capturing data from the source. |
306 inline void CaptureAudio(const uint32_t buffers) { | 297 inline void CaptureAudio(const uint32_t buffers) { |
307 for (uint32_t i = 0; i < buffers; ++i) { | 298 for (uint32_t i = 0; i < buffers; ++i) { |
308 const base::TimeTicks estimated_capture_time = first_frame_capture_time_ + | 299 const base::TimeTicks estimated_capture_time = first_frame_capture_time_ + |
309 (sample_frames_captured_ * base::TimeDelta::FromSeconds(1) / | 300 (sample_frames_captured_ * base::TimeDelta::FromSeconds(1) / |
310 source_params_.sample_rate()); | 301 source_params_.sample_rate()); |
311 static_cast<TestDrivenAudioSource*>( | 302 native_track()->Capture(*source_bus_, estimated_capture_time); |
312 MediaStreamAudioSource::From(blink_source_)) | |
313 ->DeliverDataToTracks(*source_bus_, estimated_capture_time); | |
314 sample_frames_captured_ += source_bus_->frames(); | 303 sample_frames_captured_ += source_bus_->frames(); |
315 } | 304 } |
316 } | 305 } |
317 | 306 |
318 // Used to simulate a problem with sockets. | 307 // Used to simulate a problem with sockets. |
319 void SetFailureModeOnForeignSocket(bool in_failure_mode) { | 308 void SetFailureModeOnForeignSocket(bool in_failure_mode) { |
320 recognizer()->sending_socket()->SetFailureMode(in_failure_mode); | 309 recognizer()->sending_socket()->SetFailureMode(in_failure_mode); |
321 } | 310 } |
322 | 311 |
323 // Helper method for verifying captured audio data has been consumed. | 312 // Helper method for verifying captured audio data has been consumed. |
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
357 << ")"; | 346 << ")"; |
358 } | 347 } |
359 } | 348 } |
360 | 349 |
361 media::AudioBus* source_bus() const { return source_bus_.get(); } | 350 media::AudioBus* source_bus() const { return source_bus_.get(); } |
362 | 351 |
363 FakeSpeechRecognizer* recognizer() const { return recognizer_.get(); } | 352 FakeSpeechRecognizer* recognizer() const { return recognizer_.get(); } |
364 | 353 |
365 const media::AudioParameters& sink_params() const { return sink_params_; } | 354 const media::AudioParameters& sink_params() const { return sink_params_; } |
366 | 355 |
367 MediaStreamAudioTrack* native_track() const { | 356 WebRtcLocalAudioTrack* native_track() const { return native_track_; } |
368 return MediaStreamAudioTrack::From(blink_track_); | |
369 } | |
370 | 357 |
371 private: | 358 private: |
372 MockPeerConnectionDependencyFactory mock_dependency_factory_; | |
373 | |
374 // Producer. | 359 // Producer. |
375 std::unique_ptr<SpeechRecognitionAudioSink> speech_audio_sink_; | 360 std::unique_ptr<SpeechRecognitionAudioSink> speech_audio_sink_; |
376 | 361 |
377 // Consumer. | 362 // Consumer. |
378 std::unique_ptr<FakeSpeechRecognizer> recognizer_; | 363 std::unique_ptr<FakeSpeechRecognizer> recognizer_; |
379 | 364 |
380 // Audio related members. | 365 // Audio related members. |
381 std::unique_ptr<media::AudioBus> source_bus_; | 366 std::unique_ptr<media::AudioBus> source_bus_; |
382 media::AudioParameters source_params_; | 367 media::AudioParameters source_params_; |
383 media::AudioParameters sink_params_; | 368 media::AudioParameters sink_params_; |
384 blink::WebMediaStreamSource blink_source_; | 369 WebRtcLocalAudioTrack* native_track_; |
385 blink::WebMediaStreamTrack blink_track_; | |
386 | 370 |
387 base::TimeTicks first_frame_capture_time_; | 371 base::TimeTicks first_frame_capture_time_; |
388 int64_t sample_frames_captured_; | 372 int64_t sample_frames_captured_; |
389 | 373 |
390 DISALLOW_COPY_AND_ASSIGN(SpeechRecognitionAudioSinkTest); | 374 DISALLOW_COPY_AND_ASSIGN(SpeechRecognitionAudioSinkTest); |
391 }; | 375 }; |
392 | 376 |
393 // Not all types of tracks are supported. This test checks if that policy is | 377 // Not all types of tracks are supported. This test checks if that policy is |
394 // implemented correctly. | 378 // implemented correctly. |
395 TEST_F(SpeechRecognitionAudioSinkTest, CheckIsSupportedAudioTrack) { | 379 TEST_F(SpeechRecognitionAudioSinkTest, CheckIsSupportedAudioTrack) { |
(...skipping 141 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
537 const uint32_t buffers_per_notification = Initialize(44100, 441, 16000, 1600); | 521 const uint32_t buffers_per_notification = Initialize(44100, 441, 16000, 1600); |
538 AssertConsumedBuffers(0U); | 522 AssertConsumedBuffers(0U); |
539 CaptureAudioAndAssertConsumedBuffers(buffers_per_notification, 1U); | 523 CaptureAudioAndAssertConsumedBuffers(buffers_per_notification, 1U); |
540 EXPECT_CALL(*this, StoppedCallback()).Times(1); | 524 EXPECT_CALL(*this, StoppedCallback()).Times(1); |
541 | 525 |
542 native_track()->Stop(); | 526 native_track()->Stop(); |
543 CaptureAudioAndAssertConsumedBuffers(buffers_per_notification, 1U); | 527 CaptureAudioAndAssertConsumedBuffers(buffers_per_notification, 1U); |
544 } | 528 } |
545 | 529 |
546 } // namespace content | 530 } // namespace content |
OLD | NEW |