Chromium Code Reviews

Unified Diff: content/renderer/speech_recognition_audio_source_provider_unittest.cc

Issue 499233003: Binding media stream audio track to speech recognition [renderer] (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Add unit test and refactor Created 6 years, 3 months ago
Index: content/renderer/speech_recognition_audio_source_provider_unittest.cc
diff --git a/content/renderer/speech_recognition_audio_source_provider_unittest.cc b/content/renderer/speech_recognition_audio_source_provider_unittest.cc
new file mode 100644
index 0000000000000000000000000000000000000000..4f575da824f4325dd854de7c612ddc199d1e7d7b
--- /dev/null
+++ b/content/renderer/speech_recognition_audio_source_provider_unittest.cc
@@ -0,0 +1,483 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "content/renderer/speech_recognition_audio_source_provider.h"
+
+#include "base/logging.h"
+#include "base/strings/utf_string_conversions.h"
+#include "content/renderer/media/media_stream_audio_source.h"
+#include "content/renderer/media/mock_media_constraint_factory.h"
+#include "content/renderer/media/rtc_media_constraints.h"
+#include "content/renderer/media/webrtc/mock_peer_connection_dependency_factory.h"
+#include "content/renderer/media/webrtc/webrtc_local_audio_track_adapter.h"
+#include "content/renderer/media/webrtc_audio_capturer.h"
+#include "content/renderer/media/webrtc_audio_device_impl.h"
+#include "content/renderer/media/webrtc_local_audio_source_provider.h"
+#include "content/renderer/media/webrtc_local_audio_track.h"
+#include "media/audio/audio_parameters.h"
+#include "media/base/audio_bus.h"
+#include "media/base/audio_capturer_source.h"
+#include "testing/gmock/include/gmock/gmock.h"
+#include "testing/gtest/include/gtest/gtest.h"
+#include "third_party/WebKit/public/platform/WebMediaStreamTrack.h"
+
+namespace content {
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Buffer to be shared between two mock sockets.
burnik 2014/09/12 12:09:12 'fake' is interchangeable with 'mock' regarding so
burnik 2014/09/15 15:00:07 Done.
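+// Note: instances are heap-allocated and owned by the FakeSpeechRecognizer
+// (see |shared_buffer_|), so the 100000-byte array does not live on the stack.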
+struct SharedBuffer {
+ uint8 data[100000];
no longer working on chromium 2014/09/15 08:31:29 noooooo, you can't allocate 100000 bytes in stack
burnik 2014/09/15 15:00:07 This is allocated on and owned by the FakeSpeechRe
+ size_t start;
+ size_t length;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Mock socket used for Send/Receive.
+// Data is written and read from a shared buffer used as a FIFO and there is
+// no blocking. |OnSendCB| is used to trigger a |Receive| on the other socket.
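+// The callback is optional; a socket constructed without one (such as the
+// local socket in this test) does not notify anyone on |Send|.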
+class MockSyncSocket : public base::SyncSocket {
+ public:
+ typedef base::Callback<void()> OnSendCB;
+
+ explicit MockSyncSocket(SharedBuffer* shared_buffer);
+ MockSyncSocket(SharedBuffer* shared_buffer, const OnSendCB& on_send_cb);
+
+ virtual size_t Send(const void* buffer, size_t length) OVERRIDE;
+ virtual size_t Receive(void* buffer, size_t length) OVERRIDE;
+
+ // When |in_failure_mode_| == true, the socket fails to send.
+ void SetFailureMode(bool in_failure_mode) {
+ in_failure_mode_ = in_failure_mode;
+ }
+
+ private:
+ SharedBuffer* buffer_;
+ const OnSendCB on_send_cb_;
+ bool in_failure_mode_;
+};
+
+MockSyncSocket::MockSyncSocket(SharedBuffer* buffer)
+ : buffer_(buffer), in_failure_mode_(false) {}
+
+MockSyncSocket::MockSyncSocket(SharedBuffer* buffer, const OnSendCB& on_send_cb)
+ : buffer_(buffer), on_send_cb_(on_send_cb), in_failure_mode_(false) {}
+
+size_t MockSyncSocket::Send(const void* buffer, size_t length) {
+ if (in_failure_mode_)
+ return 0;
+ const uint8* b = static_cast<const uint8*>(buffer);
+ // Append the bytes at the write offset of the shared FIFO.
+ for (size_t i = 0; i < length; ++i, ++buffer_->length)
+ buffer_->data[buffer_->length] = b[i];
+ if (!on_send_cb_.is_null())
+ on_send_cb_.Run();
+ return length;
+}
+
+size_t MockSyncSocket::Receive(void* buffer, size_t length) {
+ uint8* b = static_cast<uint8*>(buffer);
+ // Copy up to |length| unread bytes from the shared FIFO, advancing the read
+ // offset as we go.
+ for (size_t i = 0; i < length && buffer_->start < buffer_->length;
+ ++i, ++buffer_->start)
+ b[i] = buffer_->data[buffer_->start];
+ return length;
+}
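+// Note: |start| is the read offset and |length| the write offset into |data|;
+// neither wraps around, so the shared FIFO is single-use within a test.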
+
+////////////////////////////////////////////////////////////////////////////////
+
+class FakeSpeechRecognizer {
+ public:
+ FakeSpeechRecognizer() : is_responsive_(true) {}
+ ~FakeSpeechRecognizer() {}
+
+ void Initialize(
+ const blink::WebMediaStreamTrack& track,
+ const media::AudioParameters& sink_params,
+ const SpeechRecognitionAudioSourceProvider::OnErrorCB& on_error_cb);
+
+ // TODO(burnik): Move from the recognizer to test.
+ SpeechRecognitionAudioSourceProvider* SourceProvider();
+
+ // Emulates a single iteration of a thread receiving on the socket.
+ virtual void EmulateReceiveThreadLoopIteration();
+
+ // Used to simulate unresponsive behavior of the consumer.
+ void SimulateResponsiveness(bool is_responsive) {
+ is_responsive_ = is_responsive;
+ }
+ // Used to simulate a problem with sockets.
+ void SetFailureModeOnForeignSocket(bool in_failure_mode) {
+ DCHECK(foreign_socket_.get());
+ foreign_socket_->SetFailureMode(in_failure_mode);
+ }
+
+ uint32 buffer_index() { return *shared_buffer_index_; }
+ media::AudioBus* audio_bus() const { return audio_track_bus_.get(); }
+
+ private:
+ bool is_responsive_;
+ // Shared memory for the audio and synchronization.
+ scoped_ptr<base::SharedMemory> shared_memory_;
+
+ // Shared buffer for the mock sockets.
+ scoped_ptr<SharedBuffer> shared_buffer_;
+ scoped_ptr<MockSyncSocket> local_socket_;
+ scoped_ptr<MockSyncSocket> foreign_socket_;
+
+ // Audio bus wrapping the shared memory from the renderer.
+ scoped_ptr<media::AudioBus> audio_track_bus_;
+
+ uint32* shared_buffer_index_;
+ // Producer. TODO(burnik): this should be outside the recognizer.
+ scoped_ptr<SpeechRecognitionAudioSourceProvider> audio_source_provider_;
+};
+
+void FakeSpeechRecognizer::Initialize(
+ const blink::WebMediaStreamTrack& track,
+ const media::AudioParameters& sink_params,
+ const SpeechRecognitionAudioSourceProvider::OnErrorCB& on_error_cb) {
+ // Shared memory is allocated, mapped and shared.
+ uint32 shared_memory_size = sizeof(media::AudioInputBufferParameters) +
+ media::AudioBus::CalculateMemorySize(sink_params);
+ shared_memory_.reset(new base::SharedMemory());
+
+ ASSERT_TRUE(shared_memory_->CreateAndMapAnonymous(shared_memory_size))
+ << "Failed to create shared memory";
+
+ base::SharedMemoryHandle foreign_memory_handle;
+ ASSERT_TRUE(shared_memory_->ShareToProcess(base::GetCurrentProcessHandle(),
+ &foreign_memory_handle))
+ << "Failed to share memory";
+
+ media::AudioInputBuffer* buffer =
+ static_cast<media::AudioInputBuffer*>(shared_memory_->memory());
+ audio_track_bus_ = media::AudioBus::WrapMemory(sink_params, buffer->audio);
+
+ // Reference to the counter used to synchronize.
+ shared_buffer_index_ = &(buffer->params.size);
+ *shared_buffer_index_ = 0U;
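+ // Note: |params.size| of the shared AudioInputBufferParameters struct is
+ // repurposed here as the producer/consumer synchronization counter.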
+
+ // Create a shared buffer for the |MockSyncSocket|s.
+ shared_buffer_.reset(new SharedBuffer());
+ ASSERT_EQ(0U, shared_buffer_->start);
+ ASSERT_EQ(0U, shared_buffer_->length);
+
+ // Local socket will receive signals from the producer.
+ local_socket_.reset(new MockSyncSocket(shared_buffer_.get()));
+
+ // We automatically trigger a Receive when data is sent over the socket.
+ foreign_socket_.reset(new MockSyncSocket(
+ shared_buffer_.get(),
+ base::Bind(&FakeSpeechRecognizer::EmulateReceiveThreadLoopIteration,
+ base::Unretained(this))));
+
+ // This is usually done to pair the sockets. Here it has no effect, because
+ // the overridden |Send|/|Receive| use the shared buffer instead.
+ base::SyncSocket::CreatePair(local_socket_.get(), foreign_socket_.get());
+
+ // Create the producer. TODO(burnik): move out of the recognizer.
+ audio_source_provider_.reset(new SpeechRecognitionAudioSourceProvider(
+ track, sink_params, foreign_memory_handle, foreign_socket_.get(),
+ on_error_cb));
+}
+
+// TODO(burnik): Remove from the recognizer.
+SpeechRecognitionAudioSourceProvider* FakeSpeechRecognizer::SourceProvider() {
+ return audio_source_provider_.get();
+}
+
+// Emulates the receive on the socket. This would normally be done on a
+// receiving thread's loop in the browser process.
+void FakeSpeechRecognizer::EmulateReceiveThreadLoopIteration() {
+ // When unresponsive, do nothing, as if the process were busy.
+ if (!is_responsive_)
+ return;
+ local_socket_->Receive(shared_buffer_index_, sizeof(*shared_buffer_index_));
+ // Notify the producer that the audio buffer has been consumed.
+ (*shared_buffer_index_)++;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Input audio format
+static const media::AudioParameters::Format kInputFormat =
+ media::AudioParameters::AUDIO_PCM_LOW_LATENCY;
+const media::ChannelLayout kInputChannelLayout = media::CHANNEL_LAYOUT_MONO;
+const int kInputChannels = 1;
+const int kInputSampleRate = 44100;
+const int kInputBitsPerSample = 16;
+const int kInputFramesPerBuffer = 441;
+
+// Output audio format
+const media::AudioParameters::Format kOutputFormat =
+ media::AudioParameters::AUDIO_PCM_LOW_LATENCY;
+const media::ChannelLayout kOutputChannelLayout = media::CHANNEL_LAYOUT_STEREO;
+const int kOutputChannels = 2;
+const int kOutputSampleRate = 16000;
+const int kOutputBitsPerSample = 16;
+const int kOutputFramesPerBuffer = 1600;
+
+// Minimum number of input buffers that triggers a single socket transfer.
+const size_t kBuffersPerNotification =
+ (kOutputFramesPerBuffer * kInputSampleRate) /
+ (kInputFramesPerBuffer * kOutputSampleRate);
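+// With the parameters above: (1600 * 44100) / (441 * 16000) = 10.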
+
+// Number of buffers which make the FIFO ready for consumption.
+const size_t kBuffersForReadyFifo =
+ (kOutputFramesPerBuffer * kInputSampleRate) / kOutputSampleRate;
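+// With the parameters above: (1600 * 44100) / 16000 = 4410.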
+
+////////////////////////////////////////////////////////////////////////////////
+
+class SpeechRecognitionAudioSourceProviderTest : public testing::Test {
+ public:
+ SpeechRecognitionAudioSourceProviderTest() {}
+
+ // Mock for error callback.
+ MOCK_METHOD1(ErrorCallback,
+ void(SpeechRecognitionAudioSourceProvider::ErrorState));
+
+ // testing::Test methods.
+ virtual void SetUp() OVERRIDE {
+ // Audio Environment setup.
+ source_params_.Reset(kInputFormat, kInputChannelLayout, kInputChannels,
+ kInputSampleRate, kInputBitsPerSample,
+ kInputFramesPerBuffer);
+
+ sink_params_.Reset(kOutputFormat, kOutputChannelLayout, kOutputChannels,
+ kOutputSampleRate, kOutputBitsPerSample,
+ kOutputFramesPerBuffer);
+
+ source_data_length_ =
+ source_params_.frames_per_buffer() * source_params_.channels();
+ source_data_.reset(new int16[source_data_length_]);
+
+ MockMediaConstraintFactory constraint_factory;
+ scoped_refptr<WebRtcAudioCapturer> capturer(
+ WebRtcAudioCapturer::CreateCapturer(
+ -1, StreamDeviceInfo(),
+ constraint_factory.CreateWebMediaConstraints(), NULL, NULL));
+ scoped_refptr<WebRtcLocalAudioTrackAdapter> adapter(
+ WebRtcLocalAudioTrackAdapter::Create(std::string(), NULL));
+ native_track_ = new WebRtcLocalAudioTrack(adapter.get(), capturer, NULL);
+ native_track_->OnSetFormat(source_params_);
+
+ blink::WebMediaStreamSource audio_source;
+ audio_source.initialize(base::UTF8ToUTF16("dummy_source_id"),
+ blink::WebMediaStreamSource::TypeAudio,
+ base::UTF8ToUTF16("dummy_source_name"));
+ blink_track_.initialize(blink::WebString::fromUTF8("audio_track"),
+ audio_source);
+ blink_track_.setExtraData(native_track_);
+
+ // Create the consumer.
+ recognizer_.reset(new FakeSpeechRecognizer());
+ recognizer_->Initialize(
+ blink_track_, sink_params_,
+ base::Bind(&SpeechRecognitionAudioSourceProviderTest::ErrorCallback,
+ base::Unretained(this)));
+
+ // Grab the producer. It is created and owned by the recognizer, so only a
+ // raw pointer is kept here. TODO(burnik): move ownership to the test.
+ audio_source_provider_ = recognizer_->SourceProvider();
+ }
+
+ virtual void TearDown() OVERRIDE { blink_track_.reset(); }
+
+ protected:
+ // TODO(burnik): Recheck steps and simplify method. Try reusing in |SetUp()|.
+ static blink::WebMediaStreamTrack CreateBlinkTrackWithMediaStreamType(
+ const MediaStreamType device_type) {
+ MockMediaConstraintFactory constraint_factory;
+
+ MediaStreamSource::SourceStoppedCallback cb;
+
+ StreamDeviceInfo device_info(device_type, "Mock audio device",
+ "mock_audio_device_id");
+ WebRtcAudioDeviceImpl* device = new WebRtcAudioDeviceImpl();
+ scoped_ptr<MediaStreamAudioSource> stream_audio_source(
+ new MediaStreamAudioSource(-1, device_info, cb, NULL));
+ const blink::WebMediaConstraints constraints =
+ constraint_factory.CreateWebMediaConstraints();
+ MockPeerConnectionDependencyFactory* factory =
+ new MockPeerConnectionDependencyFactory();
+ scoped_refptr<webrtc::AudioSourceInterface> audio_source =
+ factory->CreateLocalAudioSource(new RTCMediaConstraints(constraints));
+ scoped_refptr<WebRtcAudioCapturer> capturer(
+ WebRtcAudioCapturer::CreateCapturer(-1, device_info, constraints,
+ device, stream_audio_source.get()));
+ scoped_refptr<WebRtcLocalAudioTrackAdapter> adapter(
+ WebRtcLocalAudioTrackAdapter::Create(std::string(),
+ audio_source.get()));
+ scoped_ptr<WebRtcLocalAudioTrack> native_track(
+ new WebRtcLocalAudioTrack(adapter.get(), capturer, NULL));
+
+ blink::WebMediaStreamSource blink_audio_source;
+ blink_audio_source.initialize(base::UTF8ToUTF16("dummy_source_id"),
+ blink::WebMediaStreamSource::TypeAudio,
+ base::UTF8ToUTF16("dummy_source_name"));
+ blink_audio_source.setExtraData(stream_audio_source.release());
+
+ blink::WebMediaStreamTrack blink_track;
+ blink_track.initialize(blink::WebString::fromUTF8("audio_track"),
+ blink_audio_source);
+ blink_track.setExtraData(native_track.release());
+
+ return blink_track;
+ }
+
+ // Emulates an audio capture device capturing data from the source.
+ inline void CaptureAudio(const size_t buffers) {
+ DCHECK(native_track_);
+ for (size_t i = 0; i < buffers; ++i)
+ native_track_->Capture(source_data_.get(),
+ base::TimeDelta::FromMilliseconds(0), 1, false,
+ false);
+ }
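+ // Note: each |Capture()| call in |CaptureAudio()| pushes one 441-frame
+ // input buffer through the track; |kBuffersPerNotification| such calls
+ // produce a single socket transfer.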
+
+ // Helper method to verify captured audio data has been consumed.
+ inline void AssertConsumedBuffers(const size_t buffer_index) {
+ ASSERT_EQ(buffer_index, recognizer_->buffer_index());
+ }
+
+ // Helper method to push audio data to producer and verify it was consumed.
+ inline void CaptureAudioAndAssertConsumedBuffers(const size_t buffers,
+ const size_t buffer_index) {
+ CaptureAudio(buffers);
+ AssertConsumedBuffers(buffer_index);
+ }
+
+ protected:
+ // Producer. Owned by |recognizer_|; see the TODOs in FakeSpeechRecognizer.
+ SpeechRecognitionAudioSourceProvider* audio_source_provider_;
+ // Consumer.
+ scoped_ptr<FakeSpeechRecognizer> recognizer_;
+ // Audio related members.
+ size_t source_data_length_;
+ media::AudioParameters source_params_;
+ scoped_ptr<int16[]> source_data_;
+ size_t sink_data_length_;
+ media::AudioParameters sink_params_;
+ blink::WebMediaStreamTrack blink_track_;
+ WebRtcLocalAudioTrack* native_track_;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////
+
+TEST_F(SpeechRecognitionAudioSourceProviderTest, CheckAllowedAudioTrackType) {
+ typedef std::map<MediaStreamType, bool> AllowedAudioTrackSourceTypePolicy;
+ // This test must be aligned with the policy of allowed tracks.
+ AllowedAudioTrackSourceTypePolicy p;
+ p[MEDIA_NO_SERVICE] = false;
+ p[MEDIA_DEVICE_AUDIO_CAPTURE] = true; // Only one allowed for now.
+ p[MEDIA_DEVICE_VIDEO_CAPTURE] = false;
+ p[MEDIA_TAB_AUDIO_CAPTURE] = false;
+ p[MEDIA_TAB_VIDEO_CAPTURE] = false;
+ p[MEDIA_DESKTOP_VIDEO_CAPTURE] = false;
+ p[MEDIA_LOOPBACK_AUDIO_CAPTURE] = false;
+ p[MEDIA_DEVICE_AUDIO_OUTPUT] = false;
+ // Ensure this test gets updated along with |content::MediaStreamType| enum.
+ EXPECT_EQ(static_cast<size_t>(NUM_MEDIA_TYPES), p.size());
+ // Check the entire policy.
+ for (AllowedAudioTrackSourceTypePolicy::iterator it = p.begin();
+ it != p.end(); ++it) {
+ ASSERT_EQ(it->second,
+ SpeechRecognitionAudioSourceProvider::IsAllowedAudioTrack(
+ CreateBlinkTrackWithMediaStreamType(it->first)));
+ }
+}
+
+TEST_F(SpeechRecognitionAudioSourceProviderTest, RecognizerNotifiedOnSocket) {
+ AssertConsumedBuffers(0U);
+ CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 1U);
+ CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 2U);
+ CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 3U);
+}
+
+TEST_F(SpeechRecognitionAudioSourceProviderTest, AudioDataIsResampledOnSink) {
+ // Fill the audio input frames with the sequence 0, 1, 2, ..., 440.
+ for (size_t i = 0; i < source_data_length_; ++i)
+ source_data_[i] = i;
+
+ const size_t num_frames_to_test = 12;
+ int16 sink_data[kOutputFramesPerBuffer * kOutputChannels];
+ media::AudioBus* sink_bus = recognizer_->audio_bus();
+
+ // Render the audio data from the recognizer.
+ sink_bus->ToInterleaved(sink_bus->frames(),
+ sink_params_.bits_per_sample() / 8, sink_data);
+
+ // Test that both channels are zeroed out before we trigger resampling.
+ for (size_t i = 0; i < num_frames_to_test; ++i) {
+ ASSERT_EQ(0, sink_data[i * 2]);
+ ASSERT_EQ(0, sink_data[i * 2 + 1]);
+ }
+
+ // Trigger the source provider to resample the input data.
+ AssertConsumedBuffers(0U);
+ CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 1U);
+
+ // Render the audio data from the recognizer.
+ sink_bus->ToInterleaved(sink_bus->frames(),
+ sink_params_.bits_per_sample() / 8, sink_data);
+
+ // Resampled data expected frames - based on |source_data_|.
+ // Note: these values also depend on input/output audio params.
+ const int16 expected_data[num_frames_to_test] = {0, 2, 5, 8, 11, 13,
+ 16, 19, 22, 24, 27, 30};
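+ // Sanity check: output frame i maps to input position i * (44100 / 16000)
+ // = i * 2.75625, consistent with the truncated ramp values above.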
+
+ // Test that both channels have the same resampled data.
+ for (size_t i = 0; i < num_frames_to_test; ++i) {
+ ASSERT_EQ(expected_data[i], sink_data[i * 2]);
+ ASSERT_EQ(expected_data[i], sink_data[i * 2 + 1]);
+ }
+}
+
+TEST_F(SpeechRecognitionAudioSourceProviderTest, SyncSocketFailsSendingData) {
+ // (1) Start out with no problems.
+ AssertConsumedBuffers(0U);
+ CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 1U);
+
+ // (2) A failure occurs (the socket fails to send).
+ recognizer_->SetFailureModeOnForeignSocket(true);
+ EXPECT_CALL(*this,
+ ErrorCallback(SpeechRecognitionAudioSourceProvider::SEND_FAILED))
+ .Times(1);
+ CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 1U);
+
+ // (3) Miraculously recovered from the socket failure.
burnik 2014/09/12 12:09:12 * Miraculously
burnik 2014/09/15 15:00:07 Done.
+ recognizer_->SetFailureModeOnForeignSocket(false);
+ CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 2U);
+}
+
+TEST_F(SpeechRecognitionAudioSourceProviderTest, PeerProcessGotUnresponsive) {
+ EXPECT_GT(kBuffersForReadyFifo, kBuffersPerNotification);
+ AssertConsumedBuffers(0U);
+
+ // (1) We respond to audio packets as expected.
+ recognizer_->SimulateResponsiveness(true);
burnik 2014/09/12 12:09:12 This comment is deprecated.
burnik 2014/09/15 15:00:07 Done.
+ CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 1U);
+
+ // (2) The recognizer on the browser becomes unresponsive.
+ recognizer_->SimulateResponsiveness(false);
+ EXPECT_CALL(*this, ErrorCallback(
+ SpeechRecognitionAudioSourceProvider::BUFFER_SYNC_LAG))
+ .Times(testing::AtLeast(1));
+ CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 1U);
+
+ // (3) The producer gets an overflow.
+ EXPECT_CALL(
+ *this,
+ ErrorCallback(SpeechRecognitionAudioSourceProvider::AUDIO_FIFO_OVERFLOW))
+ .Times(testing::AtLeast(1));
+ CaptureAudioAndAssertConsumedBuffers(kBuffersForReadyFifo, 1U);
+}
+
+TEST_F(SpeechRecognitionAudioSourceProviderTest, OnReadyStateChangedOccurred) {
+ AssertConsumedBuffers(0U);
+ CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 1U);
+ EXPECT_CALL(
+ *this, ErrorCallback(SpeechRecognitionAudioSourceProvider::TRACK_STOPPED))
+ .Times(1);
+
+ native_track_->Stop();
+ CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 1U);
+}
+
+} // namespace content
