Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(472)

Unified Diff: content/renderer/media/speech_recognition_audio_source_provider_unittest.cc

Issue 499233003: Binding media stream audio track to speech recognition [renderer] (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Refactoring, error states, more comments. Created 6 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: content/renderer/media/speech_recognition_audio_source_provider_unittest.cc
diff --git a/content/renderer/media/speech_recognition_audio_source_provider_unittest.cc b/content/renderer/media/speech_recognition_audio_source_provider_unittest.cc
new file mode 100644
index 0000000000000000000000000000000000000000..0ff7a184145cc04407040a621d3829d26f548fa8
--- /dev/null
+++ b/content/renderer/media/speech_recognition_audio_source_provider_unittest.cc
@@ -0,0 +1,462 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "content/renderer/media/speech_recognition_audio_source_provider.h"
+
+#include "base/strings/utf_string_conversions.h"
+#include "content/renderer/media/mock_media_constraint_factory.h"
+#include "content/renderer/media/webrtc/webrtc_local_audio_track_adapter.h"
+#include "content/renderer/media/webrtc_local_audio_track.h"
+#include "media/audio/audio_parameters.h"
+#include "media/base/audio_bus.h"
+#include "testing/gmock/include/gmock/gmock.h"
+#include "testing/gtest/include/gtest/gtest.h"
+#include "third_party/WebKit/public/platform/WebMediaStreamTrack.h"
+
+namespace content {
+
+// Mocked out sockets used for Send/Receive.
+// Data is written and read from a shared buffer used as a FIFO and there is
+// no blocking. |OnSendCB| is used to trigger a |Receive| on the other socket.
+class MockSyncSocket : public base::SyncSocket {
+ public:
+ // This allows for 2 requests in queue between the |MockSyncSocket|s.
+ static const int kSharedBufferSize = 8;
+
+ // Buffer to be shared between two |MockSyncSocket|s. Allocated on heap.
+ struct SharedBuffer {
+ SharedBuffer() : start(0), length(0) {}
tommi (sloooow) - chröme 2014/09/24 09:52:00 nit: what about also initializing data? SharedBuf
burnik 2014/09/24 11:54:22 Done.
+
+ uint8 data[kSharedBufferSize];
+ size_t start;
+ size_t length;
+ };
+
+ // Callback used for pairing an A.Send() with B.Receieve() without blocking.
+ typedef base::Callback<void()> OnSendCB;
+
+ explicit MockSyncSocket(SharedBuffer* shared_buffer)
+ : buffer_(shared_buffer),
+ in_failure_mode_(false) { }
tommi (sloooow) - chröme 2014/09/24 09:51:59 nit: {}
burnik 2014/09/24 11:54:22 Done.
+
+ MockSyncSocket(SharedBuffer* shared_buffer, const OnSendCB& on_send_cb)
+ : buffer_(shared_buffer),
+ on_send_cb_(on_send_cb),
+ in_failure_mode_(false) { }
+
+ virtual size_t Send(const void* buffer, size_t length) OVERRIDE;
+ virtual size_t Receive(void* buffer, size_t length) OVERRIDE;
+
+ // When |in_failure_mode_| == true, the socket fails to send.
+ void SetFailureMode(bool in_failure_mode) {
+ in_failure_mode_ = in_failure_mode;
+ }
+
+ private:
+ SharedBuffer* buffer_;
+ const OnSendCB on_send_cb_;
+ bool in_failure_mode_;
+};
+
+size_t MockSyncSocket::Send(const void* buffer, size_t length) {
+ if (in_failure_mode_)
+ return 0;
+
+ uint8* b = static_cast<uint8*>(const_cast<void*>(buffer));
tommi (sloooow) - chröme 2014/09/24 09:51:59 is this safe (if it is, please add a comment)? Wh
burnik 2014/09/24 11:54:22 Would this be safe? const uint8* b = static_cast<
+ for (size_t i = 0; i < length; ++i, ++buffer_->length)
+ buffer_->data[buffer_->start + buffer_->length] = b[i];
tommi (sloooow) - chröme 2014/09/24 09:51:59 hmm... I don't see why you need to cast away the c
burnik 2014/09/24 11:54:22 Acknowledged.
+
+ on_send_cb_.Run();
+ return length;
+}
+
+size_t MockSyncSocket::Receive(void* buffer, size_t length) {
+ uint8* b = static_cast<uint8*>(const_cast<void*>(buffer));
tommi (sloooow) - chröme 2014/09/24 09:51:59 buffer isn't const, so no need for the const_cast
burnik 2014/09/24 11:54:22 Done.
+ for (size_t i = buffer_->start; i < buffer_->length; ++i, ++buffer_->start)
+ b[i] = buffer_->data[buffer_->start];
+
+ // Since buffer is used sequentially, we can reset the buffer indices here.
+ buffer_->start = buffer_->length = 0;
+ return length;
+}
+
// This fake class is the consumer used to verify behaviour of the producer.
// The |Initialize()| method shows what the consumer should be responsible for
// in the production code (minus the mocks).
class FakeSpeechRecognizer {
 public:
  FakeSpeechRecognizer() : is_responsive_(true) { }

  // Allocates and maps the shared memory, shares it to the current process,
  // wraps an AudioBus over the audio region, and sets up the mocked socket
  // pair. |foreign_memory_handle| receives the handle for the producer side.
  void Initialize(
      const blink::WebMediaStreamTrack& track,
      const media::AudioParameters& sink_params,
      base::SharedMemoryHandle* foreign_memory_handle) {
    // Shared memory is allocated, mapped and shared.
    uint32 shared_memory_size =
        sizeof(media::AudioInputBufferParameters) +
        media::AudioBus::CalculateMemorySize(sink_params);
    shared_memory_.reset(new base::SharedMemory());
    ASSERT_TRUE(shared_memory_->CreateAndMapAnonymous(shared_memory_size));
    ASSERT_TRUE(shared_memory_->ShareToProcess(base::GetCurrentProcessHandle(),
                                               foreign_memory_handle));

    // Wrap the shared memory for the audio bus.
    media::AudioInputBuffer* buffer =
        static_cast<media::AudioInputBuffer*>(shared_memory_->memory());
    audio_track_bus_ = media::AudioBus::WrapMemory(sink_params, buffer->audio);

    // Reference to the counter used to synchronize. NOTE: the |size| field of
    // |AudioInputBufferParameters| is deliberately repurposed here as a
    // running consumed-buffer index, not a byte size.
    buffer_index_ = &(buffer->params.size);
    *buffer_index_ = 0U;

    // Create a shared buffer for the |MockSyncSocket|s.
    shared_buffer_.reset(new MockSyncSocket::SharedBuffer());

    // Local socket will receive signals from the producer.
    local_socket_.reset(new MockSyncSocket(shared_buffer_.get()));

    // We automatically trigger a Receive when data is sent over the socket.
    foreign_socket_ = new MockSyncSocket(
        shared_buffer_.get(),
        base::Bind(&FakeSpeechRecognizer::EmulateReceiveThreadLoopIteration,
                   base::Unretained(this)));

    // This is usually done to pair the sockets. Here it's not effective.
    base::SyncSocket::CreatePair(local_socket_.get(), foreign_socket_);
  }

  // Emulates a single iteration of a thread receiving on the socket.
  // This would normally be done on a receiving thread's task on the browser.
  void EmulateReceiveThreadLoopIteration() {
    // When not responsive do nothing as if the process is busy.
    if (!is_responsive_)
      return;

    local_socket_->Receive(buffer_index_, sizeof(*buffer_index_));
    // Notify the producer that the audio buffer has been consumed.
    ++(*buffer_index_);
  }

  // Used to simulate an unresponsive behaviour of the consumer.
  void SimulateResponsiveness(bool is_responsive) {
    is_responsive_ = is_responsive;
  }

  // NOTE(review): |foreign_socket_| is exposed raw and never deleted in this
  // class; presumably the producer takes ownership — TODO confirm, otherwise
  // this leaks.
  MockSyncSocket* foreign_socket() { return foreign_socket_; }
  media::AudioBus* audio_bus() const { return audio_track_bus_.get(); }
  // Returns the running count of consumed buffers (stored in shared memory).
  uint32 buffer_index() { return *buffer_index_; }

 private:
  bool is_responsive_;

  // Shared memory for the audio and synchronization.
  scoped_ptr<base::SharedMemory> shared_memory_;

  // Fake sockets and their shared buffer.
  scoped_ptr<MockSyncSocket::SharedBuffer> shared_buffer_;
  scoped_ptr<MockSyncSocket> local_socket_;
  MockSyncSocket* foreign_socket_;

  // Audio bus wrapping the shared memory from the renderer.
  scoped_ptr<media::AudioBus> audio_track_bus_;

  // Used for synchronization of sent/received buffers. Points into the
  // shared memory region owned by |shared_memory_|.
  uint32* buffer_index_;
};
+
namespace {

// Supported speech recognition audio parameters: 16 kHz audio consumed in
// 100 ms chunks (1600 frames per buffer).
const int kSpeechRecognitionSampleRate = 16000;
const int kSpeechRecognitionFramesPerBuffer = 1600;

// Input audio format (the capture side feeding the producer).
const media::AudioParameters::Format kInputFormat =
    media::AudioParameters::AUDIO_PCM_LOW_LATENCY;
const media::ChannelLayout kInputChannelLayout = media::CHANNEL_LAYOUT_MONO;
const int kInputChannels = 1;
const int kInputBitsPerSample = 16;

// Output audio format (the sink side delivered to the recognizer).
const media::AudioParameters::Format kOutputFormat =
    media::AudioParameters::AUDIO_PCM_LOW_LATENCY;
const media::ChannelLayout kOutputChannelLayout = media::CHANNEL_LAYOUT_STEREO;
const int kOutputChannels = 2;
const int kOutputBitsPerSample = 16;

}  // namespace
+
// Test fixture wiring a mocked capture track (producer side) to a
// |FakeSpeechRecognizer| (consumer side) over mocked shared memory/sockets.
class SpeechRecognitionAudioSourceProviderTest : public testing::Test {
 public:
  SpeechRecognitionAudioSourceProviderTest() { }

  // Initializes the producer and consumer with specified audio parameters.
  // Returns the minimal number of input audio buffers which need to be captured
  // before they get sent to the consumer.
  uint32 Initialize(int input_sample_rate,
                    int input_frames_per_buffer,
                    int output_sample_rate,
                    int output_frames_per_buffer) {
    // Audio Environment setup.
    source_params_.Reset(kInputFormat,
                         kInputChannelLayout,
                         kInputChannels,
                         input_sample_rate,
                         kInputBitsPerSample,
                         input_frames_per_buffer);
    sink_params_.Reset(kOutputFormat,
                       kOutputChannelLayout,
                       kOutputChannels,
                       output_sample_rate,
                       kOutputBitsPerSample,
                       output_frames_per_buffer);
    source_data_.reset(new int16[input_frames_per_buffer * kInputChannels]);

    // Prepare the track and audio source.
    blink::WebMediaStreamTrack blink_track;
    PrepareBlinkTrackOfType(MEDIA_DEVICE_AUDIO_CAPTURE, &blink_track);

    // Get the native track from the blink track and initialize.
    native_track_ =
        static_cast<WebRtcLocalAudioTrack*>(blink_track.extraData());
    native_track_->OnSetFormat(source_params_);

    // Create and initialize the consumer.
    recognizer_.reset(new FakeSpeechRecognizer());
    base::SharedMemoryHandle foreign_memory_handle;
    recognizer_->Initialize(blink_track, sink_params_, &foreign_memory_handle);

    // Create the producer.
    audio_source_provider_.reset(new SpeechRecognitionAudioSourceProvider(
        blink_track, sink_params_, foreign_memory_handle,
        recognizer_->foreign_socket(),
        base::Bind(&SpeechRecognitionAudioSourceProviderTest::StoppedCallback,
                   base::Unretained(this))));

    // Return number of buffers needed to trigger resampling and consumption:
    // ceil(output buffer duration / input buffer duration).
    return static_cast<uint32>(std::ceil(
        static_cast<double>(output_frames_per_buffer * input_sample_rate) /
        (input_frames_per_buffer * output_sample_rate)));
  }

  // Mock callback expected to be called when the track is stopped.
  MOCK_METHOD0(StoppedCallback, void());

 protected:
  // Prepares a blink track of a given MediaStreamType and attaches the native
  // track which can be used to capture audio data and pass it to the producer.
  static void PrepareBlinkTrackOfType(
      const MediaStreamType device_type,
      blink::WebMediaStreamTrack* blink_track) {
    StreamDeviceInfo device_info(device_type, "Mock device",
                                 "mock_device_id");
    MockMediaConstraintFactory constraint_factory;
    const blink::WebMediaConstraints constraints =
        constraint_factory.CreateWebMediaConstraints();
    scoped_refptr<WebRtcAudioCapturer> capturer(
        WebRtcAudioCapturer::CreateCapturer(-1, device_info, constraints, NULL,
                                            NULL));
    scoped_refptr<WebRtcLocalAudioTrackAdapter> adapter(
        WebRtcLocalAudioTrackAdapter::Create(std::string(), NULL));
    scoped_ptr<WebRtcLocalAudioTrack> native_track(
        new WebRtcLocalAudioTrack(adapter.get(), capturer, NULL));
    blink::WebMediaStreamSource blink_audio_source;
    blink_audio_source.initialize(base::UTF8ToUTF16("dummy_source_id"),
                                  blink::WebMediaStreamSource::TypeAudio,
                                  base::UTF8ToUTF16("dummy_source_name"));
    MediaStreamSource::SourceStoppedCallback cb;
    blink_audio_source.setExtraData(
        new MediaStreamAudioSource(-1, device_info, cb, NULL));
    blink_track->initialize(blink::WebString::fromUTF8("dummy_track"),
                            blink_audio_source);
    // The blink track takes ownership of the native track via extra data.
    blink_track->setExtraData(native_track.release());
  }

  // Emulates an audio capture device capturing data from the source.
  inline void CaptureAudio(const uint32 buffers) {
    for (uint32 i = 0; i < buffers; ++i)
      native_track_->Capture(source_data_.get(),
                             base::TimeDelta::FromMilliseconds(0), 1, false,
                             false);
  }

  // Used to simulate a problem with sockets.
  void SetFailureModeOnForeignSocket(bool in_failure_mode) {
    recognizer_->foreign_socket()->SetFailureMode(in_failure_mode);
  }

  // Helper method for verifying captured audio data has been consumed.
  inline void AssertConsumedBuffers(const uint32 buffer_index) {
    ASSERT_EQ(buffer_index, recognizer_->buffer_index());
  }

  // Helper method for providing audio data to producer and verifying it was
  // consumed on the recognizer.
  inline void CaptureAudioAndAssertConsumedBuffers(const uint32 buffers,
                                                   const uint32 buffer_index) {
    CaptureAudio(buffers);
    AssertConsumedBuffers(buffer_index);
  }

  // Helper method to capture and assert consumption at different sample rates
  // and audio buffer sizes.
  inline void AssertConsumptionForAudioParameters(
      const int input_sample_rate,
      const int input_frames_per_buffer,
      const int output_sample_rate,
      const int output_frames_per_buffer,
      const uint32 consumptions) {
    const uint32 kBuffersPerNotification =
        Initialize(input_sample_rate, input_frames_per_buffer,
                   output_sample_rate, output_frames_per_buffer);
    AssertConsumedBuffers(0U);

    for (uint32 i = 1U; i <= consumptions; ++i) {
      CaptureAudio(kBuffersPerNotification);
      ASSERT_EQ(i, recognizer_->buffer_index())
          << "Tested at rates: "
          << "In(" << input_sample_rate << ", " << input_frames_per_buffer
          << ") "
          << "Out(" << output_sample_rate << ", " << output_frames_per_buffer
          << ")";
    }
  }

  // Producer.
  scoped_ptr<SpeechRecognitionAudioSourceProvider> audio_source_provider_;

  // Consumer.
  scoped_ptr<FakeSpeechRecognizer> recognizer_;

  // Audio related members.
  scoped_ptr<int16[]> source_data_;
  media::AudioParameters source_params_;
  media::AudioParameters sink_params_;
  // Not owned; lifetime is tied to the blink track created in Initialize().
  WebRtcLocalAudioTrack* native_track_;
};
+
+// Not all types of tracks are supported. This test checks if that policy is
+// implemented correctly.
+TEST_F(SpeechRecognitionAudioSourceProviderTest, CheckIsSupportedAudioTrack) {
+ typedef std::map<MediaStreamType, bool> SupportedTrackPolicy;
+
+ // This test must be aligned with the policy of supported tracks.
+ SupportedTrackPolicy p;
+ p[MEDIA_NO_SERVICE] = false;
+ p[MEDIA_DEVICE_AUDIO_CAPTURE] = true; // The only one supported for now.
+ p[MEDIA_DEVICE_VIDEO_CAPTURE] = false;
+ p[MEDIA_TAB_AUDIO_CAPTURE] = false;
+ p[MEDIA_TAB_VIDEO_CAPTURE] = false;
+ p[MEDIA_DESKTOP_VIDEO_CAPTURE] = false;
+ p[MEDIA_LOOPBACK_AUDIO_CAPTURE] = false;
+ p[MEDIA_DEVICE_AUDIO_OUTPUT] = false;
+
+ // Ensure this test gets updated along with |content::MediaStreamType| enum.
+ EXPECT_EQ(NUM_MEDIA_TYPES, p.size());
+
+ // Check the the entire policy.
+ for (SupportedTrackPolicy::iterator it = p.begin(); it != p.end(); ++it) {
+ blink::WebMediaStreamTrack blink_track;
+ PrepareBlinkTrackOfType(it->first, &blink_track);
+ ASSERT_EQ(
+ it->second,
+ SpeechRecognitionAudioSourceProvider::IsSupportedTrack(blink_track));
+ }
+}
+
+// Checks if the producer can support the listed range of input sample rates
+// and associated buffer sizes.
+TEST_F(SpeechRecognitionAudioSourceProviderTest, RecognizerNotifiedOnSocket) {
+ const size_t kNumAudioParamTuples = 22;
+ const int kAudioParams[kNumAudioParamTuples][2] = {
tommi (sloooow) - chröme 2014/09/24 09:52:00 add 24000?
burnik 2014/09/24 11:54:22 Done.
+ {8000, 80}, {8000, 800}, {16000, 160}, {16000, 1600},
+ {32000, 320}, {32000, 3200}, {44100, 441}, {44100, 4410},
+ {48000, 480}, {48000, 4800}, {96000, 960}, {96000, 9600},
+ {11025, 111}, {11025, 1103}, {22050, 221}, {22050, 2205},
+ {88200, 882}, {88200, 8820}, {176400, 1764}, {176400, 17640},
+ {192000, 1920}, {192000, 19200}};
+
+ // Check all listed tuples of input sample rates and buffers sizes.
+ for (size_t i = 0; i < kNumAudioParamTuples; ++i) {
+ AssertConsumptionForAudioParameters(
+ kAudioParams[i][0], kAudioParams[i][1],
+ kSpeechRecognitionSampleRate, kSpeechRecognitionFramesPerBuffer, 3U);
+ }
+}
+
+// Checks that the input data is getting resampled to the target sample rate.
+TEST_F(SpeechRecognitionAudioSourceProviderTest, AudioDataIsResampledOnSink) {
+ EXPECT_GE(kInputChannels, 1);
+ EXPECT_GE(kOutputChannels, 1);
+
+ // Input audio is sampled at 44.1 KHz with data chunks of 10ms. Desired output
+ // is corresponding to the speech recognition engine requirements: 16 KHz with
+ // 100 ms chunks (1600 frames per buffer).
+ const uint32 kBuffersPerNotification = Initialize(44100, 441, 16000, 1600);
+
+ // Fill audio input frames with 0, 1, 2, 3, ..., 440.
+ const uint32 kSourceDataLength = 441 * kInputChannels;
+ for (uint32 i = 0; i < kSourceDataLength; ++i)
tommi (sloooow) - chröme 2014/09/24 09:51:59 {}
burnik 2014/09/24 11:54:22 Done.
+ for (int c = 0; c < kInputChannels; ++c)
+ source_data_[i * kInputChannels + c] = i;
+
+ // Prepare sink audio bus and data for rendering.
+ media::AudioBus* sink_bus = recognizer_->audio_bus();
+ const uint32 kSinkDataLength = 1600 * kOutputChannels;
+ int16 sink_data[kSinkDataLength];
tommi (sloooow) - chröme 2014/09/24 09:51:59 nit: = {0};
burnik 2014/09/24 11:54:22 Done. Does this array init-to-zero on stack work f
+
+ // Render the audio data from the recognizer.
+ sink_bus->ToInterleaved(sink_bus->frames(),
+ sink_params_.bits_per_sample() / 8, sink_data);
+
+ // Checking only a fraction of the sink frames.
+ const uint32 kNumFramesToTest = 12;
+
+ // Check all channels are zeroed out before we trigger resampling.
+ for (uint32 i = 0; i < kNumFramesToTest; ++i)
tommi (sloooow) - chröme 2014/09/24 09:51:59 {}
burnik 2014/09/24 11:54:22 Done.
+ for (int c = 0; c < kOutputChannels; ++c)
+ ASSERT_EQ(0, sink_data[i * kOutputChannels + c]);
tommi (sloooow) - chröme 2014/09/24 09:51:59 EXPECT_EQ?
burnik 2014/09/24 11:54:22 Done.
+
+ // Trigger the source provider to resample the input data.
+ AssertConsumedBuffers(0U);
+ CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 1U);
+
+ // Render the audio data from the recognizer.
+ sink_bus->ToInterleaved(sink_bus->frames(),
+ sink_params_.bits_per_sample() / 8, sink_data);
+
+ // Resampled data expected frames. Extracted based on |source_data_|.
+ const int16 kExpectedData[kNumFramesToTest] = {0, 2, 5, 8, 11, 13,
+ 16, 19, 22, 24, 27, 30};
+
+ // Check all channels have the same resampled data.
+ for (uint32 i = 0; i < kNumFramesToTest; ++i)
tommi (sloooow) - chröme 2014/09/24 09:51:59 {}
burnik 2014/09/24 11:54:22 Done.
+ for (int c = 0; c < kOutputChannels; ++c)
+ ASSERT_EQ(kExpectedData[i], sink_data[i * kOutputChannels + c]);
tommi (sloooow) - chröme 2014/09/24 09:51:59 EXPECT_EQ?
burnik 2014/09/24 11:54:22 Done.
+}
+
+// Checks that the producer does not misbehave when a socket failure occurs.
+TEST_F(SpeechRecognitionAudioSourceProviderTest, SyncSocketFailsSendingData) {
+ const uint32 kBuffersPerNotification = Initialize(44100, 441, 16000, 1600);
+ // Start with no problems on the socket.
+ AssertConsumedBuffers(0U);
+ CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 1U);
+
+ // A failure occurs (socket cannot send).
+ SetFailureModeOnForeignSocket(true);
+ CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 1U);
+}
+
// Checks that an OnStoppedCallback is issued when the track is stopped.
// NOTE(review): "Occured" in the test name is a typo for "Occurred".
TEST_F(SpeechRecognitionAudioSourceProviderTest, OnReadyStateChangedOccured) {
  const uint32 kBuffersPerNotification = Initialize(44100, 441, 16000, 1600);
  // Verify normal operation before stopping the track.
  AssertConsumedBuffers(0U);
  CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 1U);
  // The stop callback on this fixture must fire exactly once.
  EXPECT_CALL(*this, StoppedCallback()).Times(1);

  native_track_->Stop();
  // After the stop, captured audio must no longer be consumed: the buffer
  // index stays at 1.
  CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 1U);
}
+
+} // namespace content

Powered by Google App Engine
This is Rietveld 408576698