| Index: content/renderer/media/speech_recognition_audio_sink_unittest.cc
|
| diff --git a/content/renderer/media/speech_recognition_audio_sink_unittest.cc b/content/renderer/media/speech_recognition_audio_sink_unittest.cc
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..387d3ea895ab809cb16c3d569a9b638ddaab856e
|
| --- /dev/null
|
| +++ b/content/renderer/media/speech_recognition_audio_sink_unittest.cc
|
| @@ -0,0 +1,466 @@
|
| +// Copyright 2014 The Chromium Authors. All rights reserved.
|
| +// Use of this source code is governed by a BSD-style license that can be
|
| +// found in the LICENSE file.
|
| +
|
| +#include "content/renderer/media/speech_recognition_audio_sink.h"
|
| +
|
| +#include "base/strings/utf_string_conversions.h"
|
| +#include "content/renderer/media/mock_media_constraint_factory.h"
|
| +#include "content/renderer/media/webrtc/webrtc_local_audio_track_adapter.h"
|
| +#include "content/renderer/media/webrtc_local_audio_track.h"
|
| +#include "media/audio/audio_parameters.h"
|
| +#include "media/base/audio_bus.h"
|
| +#include "testing/gmock/include/gmock/gmock.h"
|
| +#include "testing/gtest/include/gtest/gtest.h"
|
| +#include "third_party/WebKit/public/platform/WebMediaStreamTrack.h"
|
| +
|
| +namespace {
|
| +
|
| +// Supported speech recognition audio parameters.
|
| +const int kSpeechRecognitionSampleRate = 16000;
|
| +const int kSpeechRecognitionFramesPerBuffer = 1600;
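|
| +// At 16000 Hz, 1600 frames per buffer corresponds to 100 ms of audio.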
|
| +
|
| +// Input audio format.
|
| +const media::AudioParameters::Format kInputFormat =
|
| + media::AudioParameters::AUDIO_PCM_LOW_LATENCY;
|
| +const media::ChannelLayout kInputChannelLayout = media::CHANNEL_LAYOUT_MONO;
|
| +const int kInputChannels = 1;
|
| +const int kInputBitsPerSample = 16;
|
| +
|
| +// Output audio format.
|
| +const media::AudioParameters::Format kOutputFormat =
|
| + media::AudioParameters::AUDIO_PCM_LOW_LATENCY;
|
| +const media::ChannelLayout kOutputChannelLayout = media::CHANNEL_LAYOUT_STEREO;
|
| +const int kOutputChannels = 2;
|
| +const int kOutputBitsPerSample = 16;
|
| +
|
| +// Mocked-out sockets used for Send/Receive.
|
| +// Data is written to and read from a shared buffer used as a FIFO, and there
|
| +// is no blocking. |OnSendCB| is used to trigger a |Receive| on the other
|
| +// socket.
|
| +class MockSyncSocket : public base::SyncSocket {
|
| + public:
|
| + // This allows for two requests in the queue between the |MockSyncSocket|s.
|
| + static const int kSharedBufferSize = 8;
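|
| + // Each request is a single uint32 buffer index (4 bytes), so the buffer
|
| + // holds at most two pending indices between receives.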
|
| +
|
| + // Buffer to be shared between two |MockSyncSocket|s. Allocated on the heap.
|
| + struct SharedBuffer {
|
| + SharedBuffer() : data(), start(0), length(0) {}
|
| +
|
| + uint8 data[kSharedBufferSize];
|
| + size_t start;
|
| + size_t length;
|
| + };
|
| +
|
| + // Callback used for pairing an A.Send() with B.Receive() without blocking.
|
| + typedef base::Callback<void()> OnSendCB;
|
| +
|
| + explicit MockSyncSocket(SharedBuffer* shared_buffer)
|
| + : buffer_(shared_buffer),
|
| + in_failure_mode_(false) {}
|
| +
|
| + MockSyncSocket(SharedBuffer* shared_buffer, const OnSendCB& on_send_cb)
|
| + : buffer_(shared_buffer),
|
| + on_send_cb_(on_send_cb),
|
| + in_failure_mode_(false) {}
|
| +
|
| + virtual size_t Send(const void* buffer, size_t length) OVERRIDE;
|
| + virtual size_t Receive(void* buffer, size_t length) OVERRIDE;
|
| +
|
| + // When |in_failure_mode_| == true, the socket fails to send.
|
| + void SetFailureMode(bool in_failure_mode) {
|
| + in_failure_mode_ = in_failure_mode;
|
| + }
|
| +
|
| + private:
|
| + SharedBuffer* buffer_;
|
| + const OnSendCB on_send_cb_;
|
| + bool in_failure_mode_;
|
| +};
|
| +
|
| +size_t MockSyncSocket::Send(const void* buffer, size_t length) {
|
| + if (in_failure_mode_)
|
| + return 0;
|
| +
|
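| + // Append to the FIFO. Wraparound is not handled: Receive() resets the
|
| + // indices, and at most two 4-byte indices are queued at a time.
|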
| + const uint8* b = static_cast<const uint8*>(buffer);
|
| + for (size_t i = 0; i < length; ++i, ++buffer_->length)
|
| + buffer_->data[buffer_->start + buffer_->length] = b[i];
|
| +
|
| + on_send_cb_.Run();
|
| + return length;
|
| +}
|
| +
|
| +size_t MockSyncSocket::Receive(void* buffer, size_t length) {
|
| + uint8* b = static_cast<uint8*>(buffer);
|
| + // Copy at most |length| bytes out of the FIFO into the destination buffer.
|
| + for (size_t i = 0; i < length && i < buffer_->length; ++i, ++buffer_->start)
|
| + b[i] = buffer_->data[buffer_->start];
|
| +
|
| + // Since the buffer is used sequentially, we can reset the indices here.
|
| + buffer_->start = buffer_->length = 0;
|
| + return length;
|
| +}
|
| +
|
| +// This fake class is the consumer used to verify the behavior of the producer.
|
| +// The |Initialize()| method shows what the consumer should be responsible for
|
| +// in the production code (minus the mocks).
|
| +class FakeSpeechRecognizer {
|
| + public:
|
| + FakeSpeechRecognizer() : is_responsive_(true) { }
|
| +
|
| + void Initialize(
|
| + const blink::WebMediaStreamTrack& track,
|
| + const media::AudioParameters& sink_params,
|
| + base::SharedMemoryHandle* foreign_memory_handle) {
|
| + // Shared memory is allocated, mapped and shared.
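|
| + // The region holds the synchronization parameters followed by the audio
|
| + // data, mirroring the |media::AudioInputBuffer| layout used below.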
|
| + uint32 shared_memory_size =
|
| + sizeof(media::AudioInputBufferParameters) +
|
| + media::AudioBus::CalculateMemorySize(sink_params);
|
| + shared_memory_.reset(new base::SharedMemory());
|
| + ASSERT_TRUE(shared_memory_->CreateAndMapAnonymous(shared_memory_size));
|
| + ASSERT_TRUE(shared_memory_->ShareToProcess(base::GetCurrentProcessHandle(),
|
| + foreign_memory_handle));
|
| +
|
| + // Wrap the shared memory for the audio bus.
|
| + media::AudioInputBuffer* buffer =
|
| + static_cast<media::AudioInputBuffer*>(shared_memory_->memory());
|
| + audio_track_bus_ = media::AudioBus::WrapMemory(sink_params, buffer->audio);
|
| +
|
| + // Reference to the counter used to synchronize.
|
| + buffer_index_ = &(buffer->params.size);
|
| + *buffer_index_ = 0U;
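|
| + // Expected protocol: the producer fills the shared audio bus and sends the
|
| + // current index over the socket; the consumer receives it and increments
|
| + // the index to acknowledge consumption.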
|
| +
|
| + // Create a shared buffer for the |MockSyncSocket|s.
|
| + shared_buffer_.reset(new MockSyncSocket::SharedBuffer());
|
| +
|
| + // Local socket will receive signals from the producer.
|
| + local_socket_.reset(new MockSyncSocket(shared_buffer_.get()));
|
| +
|
| + // We automatically trigger a Receive when data is sent over the socket.
|
| + foreign_socket_ = new MockSyncSocket(
|
| + shared_buffer_.get(),
|
| + base::Bind(&FakeSpeechRecognizer::EmulateReceiveThreadLoopIteration,
|
| + base::Unretained(this)));
|
| +
|
| + // Pairing the sockets this way is normally required; with the mocks it has
|
| + // no effect.
|
| + base::SyncSocket::CreatePair(local_socket_.get(), foreign_socket_);
|
| + }
|
| +
|
| + // Emulates a single iteration of a thread receiving on the socket.
|
| + // This would normally run as a task on a receiving thread in the browser.
|
| + void EmulateReceiveThreadLoopIteration() {
|
| + // When not responsive, do nothing, as if the process were busy.
|
| + if (!is_responsive_)
|
| + return;
|
| +
|
| + local_socket_->Receive(buffer_index_, sizeof(*buffer_index_));
|
| + // Notify the producer that the audio buffer has been consumed.
|
| + ++(*buffer_index_);
|
| + }
|
| +
|
| + // Used to simulate unresponsive behavior of the consumer.
|
| + void SimulateResponsiveness(bool is_responsive) {
|
| + is_responsive_ = is_responsive;
|
| + }
|
| +
|
| + MockSyncSocket* foreign_socket() { return foreign_socket_; }
|
| + media::AudioBus* audio_bus() const { return audio_track_bus_.get(); }
|
| + uint32 buffer_index() { return *buffer_index_; }
|
| +
|
| + private:
|
| + bool is_responsive_;
|
| +
|
| + // Shared memory for the audio and synchronization.
|
| + scoped_ptr<base::SharedMemory> shared_memory_;
|
| +
|
| + // Fake sockets and their shared buffer.
|
| + scoped_ptr<MockSyncSocket::SharedBuffer> shared_buffer_;
|
| + scoped_ptr<MockSyncSocket> local_socket_;
|
| + MockSyncSocket* foreign_socket_;
|
| +
|
| + // Audio bus wrapping the shared memory from the renderer.
|
| + scoped_ptr<media::AudioBus> audio_track_bus_;
|
| +
|
| + // Used for synchronization of sent/received buffers.
|
| + uint32* buffer_index_;
|
| +};
|
| +
|
| +} // namespace
|
| +
|
| +namespace content {
|
| +
|
| +class SpeechRecognitionAudioSinkTest : public testing::Test {
|
| + public:
|
| + SpeechRecognitionAudioSinkTest() { }
|
| +
|
| + // Initializes the producer and consumer with the specified audio parameters.
|
| + // Returns the minimum number of input audio buffers that must be captured
|
| + // before a full output buffer is sent to the consumer.
|
| + uint32 Initialize(int input_sample_rate,
|
| + int input_frames_per_buffer,
|
| + int output_sample_rate,
|
| + int output_frames_per_buffer) {
|
| + // Audio environment setup.
|
| + source_params_.Reset(kInputFormat,
|
| + kInputChannelLayout,
|
| + kInputChannels,
|
| + input_sample_rate,
|
| + kInputBitsPerSample,
|
| + input_frames_per_buffer);
|
| + sink_params_.Reset(kOutputFormat,
|
| + kOutputChannelLayout,
|
| + kOutputChannels,
|
| + output_sample_rate,
|
| + kOutputBitsPerSample,
|
| + output_frames_per_buffer);
|
| + source_data_.reset(new int16[input_frames_per_buffer * kInputChannels]);
|
| +
|
| + // Prepare the track and audio source.
|
| + blink::WebMediaStreamTrack blink_track;
|
| + PrepareBlinkTrackOfType(MEDIA_DEVICE_AUDIO_CAPTURE, &blink_track);
|
| +
|
| + // Get the native track from the blink track and initialize.
|
| + native_track_ =
|
| + static_cast<WebRtcLocalAudioTrack*>(blink_track.extraData());
|
| + native_track_->OnSetFormat(source_params_);
|
| +
|
| + // Create and initialize the consumer.
|
| + recognizer_.reset(new FakeSpeechRecognizer());
|
| + base::SharedMemoryHandle foreign_memory_handle;
|
| + recognizer_->Initialize(blink_track, sink_params_, &foreign_memory_handle);
|
| +
|
| + // Create the producer.
|
| + scoped_ptr<base::SyncSocket> foreign_socket(recognizer_->foreign_socket());
|
| + speech_audio_sink_.reset(new SpeechRecognitionAudioSink(
|
| + blink_track, sink_params_, foreign_memory_handle,
|
| + foreign_socket.Pass(),
|
| + base::Bind(&SpeechRecognitionAudioSinkTest::StoppedCallback,
|
| + base::Unretained(this))));
|
| +
|
| + // Return the number of input buffers needed to fill one output buffer and
|
| + // thus trigger resampling and consumption.
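|
| + // For example, Initialize(44100, 441, 16000, 1600) yields
|
| + // ceil((1600 * 44100) / (441 * 16000)) = 10 buffers per notification.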
|
| + return static_cast<uint32>(std::ceil(
|
| + static_cast<double>(output_frames_per_buffer * input_sample_rate) /
|
| + (input_frames_per_buffer * output_sample_rate)));
|
| + }
|
| +
|
| + // Mock callback expected to be called when the track is stopped.
|
| + MOCK_METHOD0(StoppedCallback, void());
|
| +
|
| + protected:
|
| + // Prepares a blink track of a given MediaStreamType and attaches the native
|
| + // track, which can be used to capture audio data and pass it to the producer.
|
| + static void PrepareBlinkTrackOfType(
|
| + const MediaStreamType device_type,
|
| + blink::WebMediaStreamTrack* blink_track) {
|
| + StreamDeviceInfo device_info(device_type, "Mock device",
|
| + "mock_device_id");
|
| + MockMediaConstraintFactory constraint_factory;
|
| + const blink::WebMediaConstraints constraints =
|
| + constraint_factory.CreateWebMediaConstraints();
|
| + scoped_refptr<WebRtcAudioCapturer> capturer(
|
| + WebRtcAudioCapturer::CreateCapturer(-1, device_info, constraints, NULL,
|
| + NULL));
|
| + scoped_refptr<WebRtcLocalAudioTrackAdapter> adapter(
|
| + WebRtcLocalAudioTrackAdapter::Create(std::string(), NULL));
|
| + scoped_ptr<WebRtcLocalAudioTrack> native_track(
|
| + new WebRtcLocalAudioTrack(adapter.get(), capturer, NULL));
|
| + blink::WebMediaStreamSource blink_audio_source;
|
| + blink_audio_source.initialize(base::UTF8ToUTF16("dummy_source_id"),
|
| + blink::WebMediaStreamSource::TypeAudio,
|
| + base::UTF8ToUTF16("dummy_source_name"));
|
| + MediaStreamSource::SourceStoppedCallback cb;
|
| + blink_audio_source.setExtraData(
|
| + new MediaStreamAudioSource(-1, device_info, cb, NULL));
|
| + blink_track->initialize(blink::WebString::fromUTF8("dummy_track"),
|
| + blink_audio_source);
|
| + blink_track->setExtraData(native_track.release());
|
| + }
|
| +
|
| + // Emulates an audio capture device capturing data from the source.
|
| + inline void CaptureAudio(const uint32 buffers) {
|
| + for (uint32 i = 0; i < buffers; ++i)
|
| + native_track_->Capture(source_data_.get(),
|
| + base::TimeDelta::FromMilliseconds(0), 1, false,
|
| + false);
|
| + }
|
| +
|
| + // Used to simulate a problem with sockets.
|
| + void SetFailureModeOnForeignSocket(bool in_failure_mode) {
|
| + recognizer_->foreign_socket()->SetFailureMode(in_failure_mode);
|
| + }
|
| +
|
| + // Helper method for verifying captured audio data has been consumed.
|
| + inline void AssertConsumedBuffers(const uint32 buffer_index) {
|
| + ASSERT_EQ(buffer_index, recognizer_->buffer_index());
|
| + }
|
| +
|
| + // Helper method for providing audio data to producer and verifying it was
|
| + // consumed on the recognizer.
|
| + inline void CaptureAudioAndAssertConsumedBuffers(const uint32 buffers,
|
| + const uint32 buffer_index) {
|
| + CaptureAudio(buffers);
|
| + AssertConsumedBuffers(buffer_index);
|
| + }
|
| +
|
| + // Helper method to capture and assert consumption at different sample rates
|
| + // and audio buffer sizes.
|
| + inline void AssertConsumptionForAudioParameters(
|
| + const int input_sample_rate,
|
| + const int input_frames_per_buffer,
|
| + const int output_sample_rate,
|
| + const int output_frames_per_buffer,
|
| + const uint32 consumptions) {
|
| + const uint32 kBuffersPerNotification =
|
| + Initialize(input_sample_rate, input_frames_per_buffer,
|
| + output_sample_rate, output_frames_per_buffer);
|
| + AssertConsumedBuffers(0U);
|
| +
|
| + for (uint32 i = 1U; i <= consumptions; ++i) {
|
| + CaptureAudio(kBuffersPerNotification);
|
| + ASSERT_EQ(i, recognizer_->buffer_index())
|
| + << "Tested at rates: "
|
| + << "In(" << input_sample_rate << ", " << input_frames_per_buffer
|
| + << ") "
|
| + << "Out(" << output_sample_rate << ", " << output_frames_per_buffer
|
| + << ")";
|
| + }
|
| + }
|
| +
|
| + // Producer.
|
| + scoped_ptr<SpeechRecognitionAudioSink> speech_audio_sink_;
|
| +
|
| + // Consumer.
|
| + scoped_ptr<FakeSpeechRecognizer> recognizer_;
|
| +
|
| + // Audio related members.
|
| + scoped_ptr<int16[]> source_data_;
|
| + media::AudioParameters source_params_;
|
| + media::AudioParameters sink_params_;
|
| + WebRtcLocalAudioTrack* native_track_;
|
| +};
|
| +
|
| +// Not all types of tracks are supported. This test checks that the support
|
| +// policy is implemented correctly.
|
| +TEST_F(SpeechRecognitionAudioSinkTest, CheckIsSupportedAudioTrack) {
|
| + typedef std::map<MediaStreamType, bool> SupportedTrackPolicy;
|
| +
|
| + // This test must be aligned with the policy of supported tracks.
|
| + SupportedTrackPolicy p;
|
| + p[MEDIA_NO_SERVICE] = false;
|
| + p[MEDIA_DEVICE_AUDIO_CAPTURE] = true; // The only one supported for now.
|
| + p[MEDIA_DEVICE_VIDEO_CAPTURE] = false;
|
| + p[MEDIA_TAB_AUDIO_CAPTURE] = false;
|
| + p[MEDIA_TAB_VIDEO_CAPTURE] = false;
|
| + p[MEDIA_DESKTOP_VIDEO_CAPTURE] = false;
|
| + p[MEDIA_LOOPBACK_AUDIO_CAPTURE] = false;
|
| + p[MEDIA_DEVICE_AUDIO_OUTPUT] = false;
|
| +
|
| + // Ensure this test gets updated along with the |content::MediaStreamType| enum.
|
| + EXPECT_EQ(static_cast<size_t>(NUM_MEDIA_TYPES), p.size());
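|
| + // The map holds one entry per type, so adding an enum value without
|
| + // updating the policy above makes this size check fail.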
|
| +
|
| + // Check the entire policy.
|
| + for (SupportedTrackPolicy::iterator it = p.begin(); it != p.end(); ++it) {
|
| + blink::WebMediaStreamTrack blink_track;
|
| + PrepareBlinkTrackOfType(it->first, &blink_track);
|
| + ASSERT_EQ(
|
| + it->second,
|
| + SpeechRecognitionAudioSink::IsSupportedTrack(blink_track));
|
| + }
|
| +}
|
| +
|
| +// Checks if the producer can support the listed range of input sample rates
|
| +// and associated buffer sizes.
|
| +TEST_F(SpeechRecognitionAudioSinkTest, RecognizerNotifiedOnSocket) {
|
| + const size_t kNumAudioParamTuples = 24;
|
| + const int kAudioParams[kNumAudioParamTuples][2] = {
|
| + {8000, 80}, {8000, 800}, {16000, 160}, {16000, 1600},
|
| + {24000, 240}, {24000, 2400}, {32000, 320}, {32000, 3200},
|
| + {44100, 441}, {44100, 4410}, {48000, 480}, {48000, 4800},
|
| + {96000, 960}, {96000, 9600}, {11025, 111}, {11025, 1103},
|
| + {22050, 221}, {22050, 2205}, {88200, 882}, {88200, 8820},
|
| + {176400, 1764}, {176400, 17640}, {192000, 1920}, {192000, 19200}};
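|
| + // Each {sample rate, frames per buffer} tuple above is roughly 10 ms or
|
| + // 100 ms of audio at that rate (e.g. {44100, 441} is a 10 ms buffer).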
|
| +
|
| + // Check all listed tuples of input sample rates and buffer sizes.
|
| + for (size_t i = 0; i < kNumAudioParamTuples; ++i) {
|
| + AssertConsumptionForAudioParameters(
|
| + kAudioParams[i][0], kAudioParams[i][1],
|
| + kSpeechRecognitionSampleRate, kSpeechRecognitionFramesPerBuffer, 3U);
|
| + }
|
| +}
|
| +
|
| +// Checks that the input data is getting resampled to the target sample rate.
|
| +TEST_F(SpeechRecognitionAudioSinkTest, AudioDataIsResampledOnSink) {
|
| + EXPECT_GE(kInputChannels, 1);
|
| + EXPECT_GE(kOutputChannels, 1);
|
| +
|
| + // Input audio is sampled at 44.1 kHz in 10 ms chunks (441 frames). The
|
| + // desired output corresponds to the speech recognition engine requirements:
|
| + // 16 kHz with 100 ms chunks (1600 frames per buffer).
|
| + const uint32 kBuffersPerNotification = Initialize(44100, 441, 16000, 1600);
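|
| + // With these parameters, ten 10 ms input buffers are captured for each
|
| + // 100 ms output buffer (see the formula in Initialize()).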
|
| +
|
| + // Fill each input frame (all of its channels) with its index: 0, 1, ..., 440.
|
| + // Note: iterate over frames, not samples, so the indexing below stays in
|
| + // bounds even if |kInputChannels| were greater than 1.
|
| + const uint32 kSourceFrames = 441;
|
| + for (uint32 i = 0; i < kSourceFrames; ++i) {
|
| + for (int c = 0; c < kInputChannels; ++c)
|
| + source_data_[i * kInputChannels + c] = i;
|
| + }
|
| +
|
| + // Prepare sink audio bus and data for rendering.
|
| + media::AudioBus* sink_bus = recognizer_->audio_bus();
|
| + const uint32 kSinkDataLength = 1600 * kOutputChannels;
|
| + int16 sink_data[kSinkDataLength] = {0};
|
| +
|
| + // Render the audio data from the recognizer.
|
| + sink_bus->ToInterleaved(sink_bus->frames(),
|
| + sink_params_.bits_per_sample() / 8, sink_data);
|
| +
|
| + // Checking only a fraction of the sink frames.
|
| + const uint32 kNumFramesToTest = 12;
|
| +
|
| + // Check that all channels are zeroed out before we trigger resampling.
|
| + for (uint32 i = 0; i < kNumFramesToTest; ++i) {
|
| + for (int c = 0; c < kOutputChannels; ++c)
|
| + EXPECT_EQ(0, sink_data[i * kOutputChannels + c]);
|
| + }
|
| +
|
| + // Trigger the speech sink to resample the input data.
|
| + AssertConsumedBuffers(0U);
|
| + CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 1U);
|
| +
|
| + // Render the audio data from the recognizer.
|
| + sink_bus->ToInterleaved(sink_bus->frames(),
|
| + sink_params_.bits_per_sample() / 8, sink_data);
|
| +
|
| + // Expected frames of resampled data, derived from the ramp in |source_data_|.
|
| + const int16 kExpectedData[kNumFramesToTest] = {0, 2, 5, 8, 11, 13,
|
| + 16, 19, 22, 24, 27, 30};
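|
| + // Output frame n maps to input position n * (44100 / 16000) = n * 2.75625,
|
| + // so with the ramp input each resampled value lands near
|
| + // floor(n * 2.75625); the exact values depend on the resampler's filter.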
|
| +
|
| + // Check all channels have the same resampled data.
|
| + for (uint32 i = 0; i < kNumFramesToTest; ++i) {
|
| + for (int c = 0; c < kOutputChannels; ++c)
|
| + EXPECT_EQ(kExpectedData[i], sink_data[i * kOutputChannels + c]);
|
| + }
|
| +}
|
| +
|
| +// Checks that the producer does not misbehave when a socket failure occurs.
|
| +TEST_F(SpeechRecognitionAudioSinkTest, SyncSocketFailsSendingData) {
|
| + const uint32 kBuffersPerNotification = Initialize(44100, 441, 16000, 1600);
|
| + // Start with no problems on the socket.
|
| + AssertConsumedBuffers(0U);
|
| + CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 1U);
|
| +
|
| + // A failure occurs (socket cannot send).
|
| + SetFailureModeOnForeignSocket(true);
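|
| + // Send() now returns 0, so no notification reaches the consumer and the
|
| + // buffer index must remain at 1.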
|
| + CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 1U);
|
| +}
|
| +
|
| +// Checks that the stopped callback is issued when the track is stopped.
|
| +TEST_F(SpeechRecognitionAudioSinkTest, OnReadyStateChangedOccurred) {
|
| + const uint32 kBuffersPerNotification = Initialize(44100, 441, 16000, 1600);
|
| + AssertConsumedBuffers(0U);
|
| + CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 1U);
|
| + EXPECT_CALL(*this, StoppedCallback()).Times(1);
|
| +
|
| + native_track_->Stop();
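|
| + // After the track stops, further captured audio must not be consumed, so
|
| + // the buffer index stays at 1.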
|
| + CaptureAudioAndAssertConsumedBuffers(kBuffersPerNotification, 1U);
|
| +}
|
| +
|
| +} // namespace content
|
|
|