| Index: chrome/browser/speech/endpointer/endpointer_unittest.cc
|
| diff --git a/chrome/browser/speech/endpointer/endpointer_unittest.cc b/chrome/browser/speech/endpointer/endpointer_unittest.cc
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..b49a6a69a1caf6d18e243c880fdc90534cf92e43
|
| --- /dev/null
|
| +++ b/chrome/browser/speech/endpointer/endpointer_unittest.cc
|
| @@ -0,0 +1,146 @@
|
| +// Copyright (c) 2010 The Chromium Authors. All rights reserved.
|
| +// Use of this source code is governed by a BSD-style license that can be
|
| +// found in the LICENSE file.
|
| +
|
| +#include "base/task.h"
|
| +#include "chrome/browser/speech/endpointer/endpointer.h"
|
| +#include "testing/gtest/include/gtest/gtest.h"
|
| +
|
| +namespace {
|
| +const int kFrameRate = 50; // 20 ms long frames for AMR encoding.
|
| +const int kSampleRate = 8000; // 8 k samples per second for AMR encoding.
|
| +
|
| +// At 8 sample per second a 20 ms frame is 160 samples, which corrsponds
|
| +// to the AMR codec.
|
| +const int kFrameSize = kSampleRate / kFrameRate; // 160 samples.
|
| +COMPILE_ASSERT(kFrameSize == 160, invalid_frame_size);
|
| +}
|
| +
|
| +namespace speech_input {
|
| +
|
| +class FrameProcessor {
|
| + public:
|
| + // Process a single frame of test audio samples.
|
| + virtual EpStatus ProcessFrame(int64 time, int16* samples, int frame_size) = 0;
|
| +};
|
| +
|
| +void RunEndpointerEventsTest(FrameProcessor* processor) {
|
| + int16 samples[kFrameSize];
|
| +
|
| + // We will create a white noise signal of 150 frames. The frames from 50 to
|
| + // 100 will have more power, and the endpointer should fire on those frames.
|
| + const int kNumFrames = 150;
|
| +
|
| + // Create a random sequence of samples.
|
| + srand(1);
|
| + float gain = 0.0;
|
| + int64 time = 0;
|
| + for (int frame_count = 0; frame_count < kNumFrames; ++frame_count) {
|
| + // The frames from 50 to 100 will have more power, and the endpointer
|
| + // should detect those frames as speech.
|
| + if ((frame_count >= 50) && (frame_count < 100)) {
|
| + gain = 2000.0;
|
| + } else {
|
| + gain = 1.0;
|
| + }
|
| + // Create random samples.
|
| + for (int i = 0; i < kFrameSize; ++i) {
|
| + float randNum = static_cast<float>(rand() - (RAND_MAX / 2)) /
|
| + static_cast<float>(RAND_MAX);
|
| + samples[i] = static_cast<int16>(gain * randNum);
|
| + }
|
| +
|
| + EpStatus ep_status = processor->ProcessFrame(time, samples, kFrameSize);
|
| + time += static_cast<int64>(kFrameSize * (1e6 / kSampleRate));
|
| +
|
| + // Log the status.
|
| + if (20 == frame_count)
|
| + EXPECT_EQ(EP_PRE_SPEECH, ep_status);
|
| + if (70 == frame_count)
|
| + EXPECT_EQ(EP_SPEECH_PRESENT, ep_status);
|
| + if (120 == frame_count)
|
| + EXPECT_EQ(EP_PRE_SPEECH, ep_status);
|
| + }
|
| +}
|
| +
|
| +// This test instantiates and initializes a stand alone endpointer module.
|
| +// The test creates FrameData objects with random noise and send them
|
| +// to the endointer module. The energy of the first 50 frames is low,
|
| +// followed by 500 high energy frames, and another 50 low energy frames.
|
| +// We test that the correct start and end frames were detected.
|
| +class EnergyEndpointerFrameProcessor : public FrameProcessor {
|
| + public:
|
| + explicit EnergyEndpointerFrameProcessor(EnergyEndpointer* endpointer)
|
| + : endpointer_(endpointer) {}
|
| +
|
| + EpStatus ProcessFrame(int64 time, int16* samples, int frame_size) {
|
| + endpointer_->ProcessAudioFrame(time, samples, kFrameSize);
|
| + int64 ep_time;
|
| + return endpointer_->Status(&ep_time);
|
| + }
|
| +
|
| + private:
|
| + EnergyEndpointer* endpointer_;
|
| +};
|
| +
|
| +TEST(EndpointerTest, TestEnergyEndpointerEvents) {
|
| + // Initialize endpointer and configure it. We specify the parameters
|
| + // here for a 20ms window, and a 20ms step size, which corrsponds to
|
| + // the narrow band AMR codec.
|
| + EnergyEndpointerParams ep_config;
|
| + ep_config.set_frame_period(1.0f / static_cast<float>(kFrameRate));
|
| + ep_config.set_frame_duration(1.0f / static_cast<float>(kFrameRate));
|
| + ep_config.set_endpoint_margin(0.2f);
|
| + ep_config.set_onset_window(0.15f);
|
| + ep_config.set_speech_on_window(0.4f);
|
| + ep_config.set_offset_window(0.15f);
|
| + ep_config.set_onset_detect_dur(0.09f);
|
| + ep_config.set_onset_confirm_dur(0.075f);
|
| + ep_config.set_on_maintain_dur(0.10f);
|
| + ep_config.set_offset_confirm_dur(0.12f);
|
| + ep_config.set_decision_threshold(100.0f);
|
| + EnergyEndpointer endpointer;
|
| + endpointer.Init(ep_config);
|
| +
|
| + endpointer.StartSession();
|
| +
|
| + EnergyEndpointerFrameProcessor frame_processor(&endpointer);
|
| + RunEndpointerEventsTest(&frame_processor);
|
| +
|
| + endpointer.EndSession();
|
| +};
|
| +
|
| +// Test endpointer wrapper class.
|
| +class EndpointerFrameProcessor : public FrameProcessor {
|
| + public:
|
| + explicit EndpointerFrameProcessor(Endpointer* endpointer)
|
| + : endpointer_(endpointer) {}
|
| +
|
| + EpStatus ProcessFrame(int64 time, int16* samples, int frame_size) {
|
| + endpointer_->ProcessAudio(samples, kFrameSize);
|
| + int64 ep_time;
|
| + return endpointer_->Status(&ep_time);
|
| + }
|
| +
|
| + private:
|
| + Endpointer* endpointer_;
|
| +};
|
| +
|
| +TEST(EndpointerTest, TestEmbeddedEndpointerEvents) {
|
| + const int kSampleRate = 8000; // 8 k samples per second for AMR encoding.
|
| +
|
| + Endpointer endpointer(kSampleRate);
|
| + const int64 kMillisecondsPerMicrosecond = 1000;
|
| + const int64 short_timeout = 300 * kMillisecondsPerMicrosecond;
|
| + endpointer.set_speech_input_possibly_complete_silence_length(short_timeout);
|
| + const int64 long_timeout = 500 * kMillisecondsPerMicrosecond;
|
| + endpointer.set_speech_input_complete_silence_length(long_timeout);
|
| + endpointer.StartSession();
|
| +
|
| + EndpointerFrameProcessor frame_processor(&endpointer);
|
| + RunEndpointerEventsTest(&frame_processor);
|
| +
|
| + endpointer.EndSession();
|
| +}
|
| +
|
| +} // namespace speech_input
|
|
|