OLD | NEW |
(Empty) | |
| 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 #include "base/task.h" |
| 6 #include "chrome/browser/speech/endpointer/endpointer.h" |
| 7 #include "testing/gtest/include/gtest/gtest.h" |
| 8 |
| 9 namespace { |
| 10 const int kFrameRate = 50; // 20 ms long frames for AMR encoding. |
| 11 const int kSampleRate = 8000; // 8 k samples per second for AMR encoding. |
| 12 |
| 13 // At 8 sample per second a 20 ms frame is 160 samples, which corrsponds |
| 14 // to the AMR codec. |
| 15 const int kFrameSize = kSampleRate / kFrameRate; // 160 samples. |
| 16 COMPILE_ASSERT(kFrameSize == 160, invalid_frame_size); |
| 17 } |
| 18 |
| 19 namespace speech_input { |
| 20 |
| 21 class FrameProcessor { |
| 22 public: |
| 23 // Process a single frame of test audio samples. |
| 24 virtual EpStatus ProcessFrame(int64 time, int16* samples, int frame_size) = 0; |
| 25 }; |
| 26 |
| 27 void RunEndpointerEventsTest(FrameProcessor* processor) { |
| 28 int16 samples[kFrameSize]; |
| 29 |
| 30 // We will create a white noise signal of 150 frames. The frames from 50 to |
| 31 // 100 will have more power, and the endpointer should fire on those frames. |
| 32 const int kNumFrames = 150; |
| 33 |
| 34 // Create a random sequence of samples. |
| 35 srand(1); |
| 36 float gain = 0.0; |
| 37 int64 time = 0; |
| 38 for (int frame_count = 0; frame_count < kNumFrames; ++frame_count) { |
| 39 // The frames from 50 to 100 will have more power, and the endpointer |
| 40 // should detect those frames as speech. |
| 41 if ((frame_count >= 50) && (frame_count < 100)) { |
| 42 gain = 2000.0; |
| 43 } else { |
| 44 gain = 1.0; |
| 45 } |
| 46 // Create random samples. |
| 47 for (int i = 0; i < kFrameSize; ++i) { |
| 48 float randNum = static_cast<float>(rand() - (RAND_MAX / 2)) / |
| 49 static_cast<float>(RAND_MAX); |
| 50 samples[i] = static_cast<int16>(gain * randNum); |
| 51 } |
| 52 |
| 53 EpStatus ep_status = processor->ProcessFrame(time, samples, kFrameSize); |
| 54 time += static_cast<int64>(kFrameSize * (1e6 / kSampleRate)); |
| 55 |
| 56 // Log the status. |
| 57 if (20 == frame_count) |
| 58 EXPECT_EQ(EP_PRE_SPEECH, ep_status); |
| 59 if (70 == frame_count) |
| 60 EXPECT_EQ(EP_SPEECH_PRESENT, ep_status); |
| 61 if (120 == frame_count) |
| 62 EXPECT_EQ(EP_PRE_SPEECH, ep_status); |
| 63 } |
| 64 } |
| 65 |
| 66 // This test instantiates and initializes a stand alone endpointer module. |
| 67 // The test creates FrameData objects with random noise and send them |
| 68 // to the endointer module. The energy of the first 50 frames is low, |
| 69 // followed by 500 high energy frames, and another 50 low energy frames. |
| 70 // We test that the correct start and end frames were detected. |
| 71 class EnergyEndpointerFrameProcessor : public FrameProcessor { |
| 72 public: |
| 73 explicit EnergyEndpointerFrameProcessor(EnergyEndpointer* endpointer) |
| 74 : endpointer_(endpointer) {} |
| 75 |
| 76 EpStatus ProcessFrame(int64 time, int16* samples, int frame_size) { |
| 77 endpointer_->ProcessAudioFrame(time, samples, kFrameSize); |
| 78 int64 ep_time; |
| 79 return endpointer_->Status(&ep_time); |
| 80 } |
| 81 |
| 82 private: |
| 83 EnergyEndpointer* endpointer_; |
| 84 }; |
| 85 |
| 86 TEST(EndpointerTest, TestEnergyEndpointerEvents) { |
| 87 // Initialize endpointer and configure it. We specify the parameters |
| 88 // here for a 20ms window, and a 20ms step size, which corrsponds to |
| 89 // the narrow band AMR codec. |
| 90 EnergyEndpointerParams ep_config; |
| 91 ep_config.set_frame_period(1.0f / static_cast<float>(kFrameRate)); |
| 92 ep_config.set_frame_duration(1.0f / static_cast<float>(kFrameRate)); |
| 93 ep_config.set_endpoint_margin(0.2f); |
| 94 ep_config.set_onset_window(0.15f); |
| 95 ep_config.set_speech_on_window(0.4f); |
| 96 ep_config.set_offset_window(0.15f); |
| 97 ep_config.set_onset_detect_dur(0.09f); |
| 98 ep_config.set_onset_confirm_dur(0.075f); |
| 99 ep_config.set_on_maintain_dur(0.10f); |
| 100 ep_config.set_offset_confirm_dur(0.12f); |
| 101 ep_config.set_decision_threshold(100.0f); |
| 102 EnergyEndpointer endpointer; |
| 103 endpointer.Init(ep_config); |
| 104 |
| 105 endpointer.StartSession(); |
| 106 |
| 107 EnergyEndpointerFrameProcessor frame_processor(&endpointer); |
| 108 RunEndpointerEventsTest(&frame_processor); |
| 109 |
| 110 endpointer.EndSession(); |
| 111 }; |
| 112 |
| 113 // Test endpointer wrapper class. |
| 114 class EndpointerFrameProcessor : public FrameProcessor { |
| 115 public: |
| 116 explicit EndpointerFrameProcessor(Endpointer* endpointer) |
| 117 : endpointer_(endpointer) {} |
| 118 |
| 119 EpStatus ProcessFrame(int64 time, int16* samples, int frame_size) { |
| 120 endpointer_->ProcessAudio(samples, kFrameSize); |
| 121 int64 ep_time; |
| 122 return endpointer_->Status(&ep_time); |
| 123 } |
| 124 |
| 125 private: |
| 126 Endpointer* endpointer_; |
| 127 }; |
| 128 |
| 129 TEST(EndpointerTest, TestEmbeddedEndpointerEvents) { |
| 130 const int kSampleRate = 8000; // 8 k samples per second for AMR encoding. |
| 131 |
| 132 Endpointer endpointer(kSampleRate); |
| 133 const int64 kMillisecondsPerMicrosecond = 1000; |
| 134 const int64 short_timeout = 300 * kMillisecondsPerMicrosecond; |
| 135 endpointer.set_speech_input_possibly_complete_silence_length(short_timeout); |
| 136 const int64 long_timeout = 500 * kMillisecondsPerMicrosecond; |
| 137 endpointer.set_speech_input_complete_silence_length(long_timeout); |
| 138 endpointer.StartSession(); |
| 139 |
| 140 EndpointerFrameProcessor frame_processor(&endpointer); |
| 141 RunEndpointerEventsTest(&frame_processor); |
| 142 |
| 143 endpointer.EndSession(); |
| 144 } |
| 145 |
| 146 } // namespace speech_input |
OLD | NEW |