Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(20)

Side by Side Diff: chrome/browser/speech/endpointer/endpointer_unittest.cc

Issue 3117026: Add an endpointer for detecting end of speech. (Closed)
Patch Set: Merged with latest. Created 10 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/task.h"
6 #include "chrome/browser/speech/endpointer/endpointer.h"
7 #include "testing/gtest/include/gtest/gtest.h"
8
9 namespace {
10 const int kFrameRate = 50; // 20 ms long frames for AMR encoding.
11 const int kSampleRate = 8000; // 8 k samples per second for AMR encoding.
12
13 // At 8 sample per second a 20 ms frame is 160 samples, which corrsponds
14 // to the AMR codec.
15 const int kFrameSize = kSampleRate / kFrameRate; // 160 samples.
16 COMPILE_ASSERT(kFrameSize == 160, invalid_frame_size);
17 }
18
19 namespace speech_input {
20
21 class FrameProcessor {
22 public:
23 // Process a single frame of test audio samples.
24 virtual EpStatus ProcessFrame(int64 time, int16* samples, int frame_size) = 0;
25 };
26
27 void RunEndpointerEventsTest(FrameProcessor* processor) {
28 int16 samples[kFrameSize];
29
30 // We will create a white noise signal of 150 frames. The frames from 50 to
31 // 100 will have more power, and the endpointer should fire on those frames.
32 const int kNumFrames = 150;
33
34 // Create a random sequence of samples.
35 srand(1);
36 float gain = 0.0;
37 int64 time = 0;
38 for (int frame_count = 0; frame_count < kNumFrames; ++frame_count) {
39 // The frames from 50 to 100 will have more power, and the endpointer
40 // should detect those frames as speech.
41 if ((frame_count >= 50) && (frame_count < 100)) {
42 gain = 2000.0;
43 } else {
44 gain = 1.0;
45 }
46 // Create random samples.
47 for (int i = 0; i < kFrameSize; ++i) {
48 float randNum = static_cast<float>(rand() - (RAND_MAX / 2)) /
49 static_cast<float>(RAND_MAX);
50 samples[i] = static_cast<int16>(gain * randNum);
51 }
52
53 EpStatus ep_status = processor->ProcessFrame(time, samples, kFrameSize);
54 time += static_cast<int64>(kFrameSize * (1e6 / kSampleRate));
55
56 // Log the status.
57 if (20 == frame_count)
58 EXPECT_EQ(EP_PRE_SPEECH, ep_status);
59 if (70 == frame_count)
60 EXPECT_EQ(EP_SPEECH_PRESENT, ep_status);
61 if (120 == frame_count)
62 EXPECT_EQ(EP_PRE_SPEECH, ep_status);
63 }
64 }
65
66 // This test instantiates and initializes a stand alone endpointer module.
67 // The test creates FrameData objects with random noise and send them
68 // to the endointer module. The energy of the first 50 frames is low,
69 // followed by 500 high energy frames, and another 50 low energy frames.
70 // We test that the correct start and end frames were detected.
71 class EnergyEndpointerFrameProcessor : public FrameProcessor {
72 public:
73 explicit EnergyEndpointerFrameProcessor(EnergyEndpointer* endpointer)
74 : endpointer_(endpointer) {}
75
76 EpStatus ProcessFrame(int64 time, int16* samples, int frame_size) {
77 endpointer_->ProcessAudioFrame(time, samples, kFrameSize);
78 int64 ep_time;
79 return endpointer_->Status(&ep_time);
80 }
81
82 private:
83 EnergyEndpointer* endpointer_;
84 };
85
86 TEST(EndpointerTest, TestEnergyEndpointerEvents) {
87 // Initialize endpointer and configure it. We specify the parameters
88 // here for a 20ms window, and a 20ms step size, which corrsponds to
89 // the narrow band AMR codec.
90 EnergyEndpointerParams ep_config;
91 ep_config.set_frame_period(1.0f / static_cast<float>(kFrameRate));
92 ep_config.set_frame_duration(1.0f / static_cast<float>(kFrameRate));
93 ep_config.set_endpoint_margin(0.2f);
94 ep_config.set_onset_window(0.15f);
95 ep_config.set_speech_on_window(0.4f);
96 ep_config.set_offset_window(0.15f);
97 ep_config.set_onset_detect_dur(0.09f);
98 ep_config.set_onset_confirm_dur(0.075f);
99 ep_config.set_on_maintain_dur(0.10f);
100 ep_config.set_offset_confirm_dur(0.12f);
101 ep_config.set_decision_threshold(100.0f);
102 EnergyEndpointer endpointer;
103 endpointer.Init(ep_config);
104
105 endpointer.StartSession();
106
107 EnergyEndpointerFrameProcessor frame_processor(&endpointer);
108 RunEndpointerEventsTest(&frame_processor);
109
110 endpointer.EndSession();
111 };
112
113 // Test endpointer wrapper class.
114 class EndpointerFrameProcessor : public FrameProcessor {
115 public:
116 explicit EndpointerFrameProcessor(Endpointer* endpointer)
117 : endpointer_(endpointer) {}
118
119 EpStatus ProcessFrame(int64 time, int16* samples, int frame_size) {
120 endpointer_->ProcessAudio(samples, kFrameSize);
121 int64 ep_time;
122 return endpointer_->Status(&ep_time);
123 }
124
125 private:
126 Endpointer* endpointer_;
127 };
128
129 TEST(EndpointerTest, TestEmbeddedEndpointerEvents) {
130 const int kSampleRate = 8000; // 8 k samples per second for AMR encoding.
131
132 Endpointer endpointer(kSampleRate);
133 const int64 kMillisecondsPerMicrosecond = 1000;
134 const int64 short_timeout = 300 * kMillisecondsPerMicrosecond;
135 endpointer.set_speech_input_possibly_complete_silence_length(short_timeout);
136 const int64 long_timeout = 500 * kMillisecondsPerMicrosecond;
137 endpointer.set_speech_input_complete_silence_length(long_timeout);
138 endpointer.StartSession();
139
140 EndpointerFrameProcessor frame_processor(&endpointer);
141 RunEndpointerEventsTest(&frame_processor);
142
143 endpointer.EndSession();
144 }
145
146 } // namespace speech_input
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698