OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 // | 4 // |
5 // To know more about the algorithm used and the original code which this is | 5 // To know more about the algorithm used and the original code which this is |
6 // based on, see | 6 // based on, see |
7 // https://wiki.corp.google.com/twiki/bin/view/Main/ChromeGoogleCodeXRef | 7 // https://wiki.corp.google.com/twiki/bin/view/Main/ChromeGoogleCodeXRef |
8 | 8 |
9 #include "content/browser/speech/endpointer/energy_endpointer.h" | 9 #include "content/browser/speech/endpointer/energy_endpointer.h" |
10 | 10 |
11 #include <math.h> | 11 #include <math.h> |
| 12 #include <stddef.h> |
12 | 13 |
13 #include "base/logging.h" | 14 #include "base/logging.h" |
| 15 #include "base/macros.h" |
14 | 16 |
15 namespace { | 17 namespace { |
16 | 18 |
17 // Returns the RMS (quadratic mean) of the input signal. | 19 // Returns the RMS (quadratic mean) of the input signal. |
18 float RMS(const int16* samples, int num_samples) { | 20 float RMS(const int16_t* samples, int num_samples) { |
19 int64 ssq_int64 = 0; | 21 int64_t ssq_int64 = 0; |
20 int64 sum_int64 = 0; | 22 int64_t sum_int64 = 0; |
21 for (int i = 0; i < num_samples; ++i) { | 23 for (int i = 0; i < num_samples; ++i) { |
22 sum_int64 += samples[i]; | 24 sum_int64 += samples[i]; |
23 ssq_int64 += samples[i] * samples[i]; | 25 ssq_int64 += samples[i] * samples[i]; |
24 } | 26 } |
25 // Now convert to doubles. | 27 // Now convert to doubles. |
26 double sum = static_cast<double>(sum_int64); | 28 double sum = static_cast<double>(sum_int64); |
27 sum /= num_samples; | 29 sum /= num_samples; |
28 double ssq = static_cast<double>(ssq_int64); | 30 double ssq = static_cast<double>(ssq_int64); |
29 return static_cast<float>(sqrt((ssq / num_samples) - (sum * sum))); | 31 return static_cast<float>(sqrt((ssq / num_samples) - (sum * sum))); |
30 } | 32 } |
31 | 33 |
32 int64 Secs2Usecs(float seconds) { | 34 int64_t Secs2Usecs(float seconds) { |
33 return static_cast<int64>(0.5 + (1.0e6 * seconds)); | 35 return static_cast<int64_t>(0.5 + (1.0e6 * seconds)); |
34 } | 36 } |
35 | 37 |
36 float GetDecibel(float value) { | 38 float GetDecibel(float value) { |
37 if (value > 1.0e-100) | 39 if (value > 1.0e-100) |
38 return 20 * log10(value); | 40 return 20 * log10(value); |
39 return -2000.0; | 41 return -2000.0; |
40 } | 42 } |
41 | 43 |
42 } // namespace | 44 } // namespace |
43 | 45 |
44 namespace content { | 46 namespace content { |
45 | 47 |
46 // Stores threshold-crossing histories for making decisions about the speech | 48 // Stores threshold-crossing histories for making decisions about the speech |
47 // state. | 49 // state. |
48 class EnergyEndpointer::HistoryRing { | 50 class EnergyEndpointer::HistoryRing { |
49 public: | 51 public: |
50 HistoryRing() : insertion_index_(0) {} | 52 HistoryRing() : insertion_index_(0) {} |
51 | 53 |
52 // Resets the ring to |size| elements, each with state |initial_state|. | 54 // Resets the ring to |size| elements, each with state |initial_state|. |
53 void SetRing(int size, bool initial_state); | 55 void SetRing(int size, bool initial_state); |
54 | 56 |
55 // Inserts a new entry into the ring and drops the oldest entry. | 57 // Inserts a new entry into the ring and drops the oldest entry. |
56 void Insert(int64 time_us, bool decision); | 58 void Insert(int64_t time_us, bool decision); |
57 | 59 |
58 // Returns the time in microseconds of the most recently added entry. | 60 // Returns the time in microseconds of the most recently added entry. |
59 int64 EndTime() const; | 61 int64_t EndTime() const; |
60 | 62 |
61 // Returns the sum of all intervals during which 'decision' is true within | 63 // Returns the sum of all intervals during which 'decision' is true within |
62 // the time in seconds specified by 'duration'. The returned interval is | 64 // the time in seconds specified by 'duration'. The returned interval is |
63 // in seconds. | 65 // in seconds. |
64 float RingSum(float duration_sec); | 66 float RingSum(float duration_sec); |
65 | 67 |
66 private: | 68 private: |
67 struct DecisionPoint { | 69 struct DecisionPoint { |
68 int64 time_us; | 70 int64_t time_us; |
69 bool decision; | 71 bool decision; |
70 }; | 72 }; |
71 | 73 |
72 std::vector<DecisionPoint> decision_points_; | 74 std::vector<DecisionPoint> decision_points_; |
73 int insertion_index_; // Index at which the next item gets added/inserted. | 75 int insertion_index_; // Index at which the next item gets added/inserted. |
74 | 76 |
75 DISALLOW_COPY_AND_ASSIGN(HistoryRing); | 77 DISALLOW_COPY_AND_ASSIGN(HistoryRing); |
76 }; | 78 }; |
77 | 79 |
78 void EnergyEndpointer::HistoryRing::SetRing(int size, bool initial_state) { | 80 void EnergyEndpointer::HistoryRing::SetRing(int size, bool initial_state) { |
79 insertion_index_ = 0; | 81 insertion_index_ = 0; |
80 decision_points_.clear(); | 82 decision_points_.clear(); |
81 DecisionPoint init = { -1, initial_state }; | 83 DecisionPoint init = { -1, initial_state }; |
82 decision_points_.resize(size, init); | 84 decision_points_.resize(size, init); |
83 } | 85 } |
84 | 86 |
85 void EnergyEndpointer::HistoryRing::Insert(int64 time_us, bool decision) { | 87 void EnergyEndpointer::HistoryRing::Insert(int64_t time_us, bool decision) { |
86 decision_points_[insertion_index_].time_us = time_us; | 88 decision_points_[insertion_index_].time_us = time_us; |
87 decision_points_[insertion_index_].decision = decision; | 89 decision_points_[insertion_index_].decision = decision; |
88 insertion_index_ = (insertion_index_ + 1) % decision_points_.size(); | 90 insertion_index_ = (insertion_index_ + 1) % decision_points_.size(); |
89 } | 91 } |
90 | 92 |
91 int64 EnergyEndpointer::HistoryRing::EndTime() const { | 93 int64_t EnergyEndpointer::HistoryRing::EndTime() const { |
92 int ind = insertion_index_ - 1; | 94 int ind = insertion_index_ - 1; |
93 if (ind < 0) | 95 if (ind < 0) |
94 ind = decision_points_.size() - 1; | 96 ind = decision_points_.size() - 1; |
95 return decision_points_[ind].time_us; | 97 return decision_points_[ind].time_us; |
96 } | 98 } |
97 | 99 |
98 float EnergyEndpointer::HistoryRing::RingSum(float duration_sec) { | 100 float EnergyEndpointer::HistoryRing::RingSum(float duration_sec) { |
99 if (!decision_points_.size()) | 101 if (!decision_points_.size()) |
100 return 0.0; | 102 return 0.0; |
101 | 103 |
102 int64 sum_us = 0; | 104 int64_t sum_us = 0; |
103 int ind = insertion_index_ - 1; | 105 int ind = insertion_index_ - 1; |
104 if (ind < 0) | 106 if (ind < 0) |
105 ind = decision_points_.size() - 1; | 107 ind = decision_points_.size() - 1; |
106 int64 end_us = decision_points_[ind].time_us; | 108 int64_t end_us = decision_points_[ind].time_us; |
107 bool is_on = decision_points_[ind].decision; | 109 bool is_on = decision_points_[ind].decision; |
108 int64 start_us = end_us - static_cast<int64>(0.5 + (1.0e6 * duration_sec)); | 110 int64_t start_us = |
| 111 end_us - static_cast<int64_t>(0.5 + (1.0e6 * duration_sec)); |
109 if (start_us < 0) | 112 if (start_us < 0) |
110 start_us = 0; | 113 start_us = 0; |
111 size_t n_summed = 1; // n points ==> (n-1) intervals | 114 size_t n_summed = 1; // n points ==> (n-1) intervals |
112 while ((decision_points_[ind].time_us > start_us) && | 115 while ((decision_points_[ind].time_us > start_us) && |
113 (n_summed < decision_points_.size())) { | 116 (n_summed < decision_points_.size())) { |
114 --ind; | 117 --ind; |
115 if (ind < 0) | 118 if (ind < 0) |
116 ind = decision_points_.size() - 1; | 119 ind = decision_points_.size() - 1; |
117 if (is_on) | 120 if (is_on) |
118 sum_us += end_us - decision_points_[ind].time_us; | 121 sum_us += end_us - decision_points_[ind].time_us; |
(...skipping 20 matching lines...) Expand all Loading... |
139 rms_adapt_(0), | 142 rms_adapt_(0), |
140 start_lag_(0), | 143 start_lag_(0), |
141 end_lag_(0), | 144 end_lag_(0), |
142 user_input_start_time_us_(0) { | 145 user_input_start_time_us_(0) { |
143 } | 146 } |
144 | 147 |
145 EnergyEndpointer::~EnergyEndpointer() { | 148 EnergyEndpointer::~EnergyEndpointer() { |
146 } | 149 } |
147 | 150 |
148 int EnergyEndpointer::TimeToFrame(float time) const { | 151 int EnergyEndpointer::TimeToFrame(float time) const { |
149 return static_cast<int32>(0.5 + (time / params_.frame_period())); | 152 return static_cast<int32_t>(0.5 + (time / params_.frame_period())); |
150 } | 153 } |
151 | 154 |
152 void EnergyEndpointer::Restart(bool reset_threshold) { | 155 void EnergyEndpointer::Restart(bool reset_threshold) { |
153 status_ = EP_PRE_SPEECH; | 156 status_ = EP_PRE_SPEECH; |
154 user_input_start_time_us_ = 0; | 157 user_input_start_time_us_ = 0; |
155 | 158 |
156 if (reset_threshold) { | 159 if (reset_threshold) { |
157 decision_threshold_ = params_.decision_threshold(); | 160 decision_threshold_ = params_.decision_threshold(); |
158 rms_adapt_ = decision_threshold_; | 161 rms_adapt_ = decision_threshold_; |
159 noise_level_ = params_.decision_threshold() / 2.0f; | 162 noise_level_ = params_.decision_threshold() / 2.0f; |
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
193 | 196 |
194 // Flag that indicates that current input should be used for | 197 // Flag that indicates that current input should be used for |
195 // estimating the environment. The user has not yet started input | 198 // estimating the environment. The user has not yet started input |
196 // by e.g. pressing the push-to-talk button. By default, this is | 199 // by e.g. pressing the push-to-talk button. By default, this is |
197 // false for backward compatibility. | 200 // false for backward compatibility. |
198 estimating_environment_ = false; | 201 estimating_environment_ = false; |
199 // The initial value of the noise and speech levels is inconsequential. | 202 // The initial value of the noise and speech levels is inconsequential. |
200 // The level of the first frame will overwrite these values. | 203 // The level of the first frame will overwrite these values. |
201 noise_level_ = params_.decision_threshold() / 2.0f; | 204 noise_level_ = params_.decision_threshold() / 2.0f; |
202 fast_update_frames_ = | 205 fast_update_frames_ = |
203 static_cast<int64>(params_.fast_update_dur() / params_.frame_period()); | 206 static_cast<int64_t>(params_.fast_update_dur() / params_.frame_period()); |
204 | 207 |
205 frame_counter_ = 0; // Used for rapid initial update of levels. | 208 frame_counter_ = 0; // Used for rapid initial update of levels. |
206 | 209 |
207 sample_rate_ = params_.sample_rate(); | 210 sample_rate_ = params_.sample_rate(); |
208 start_lag_ = static_cast<int>(sample_rate_ / | 211 start_lag_ = static_cast<int>(sample_rate_ / |
209 params_.max_fundamental_frequency()); | 212 params_.max_fundamental_frequency()); |
210 end_lag_ = static_cast<int>(sample_rate_ / | 213 end_lag_ = static_cast<int>(sample_rate_ / |
211 params_.min_fundamental_frequency()); | 214 params_.min_fundamental_frequency()); |
212 } | 215 } |
213 | 216 |
214 void EnergyEndpointer::StartSession() { | 217 void EnergyEndpointer::StartSession() { |
215 Restart(true); | 218 Restart(true); |
216 } | 219 } |
217 | 220 |
218 void EnergyEndpointer::EndSession() { | 221 void EnergyEndpointer::EndSession() { |
219 status_ = EP_POST_SPEECH; | 222 status_ = EP_POST_SPEECH; |
220 } | 223 } |
221 | 224 |
222 void EnergyEndpointer::SetEnvironmentEstimationMode() { | 225 void EnergyEndpointer::SetEnvironmentEstimationMode() { |
223 Restart(true); | 226 Restart(true); |
224 estimating_environment_ = true; | 227 estimating_environment_ = true; |
225 } | 228 } |
226 | 229 |
227 void EnergyEndpointer::SetUserInputMode() { | 230 void EnergyEndpointer::SetUserInputMode() { |
228 estimating_environment_ = false; | 231 estimating_environment_ = false; |
229 user_input_start_time_us_ = endpointer_time_us_; | 232 user_input_start_time_us_ = endpointer_time_us_; |
230 } | 233 } |
231 | 234 |
232 void EnergyEndpointer::ProcessAudioFrame(int64 time_us, | 235 void EnergyEndpointer::ProcessAudioFrame(int64_t time_us, |
233 const int16* samples, | 236 const int16_t* samples, |
234 int num_samples, | 237 int num_samples, |
235 float* rms_out) { | 238 float* rms_out) { |
236 endpointer_time_us_ = time_us; | 239 endpointer_time_us_ = time_us; |
237 float rms = RMS(samples, num_samples); | 240 float rms = RMS(samples, num_samples); |
238 | 241 |
239 // Check that this is user input audio vs. pre-input adaptation audio. | 242 // Check that this is user input audio vs. pre-input adaptation audio. |
240 // Input audio starts when the user indicates start of input, by e.g. | 243 // Input audio starts when the user indicates start of input, by e.g. |
241 // pressing push-to-talk. Audio received prior to that is used to update | 244 // pressing push-to-talk. Audio received prior to that is used to update |
242 // noise and speech level estimates. | 245 // noise and speech level estimates. |
243 if (!estimating_environment_) { | 246 if (!estimating_environment_) { |
(...skipping 117 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
361 noise_level_ = (0.95f * noise_level_) + (0.05f * rms); | 364 noise_level_ = (0.95f * noise_level_) + (0.05f * rms); |
362 } | 365 } |
363 if (estimating_environment_ || (frame_counter_ < fast_update_frames_)) { | 366 if (estimating_environment_ || (frame_counter_ < fast_update_frames_)) { |
364 decision_threshold_ = noise_level_ * 2; // 6dB above noise level. | 367 decision_threshold_ = noise_level_ * 2; // 6dB above noise level. |
365 // Set a floor | 368 // Set a floor |
366 if (decision_threshold_ < params_.min_decision_threshold()) | 369 if (decision_threshold_ < params_.min_decision_threshold()) |
367 decision_threshold_ = params_.min_decision_threshold(); | 370 decision_threshold_ = params_.min_decision_threshold(); |
368 } | 371 } |
369 } | 372 } |
370 | 373 |
371 EpStatus EnergyEndpointer::Status(int64* status_time) const { | 374 EpStatus EnergyEndpointer::Status(int64_t* status_time) const { |
372 *status_time = history_->EndTime(); | 375 *status_time = history_->EndTime(); |
373 return status_; | 376 return status_; |
374 } | 377 } |
375 | 378 |
376 } // namespace content | 379 } // namespace content |
OLD | NEW |