| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 // | 4 // |
| 5 // To know more about the algorithm used and the original code which this is | 5 // To know more about the algorithm used and the original code which this is |
| 6 // based on, see | 6 // based on, see |
| 7 // https://wiki.corp.google.com/twiki/bin/view/Main/ChromeGoogleCodeXRef | 7 // https://wiki.corp.google.com/twiki/bin/view/Main/ChromeGoogleCodeXRef |
| 8 | 8 |
| 9 #include "content/browser/speech/endpointer/energy_endpointer.h" | 9 #include "content/browser/speech/endpointer/energy_endpointer.h" |
| 10 | 10 |
| 11 #include <math.h> | 11 #include <math.h> |
| 12 #include <stddef.h> |
| 12 | 13 |
| 13 #include "base/logging.h" | 14 #include "base/logging.h" |
| 15 #include "base/macros.h" |
| 14 | 16 |
| 15 namespace { | 17 namespace { |
| 16 | 18 |
| 17 // Returns the RMS (quadratic mean) of the input signal. | 19 // Returns the RMS (quadratic mean) of the input signal. |
| 18 float RMS(const int16* samples, int num_samples) { | 20 float RMS(const int16_t* samples, int num_samples) { |
| 19 int64 ssq_int64 = 0; | 21 int64_t ssq_int64 = 0; |
| 20 int64 sum_int64 = 0; | 22 int64_t sum_int64 = 0; |
| 21 for (int i = 0; i < num_samples; ++i) { | 23 for (int i = 0; i < num_samples; ++i) { |
| 22 sum_int64 += samples[i]; | 24 sum_int64 += samples[i]; |
| 23 ssq_int64 += samples[i] * samples[i]; | 25 ssq_int64 += samples[i] * samples[i]; |
| 24 } | 26 } |
| 25 // Now convert to doubles. | 27 // Now convert to doubles. |
| 26 double sum = static_cast<double>(sum_int64); | 28 double sum = static_cast<double>(sum_int64); |
| 27 sum /= num_samples; | 29 sum /= num_samples; |
| 28 double ssq = static_cast<double>(ssq_int64); | 30 double ssq = static_cast<double>(ssq_int64); |
| 29 return static_cast<float>(sqrt((ssq / num_samples) - (sum * sum))); | 31 return static_cast<float>(sqrt((ssq / num_samples) - (sum * sum))); |
| 30 } | 32 } |
| 31 | 33 |
| 32 int64 Secs2Usecs(float seconds) { | 34 int64_t Secs2Usecs(float seconds) { |
| 33 return static_cast<int64>(0.5 + (1.0e6 * seconds)); | 35 return static_cast<int64_t>(0.5 + (1.0e6 * seconds)); |
| 34 } | 36 } |
| 35 | 37 |
| 36 float GetDecibel(float value) { | 38 float GetDecibel(float value) { |
| 37 if (value > 1.0e-100) | 39 if (value > 1.0e-100) |
| 38 return 20 * log10(value); | 40 return 20 * log10(value); |
| 39 return -2000.0; | 41 return -2000.0; |
| 40 } | 42 } |
| 41 | 43 |
| 42 } // namespace | 44 } // namespace |
| 43 | 45 |
| 44 namespace content { | 46 namespace content { |
| 45 | 47 |
| 46 // Stores threshold-crossing histories for making decisions about the speech | 48 // Stores threshold-crossing histories for making decisions about the speech |
| 47 // state. | 49 // state. |
| 48 class EnergyEndpointer::HistoryRing { | 50 class EnergyEndpointer::HistoryRing { |
| 49 public: | 51 public: |
| 50 HistoryRing() : insertion_index_(0) {} | 52 HistoryRing() : insertion_index_(0) {} |
| 51 | 53 |
| 52 // Resets the ring to |size| elements each with state |initial_state| | 54 // Resets the ring to |size| elements each with state |initial_state| |
| 53 void SetRing(int size, bool initial_state); | 55 void SetRing(int size, bool initial_state); |
| 54 | 56 |
| 55 // Inserts a new entry into the ring and drops the oldest entry. | 57 // Inserts a new entry into the ring and drops the oldest entry. |
| 56 void Insert(int64 time_us, bool decision); | 58 void Insert(int64_t time_us, bool decision); |
| 57 | 59 |
| 58 // Returns the time in microseconds of the most recently added entry. | 60 // Returns the time in microseconds of the most recently added entry. |
| 59 int64 EndTime() const; | 61 int64_t EndTime() const; |
| 60 | 62 |
| 61 // Returns the sum of all intervals during which 'decision' is true within | 63 // Returns the sum of all intervals during which 'decision' is true within |
| 62 // the time in seconds specified by 'duration_sec'. The returned interval is | 64 // the time in seconds specified by 'duration_sec'. The returned interval is |
| 63 // in seconds. | 65 // in seconds. |
| 64 float RingSum(float duration_sec); | 66 float RingSum(float duration_sec); |
| 65 | 67 |
| 66 private: | 68 private: |
| 67 struct DecisionPoint { | 69 struct DecisionPoint { |
| 68 int64 time_us; | 70 int64_t time_us; |
| 69 bool decision; | 71 bool decision; |
| 70 }; | 72 }; |
| 71 | 73 |
| 72 std::vector<DecisionPoint> decision_points_; | 74 std::vector<DecisionPoint> decision_points_; |
| 73 int insertion_index_; // Index at which the next item gets added/inserted. | 75 int insertion_index_; // Index at which the next item gets added/inserted. |
| 74 | 76 |
| 75 DISALLOW_COPY_AND_ASSIGN(HistoryRing); | 77 DISALLOW_COPY_AND_ASSIGN(HistoryRing); |
| 76 }; | 78 }; |
| 77 | 79 |
| 78 void EnergyEndpointer::HistoryRing::SetRing(int size, bool initial_state) { | 80 void EnergyEndpointer::HistoryRing::SetRing(int size, bool initial_state) { |
| 79 insertion_index_ = 0; | 81 insertion_index_ = 0; |
| 80 decision_points_.clear(); | 82 decision_points_.clear(); |
| 81 DecisionPoint init = { -1, initial_state }; | 83 DecisionPoint init = { -1, initial_state }; |
| 82 decision_points_.resize(size, init); | 84 decision_points_.resize(size, init); |
| 83 } | 85 } |
| 84 | 86 |
| 85 void EnergyEndpointer::HistoryRing::Insert(int64 time_us, bool decision) { | 87 void EnergyEndpointer::HistoryRing::Insert(int64_t time_us, bool decision) { |
| 86 decision_points_[insertion_index_].time_us = time_us; | 88 decision_points_[insertion_index_].time_us = time_us; |
| 87 decision_points_[insertion_index_].decision = decision; | 89 decision_points_[insertion_index_].decision = decision; |
| 88 insertion_index_ = (insertion_index_ + 1) % decision_points_.size(); | 90 insertion_index_ = (insertion_index_ + 1) % decision_points_.size(); |
| 89 } | 91 } |
| 90 | 92 |
| 91 int64 EnergyEndpointer::HistoryRing::EndTime() const { | 93 int64_t EnergyEndpointer::HistoryRing::EndTime() const { |
| 92 int ind = insertion_index_ - 1; | 94 int ind = insertion_index_ - 1; |
| 93 if (ind < 0) | 95 if (ind < 0) |
| 94 ind = decision_points_.size() - 1; | 96 ind = decision_points_.size() - 1; |
| 95 return decision_points_[ind].time_us; | 97 return decision_points_[ind].time_us; |
| 96 } | 98 } |
| 97 | 99 |
| 98 float EnergyEndpointer::HistoryRing::RingSum(float duration_sec) { | 100 float EnergyEndpointer::HistoryRing::RingSum(float duration_sec) { |
| 99 if (!decision_points_.size()) | 101 if (!decision_points_.size()) |
| 100 return 0.0; | 102 return 0.0; |
| 101 | 103 |
| 102 int64 sum_us = 0; | 104 int64_t sum_us = 0; |
| 103 int ind = insertion_index_ - 1; | 105 int ind = insertion_index_ - 1; |
| 104 if (ind < 0) | 106 if (ind < 0) |
| 105 ind = decision_points_.size() - 1; | 107 ind = decision_points_.size() - 1; |
| 106 int64 end_us = decision_points_[ind].time_us; | 108 int64_t end_us = decision_points_[ind].time_us; |
| 107 bool is_on = decision_points_[ind].decision; | 109 bool is_on = decision_points_[ind].decision; |
| 108 int64 start_us = end_us - static_cast<int64>(0.5 + (1.0e6 * duration_sec)); | 110 int64_t start_us = |
| 111 end_us - static_cast<int64_t>(0.5 + (1.0e6 * duration_sec)); |
| 109 if (start_us < 0) | 112 if (start_us < 0) |
| 110 start_us = 0; | 113 start_us = 0; |
| 111 size_t n_summed = 1; // n points ==> (n-1) intervals | 114 size_t n_summed = 1; // n points ==> (n-1) intervals |
| 112 while ((decision_points_[ind].time_us > start_us) && | 115 while ((decision_points_[ind].time_us > start_us) && |
| 113 (n_summed < decision_points_.size())) { | 116 (n_summed < decision_points_.size())) { |
| 114 --ind; | 117 --ind; |
| 115 if (ind < 0) | 118 if (ind < 0) |
| 116 ind = decision_points_.size() - 1; | 119 ind = decision_points_.size() - 1; |
| 117 if (is_on) | 120 if (is_on) |
| 118 sum_us += end_us - decision_points_[ind].time_us; | 121 sum_us += end_us - decision_points_[ind].time_us; |
| (...skipping 20 matching lines...) Expand all Loading... |
| 139 rms_adapt_(0), | 142 rms_adapt_(0), |
| 140 start_lag_(0), | 143 start_lag_(0), |
| 141 end_lag_(0), | 144 end_lag_(0), |
| 142 user_input_start_time_us_(0) { | 145 user_input_start_time_us_(0) { |
| 143 } | 146 } |
| 144 | 147 |
| 145 EnergyEndpointer::~EnergyEndpointer() { | 148 EnergyEndpointer::~EnergyEndpointer() { |
| 146 } | 149 } |
| 147 | 150 |
| 148 int EnergyEndpointer::TimeToFrame(float time) const { | 151 int EnergyEndpointer::TimeToFrame(float time) const { |
| 149 return static_cast<int32>(0.5 + (time / params_.frame_period())); | 152 return static_cast<int32_t>(0.5 + (time / params_.frame_period())); |
| 150 } | 153 } |
| 151 | 154 |
| 152 void EnergyEndpointer::Restart(bool reset_threshold) { | 155 void EnergyEndpointer::Restart(bool reset_threshold) { |
| 153 status_ = EP_PRE_SPEECH; | 156 status_ = EP_PRE_SPEECH; |
| 154 user_input_start_time_us_ = 0; | 157 user_input_start_time_us_ = 0; |
| 155 | 158 |
| 156 if (reset_threshold) { | 159 if (reset_threshold) { |
| 157 decision_threshold_ = params_.decision_threshold(); | 160 decision_threshold_ = params_.decision_threshold(); |
| 158 rms_adapt_ = decision_threshold_; | 161 rms_adapt_ = decision_threshold_; |
| 159 noise_level_ = params_.decision_threshold() / 2.0f; | 162 noise_level_ = params_.decision_threshold() / 2.0f; |
| (...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 193 | 196 |
| 194 // Flag that indicates that current input should be used for | 197 // Flag that indicates that current input should be used for |
| 195 // estimating the environment. The user has not yet started input | 198 // estimating the environment. The user has not yet started input |
| 196 // by e.g. pressing the push-to-talk button. By default, this is | 199 // by e.g. pressing the push-to-talk button. By default, this is |
| 197 // false for backward compatibility. | 200 // false for backward compatibility. |
| 198 estimating_environment_ = false; | 201 estimating_environment_ = false; |
| 199 // The initial value of the noise and speech levels is inconsequential. | 202 // The initial value of the noise and speech levels is inconsequential. |
| 200 // The level of the first frame will overwrite these values. | 203 // The level of the first frame will overwrite these values. |
| 201 noise_level_ = params_.decision_threshold() / 2.0f; | 204 noise_level_ = params_.decision_threshold() / 2.0f; |
| 202 fast_update_frames_ = | 205 fast_update_frames_ = |
| 203 static_cast<int64>(params_.fast_update_dur() / params_.frame_period()); | 206 static_cast<int64_t>(params_.fast_update_dur() / params_.frame_period()); |
| 204 | 207 |
| 205 frame_counter_ = 0; // Used for rapid initial update of levels. | 208 frame_counter_ = 0; // Used for rapid initial update of levels. |
| 206 | 209 |
| 207 sample_rate_ = params_.sample_rate(); | 210 sample_rate_ = params_.sample_rate(); |
| 208 start_lag_ = static_cast<int>(sample_rate_ / | 211 start_lag_ = static_cast<int>(sample_rate_ / |
| 209 params_.max_fundamental_frequency()); | 212 params_.max_fundamental_frequency()); |
| 210 end_lag_ = static_cast<int>(sample_rate_ / | 213 end_lag_ = static_cast<int>(sample_rate_ / |
| 211 params_.min_fundamental_frequency()); | 214 params_.min_fundamental_frequency()); |
| 212 } | 215 } |
| 213 | 216 |
| 214 void EnergyEndpointer::StartSession() { | 217 void EnergyEndpointer::StartSession() { |
| 215 Restart(true); | 218 Restart(true); |
| 216 } | 219 } |
| 217 | 220 |
| 218 void EnergyEndpointer::EndSession() { | 221 void EnergyEndpointer::EndSession() { |
| 219 status_ = EP_POST_SPEECH; | 222 status_ = EP_POST_SPEECH; |
| 220 } | 223 } |
| 221 | 224 |
| 222 void EnergyEndpointer::SetEnvironmentEstimationMode() { | 225 void EnergyEndpointer::SetEnvironmentEstimationMode() { |
| 223 Restart(true); | 226 Restart(true); |
| 224 estimating_environment_ = true; | 227 estimating_environment_ = true; |
| 225 } | 228 } |
| 226 | 229 |
| 227 void EnergyEndpointer::SetUserInputMode() { | 230 void EnergyEndpointer::SetUserInputMode() { |
| 228 estimating_environment_ = false; | 231 estimating_environment_ = false; |
| 229 user_input_start_time_us_ = endpointer_time_us_; | 232 user_input_start_time_us_ = endpointer_time_us_; |
| 230 } | 233 } |
| 231 | 234 |
| 232 void EnergyEndpointer::ProcessAudioFrame(int64 time_us, | 235 void EnergyEndpointer::ProcessAudioFrame(int64_t time_us, |
| 233 const int16* samples, | 236 const int16_t* samples, |
| 234 int num_samples, | 237 int num_samples, |
| 235 float* rms_out) { | 238 float* rms_out) { |
| 236 endpointer_time_us_ = time_us; | 239 endpointer_time_us_ = time_us; |
| 237 float rms = RMS(samples, num_samples); | 240 float rms = RMS(samples, num_samples); |
| 238 | 241 |
| 239 // Check that this is user input audio vs. pre-input adaptation audio. | 242 // Check that this is user input audio vs. pre-input adaptation audio. |
| 240 // Input audio starts when the user indicates start of input, by e.g. | 243 // Input audio starts when the user indicates start of input, by e.g. |
| 241 // pressing push-to-talk. Audio received prior to that is used to update | 244 // pressing push-to-talk. Audio received prior to that is used to update |
| 242 // noise and speech level estimates. | 245 // noise and speech level estimates. |
| 243 if (!estimating_environment_) { | 246 if (!estimating_environment_) { |
| (...skipping 117 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 361 noise_level_ = (0.95f * noise_level_) + (0.05f * rms); | 364 noise_level_ = (0.95f * noise_level_) + (0.05f * rms); |
| 362 } | 365 } |
| 363 if (estimating_environment_ || (frame_counter_ < fast_update_frames_)) { | 366 if (estimating_environment_ || (frame_counter_ < fast_update_frames_)) { |
| 364 decision_threshold_ = noise_level_ * 2; // 6dB above noise level. | 367 decision_threshold_ = noise_level_ * 2; // 6dB above noise level. |
| 365 // Set a floor | 368 // Set a floor |
| 366 if (decision_threshold_ < params_.min_decision_threshold()) | 369 if (decision_threshold_ < params_.min_decision_threshold()) |
| 367 decision_threshold_ = params_.min_decision_threshold(); | 370 decision_threshold_ = params_.min_decision_threshold(); |
| 368 } | 371 } |
| 369 } | 372 } |
| 370 | 373 |
| 371 EpStatus EnergyEndpointer::Status(int64* status_time) const { | 374 EpStatus EnergyEndpointer::Status(int64_t* status_time) const { |
| 372 *status_time = history_->EndTime(); | 375 *status_time = history_->EndTime(); |
| 373 return status_; | 376 return status_; |
| 374 } | 377 } |
| 375 | 378 |
| 376 } // namespace content | 379 } // namespace content |
| OLD | NEW |