| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "content/browser/speech/endpointer/endpointer.h" | 5 #include "content/browser/speech/endpointer/endpointer.h" |
| 6 | 6 |
| 7 #include "base/time/time.h" | 7 #include "base/time/time.h" |
| 8 #include "content/browser/speech/audio_buffer.h" | 8 #include "content/browser/speech/audio_buffer.h" |
| 9 | 9 |
| 10 using base::Time; | 10 //using base::Time; // Collides with typedef Time in X11 |
| 11 | 11 |
| 12 namespace { | 12 namespace { |
| 13 const int kFrameRate = 50; // Frames per second: 1 frame = 20ms of audio. | 13 const int kFrameRate = 50; // Frames per second: 1 frame = 20ms of audio. |
| 14 } | 14 } |
| 15 | 15 |
| 16 namespace content { | 16 namespace content { |
| 17 | 17 |
| 18 Endpointer::Endpointer(int sample_rate) | 18 Endpointer::Endpointer(int sample_rate) |
| 19 : speech_input_possibly_complete_silence_length_us_(-1), | 19 : speech_input_possibly_complete_silence_length_us_(-1), |
| 20 speech_input_complete_silence_length_us_(-1), | 20 speech_input_complete_silence_length_us_(-1), |
| 21 audio_frame_time_us_(0), | 21 audio_frame_time_us_(0), |
| 22 sample_rate_(sample_rate), | 22 sample_rate_(sample_rate), |
| 23 frame_size_(0) { | 23 frame_size_(0) { |
| 24 Reset(); | 24 Reset(); |
| 25 | 25 |
| 26 frame_size_ = static_cast<int>(sample_rate / static_cast<float>(kFrameRate)); | 26 frame_size_ = static_cast<int>(sample_rate / static_cast<float>(kFrameRate)); |
| 27 | 27 |
| 28 speech_input_minimum_length_us_ = | 28 speech_input_minimum_length_us_ = |
| 29 static_cast<int64_t>(1.7 * Time::kMicrosecondsPerSecond); | 29 static_cast<int64_t>(1.7 * base::Time::kMicrosecondsPerSecond); |
| 30 speech_input_complete_silence_length_us_ = | 30 speech_input_complete_silence_length_us_ = |
| 31 static_cast<int64_t>(0.5 * Time::kMicrosecondsPerSecond); | 31 static_cast<int64_t>(0.5 * base::Time::kMicrosecondsPerSecond); |
| 32 long_speech_input_complete_silence_length_us_ = -1; | 32 long_speech_input_complete_silence_length_us_ = -1; |
| 33 long_speech_length_us_ = -1; | 33 long_speech_length_us_ = -1; |
| 34 speech_input_possibly_complete_silence_length_us_ = | 34 speech_input_possibly_complete_silence_length_us_ = |
| 35 1 * Time::kMicrosecondsPerSecond; | 35 1 * base::Time::kMicrosecondsPerSecond; |
| 36 | 36 |
| 37 // Set the default configuration for Push To Talk mode. | 37 // Set the default configuration for Push To Talk mode. |
| 38 EnergyEndpointerParams ep_config; | 38 EnergyEndpointerParams ep_config; |
| 39 ep_config.set_frame_period(1.0f / static_cast<float>(kFrameRate)); | 39 ep_config.set_frame_period(1.0f / static_cast<float>(kFrameRate)); |
| 40 ep_config.set_frame_duration(1.0f / static_cast<float>(kFrameRate)); | 40 ep_config.set_frame_duration(1.0f / static_cast<float>(kFrameRate)); |
| 41 ep_config.set_endpoint_margin(0.2f); | 41 ep_config.set_endpoint_margin(0.2f); |
| 42 ep_config.set_onset_window(0.15f); | 42 ep_config.set_onset_window(0.15f); |
| 43 ep_config.set_speech_on_window(0.4f); | 43 ep_config.set_speech_on_window(0.4f); |
| 44 ep_config.set_offset_window(0.15f); | 44 ep_config.set_offset_window(0.15f); |
| 45 ep_config.set_onset_detect_dur(0.09f); | 45 ep_config.set_onset_detect_dur(0.09f); |
| (...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 98 // frames at the end (which is ok since typically the caller will be recording | 98 // frames at the end (which is ok since typically the caller will be recording |
| 99 // audio in multiples of our frame size). | 99 // audio in multiples of our frame size). |
| 100 int sample_index = 0; | 100 int sample_index = 0; |
| 101 while (sample_index + frame_size_ <= num_samples) { | 101 while (sample_index + frame_size_ <= num_samples) { |
| 102 // Have the endpointer process the frame. | 102 // Have the endpointer process the frame. |
| 103 energy_endpointer_.ProcessAudioFrame(audio_frame_time_us_, | 103 energy_endpointer_.ProcessAudioFrame(audio_frame_time_us_, |
| 104 audio_data + sample_index, | 104 audio_data + sample_index, |
| 105 frame_size_, | 105 frame_size_, |
| 106 rms_out); | 106 rms_out); |
| 107 sample_index += frame_size_; | 107 sample_index += frame_size_; |
| 108 audio_frame_time_us_ += (frame_size_ * Time::kMicrosecondsPerSecond) / | 108 audio_frame_time_us_ += (frame_size_ * base::Time::kMicrosecondsPerSecond) / |
| 109 sample_rate_; | 109 sample_rate_; |
| 110 | 110 |
| 111 // Get the status of the endpointer. | 111 // Get the status of the endpointer. |
| 112 int64_t ep_time; | 112 int64_t ep_time; |
| 113 ep_status = energy_endpointer_.Status(&ep_time); | 113 ep_status = energy_endpointer_.Status(&ep_time); |
| 114 | 114 |
| 115 // Handle state changes. | 115 // Handle state changes. |
| 116 if ((EP_SPEECH_PRESENT == ep_status) && | 116 if ((EP_SPEECH_PRESENT == ep_status) && |
| 117 (EP_POSSIBLE_ONSET == old_ep_status_)) { | 117 (EP_POSSIBLE_ONSET == old_ep_status_)) { |
| 118 speech_end_time_us_ = -1; | 118 speech_end_time_us_ = -1; |
| (...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 160 speech_input_complete_ = true; | 160 speech_input_complete_ = true; |
| 161 } | 161 } |
| 162 } | 162 } |
| 163 } | 163 } |
| 164 old_ep_status_ = ep_status; | 164 old_ep_status_ = ep_status; |
| 165 } | 165 } |
| 166 return ep_status; | 166 return ep_status; |
| 167 } | 167 } |
| 168 | 168 |
| 169 } // namespace content | 169 } // namespace content |
| OLD | NEW |