OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // The EnergyEndpointer class finds likely speech onset and offset points. | 5 // The EnergyEndpointer class finds likely speech onset and offset points. |
6 // | 6 // |
7 // The implementation described here is about the simplest possible. | 7 // The implementation described here is about the simplest possible. |
8 // It is based on timings of threshold crossings for overall signal | 8 // It is based on timings of threshold crossings for overall signal |
9 // RMS. It is suitable for lightweight applications. | 9 // RMS. It is suitable for lightweight applications. |
10 // | 10 // |
(...skipping 19 matching lines...) Expand all Loading... |
30 // EnvironmentEstimation mode, for at least 200ms, before switching to | 30 // EnvironmentEstimation mode, for at least 200ms, before switching to |
31 // UserInputMode. | 31 // UserInputMode. |
32 // Audio feedback contamination can appear in the input audio, if not cut | 32 // Audio feedback contamination can appear in the input audio, if not cut |
33 // out or handled by echo cancellation. Audio feedback can trigger a false | 33 // out or handled by echo cancellation. Audio feedback can trigger a false |
34 // accept. The false accepts can be ignored by setting | 34 // accept. The false accepts can be ignored by setting |
35 // ep_contamination_rejection_period. | 35 // ep_contamination_rejection_period. |
36 | 36 |
37 #ifndef CONTENT_BROWSER_SPEECH_ENDPOINTER_ENERGY_ENDPOINTER_H_ | 37 #ifndef CONTENT_BROWSER_SPEECH_ENDPOINTER_ENERGY_ENDPOINTER_H_ |
38 #define CONTENT_BROWSER_SPEECH_ENDPOINTER_ENERGY_ENDPOINTER_H_ | 38 #define CONTENT_BROWSER_SPEECH_ENDPOINTER_ENERGY_ENDPOINTER_H_ |
39 | 39 |
| 40 #include <stdint.h> |
| 41 |
40 #include <vector> | 42 #include <vector> |
41 | 43 |
42 #include "base/basictypes.h" | 44 #include "base/macros.h" |
43 #include "base/memory/scoped_ptr.h" | 45 #include "base/memory/scoped_ptr.h" |
44 #include "content/browser/speech/endpointer/energy_endpointer_params.h" | 46 #include "content/browser/speech/endpointer/energy_endpointer_params.h" |
45 #include "content/common/content_export.h" | 47 #include "content/common/content_export.h" |
46 | 48 |
47 namespace content { | 49 namespace content { |
48 | 50 |
49 // Endpointer status codes | 51 // Endpointer status codes |
50 enum EpStatus { | 52 enum EpStatus { |
51 EP_PRE_SPEECH = 10, | 53 EP_PRE_SPEECH = 10, |
52 EP_POSSIBLE_ONSET, | 54 EP_POSSIBLE_ONSET, |
(...skipping 20 matching lines...) Expand all Loading... |
73 // Start environment estimation. Audio will be used for environment estimation | 75 // Start environment estimation. Audio will be used for environment estimation |
74 // i.e. noise level estimation. | 76 // i.e. noise level estimation. |
75 void SetEnvironmentEstimationMode(); | 77 void SetEnvironmentEstimationMode(); |
76 | 78 |
77 // Start user input. This should be called when the user indicates start of | 79 // Start user input. This should be called when the user indicates start of |
78 // input, e.g. by pressing a button. | 80 // input, e.g. by pressing a button. |
79 void SetUserInputMode(); | 81 void SetUserInputMode(); |
80 | 82 |
81 // Processes the next input frame and modifies EnergyEndpointer status as | 83 // Processes the next input frame and modifies EnergyEndpointer status as |
82 // appropriate based on the computation. | 84 // appropriate based on the computation. |
83 void ProcessAudioFrame(int64 time_us, | 85 void ProcessAudioFrame(int64_t time_us, |
84 const int16* samples, int num_samples, | 86 const int16_t* samples, |
| 87 int num_samples, |
85 float* rms_out); | 88 float* rms_out); |
86 | 89 |
87 // Returns the current state of the EnergyEndpointer and the time | 90 // Returns the current state of the EnergyEndpointer and the time |
88 // corresponding to the most recently computed frame. | 91 // corresponding to the most recently computed frame. |
89 EpStatus Status(int64* status_time_us) const; | 92 EpStatus Status(int64_t* status_time_us) const; |
90 | 93 |
91 bool estimating_environment() const { | 94 bool estimating_environment() const { |
92 return estimating_environment_; | 95 return estimating_environment_; |
93 } | 96 } |
94 | 97 |
95 // Returns estimated noise level in dB. | 98 // Returns estimated noise level in dB. |
96 float GetNoiseLevelDb() const; | 99 float GetNoiseLevelDb() const; |
97 | 100 |
98 private: | 101 private: |
99 class HistoryRing; | 102 class HistoryRing; |
100 | 103 |
101 // Resets the endpointer internal state. If reset_threshold is true, the | 104 // Resets the endpointer internal state. If reset_threshold is true, the |
102 // state will be reset completely, including adaptive thresholds and the | 105 // state will be reset completely, including adaptive thresholds and the |
103 // removal of all history information. | 106 // removal of all history information. |
104 void Restart(bool reset_threshold); | 107 void Restart(bool reset_threshold); |
105 | 108 |
106 // Update internal speech and noise levels. | 109 // Update internal speech and noise levels. |
107 void UpdateLevels(float rms); | 110 void UpdateLevels(float rms); |
108 | 111 |
109 // Returns the number of frames (or frame number) corresponding to | 112 // Returns the number of frames (or frame number) corresponding to |
110 // the 'time' (in seconds). | 113 // the 'time' (in seconds). |
111 int TimeToFrame(float time) const; | 114 int TimeToFrame(float time) const; |
112 | 115 |
113 EpStatus status_; // The current state of this instance. | 116 EpStatus status_; // The current state of this instance. |
114 float offset_confirm_dur_sec_; // max on time allowed to confirm POST_SPEECH | 117 float offset_confirm_dur_sec_; // max on time allowed to confirm POST_SPEECH |
115 int64 endpointer_time_us_; // Time of the most recently received audio frame. | 118 int64_t |
116 int64 fast_update_frames_; // Number of frames for initial level adaptation. | 119 endpointer_time_us_; // Time of the most recently received audio frame. |
117 int64 frame_counter_; // Number of frames seen. Used for initial adaptation. | 120 int64_t |
| 121 fast_update_frames_; // Number of frames for initial level adaptation. |
| 122 int64_t |
| 123 frame_counter_; // Number of frames seen. Used for initial adaptation. |
118 float max_window_dur_; // Largest search window size (seconds) | 124 float max_window_dur_; // Largest search window size (seconds) |
119 float sample_rate_; // Sampling rate. | 125 float sample_rate_; // Sampling rate. |
120 | 126 |
121 // Ring buffers to hold the speech activity history. | 127 // Ring buffers to hold the speech activity history. |
122 scoped_ptr<HistoryRing> history_; | 128 scoped_ptr<HistoryRing> history_; |
123 | 129 |
124 // Configuration parameters. | 130 // Configuration parameters. |
125 EnergyEndpointerParams params_; | 131 EnergyEndpointerParams params_; |
126 | 132 |
127 // RMS which must be exceeded to conclude that a frame is speech. | 133 // RMS which must be exceeded to conclude that a frame is speech. |
(...skipping 10 matching lines...) Expand all Loading... |
138 float rms_adapt_; | 144 float rms_adapt_; |
139 | 145 |
140 // Start lag corresponds to the highest fundamental frequency. | 146 // Start lag corresponds to the highest fundamental frequency. |
141 int start_lag_; | 147 int start_lag_; |
142 | 148 |
143 // End lag corresponds to the lowest fundamental frequency. | 149 // End lag corresponds to the lowest fundamental frequency. |
144 int end_lag_; | 150 int end_lag_; |
145 | 151 |
146 // Time when mode switched from environment estimation to user input. This | 152 // Time when mode switched from environment estimation to user input. This |
147 // is used to time forced rejection of audio feedback contamination. | 153 // is used to time forced rejection of audio feedback contamination. |
148 int64 user_input_start_time_us_; | 154 int64_t user_input_start_time_us_; |
149 | 155 |
150 DISALLOW_COPY_AND_ASSIGN(EnergyEndpointer); | 156 DISALLOW_COPY_AND_ASSIGN(EnergyEndpointer); |
151 }; | 157 }; |
152 | 158 |
153 } // namespace content | 159 } // namespace content |
154 | 160 |
155 #endif // CONTENT_BROWSER_SPEECH_ENDPOINTER_ENERGY_ENDPOINTER_H_ | 161 #endif // CONTENT_BROWSER_SPEECH_ENDPOINTER_ENERGY_ENDPOINTER_H_ |
OLD | NEW |