OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // The EnergyEndpointer class finds likely speech onset and offset points. | 5 // The EnergyEndpointer class finds likely speech onset and offset points. |
6 // | 6 // |
7 // The implementation described here is about the simplest possible. | 7 // The implementation described here is about the simplest possible. |
8 // It is based on timings of threshold crossings for overall signal | 8 // It is based on timings of threshold crossings for overall signal |
9 // RMS. It is suitable for lightweight applications. | 9 // RMS. It is suitable for lightweight applications. |
10 // | 10 // |
(...skipping 21 matching lines...) Expand all Loading... |
32 // Audio feedback contamination can appear in the input audio, if not cut | 32 // Audio feedback contamination can appear in the input audio, if not cut |
33 // out or handled by echo cancellation. Audio feedback can trigger a false | 33 // out or handled by echo cancellation. Audio feedback can trigger a false |
34 // accept. The false accepts can be ignored by setting | 34 // accept. The false accepts can be ignored by setting |
35 // ep_contamination_rejection_period. | 35 // ep_contamination_rejection_period. |
36 | 36 |
37 #ifndef CONTENT_BROWSER_SPEECH_ENDPOINTER_ENERGY_ENDPOINTER_H_ | 37 #ifndef CONTENT_BROWSER_SPEECH_ENDPOINTER_ENERGY_ENDPOINTER_H_ |
38 #define CONTENT_BROWSER_SPEECH_ENDPOINTER_ENERGY_ENDPOINTER_H_ | 38 #define CONTENT_BROWSER_SPEECH_ENDPOINTER_ENERGY_ENDPOINTER_H_ |
39 | 39 |
40 #include <stdint.h> | 40 #include <stdint.h> |
41 | 41 |
| 42 #include <memory> |
42 #include <vector> | 43 #include <vector> |
43 | 44 |
44 #include "base/macros.h" | 45 #include "base/macros.h" |
45 #include "base/memory/scoped_ptr.h" | |
46 #include "content/browser/speech/endpointer/energy_endpointer_params.h" | 46 #include "content/browser/speech/endpointer/energy_endpointer_params.h" |
47 #include "content/common/content_export.h" | 47 #include "content/common/content_export.h" |
48 | 48 |
49 namespace content { | 49 namespace content { |
50 | 50 |
51 // Endpointer status codes | 51 // Endpointer status codes |
52 enum EpStatus { | 52 enum EpStatus { |
53 EP_PRE_SPEECH = 10, | 53 EP_PRE_SPEECH = 10, |
54 EP_POSSIBLE_ONSET, | 54 EP_POSSIBLE_ONSET, |
55 EP_SPEECH_PRESENT, | 55 EP_SPEECH_PRESENT, |
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
118 int64_t | 118 int64_t |
119 endpointer_time_us_; // Time of the most recently received audio frame. | 119 endpointer_time_us_; // Time of the most recently received audio frame. |
120 int64_t | 120 int64_t |
121 fast_update_frames_; // Number of frames for initial level adaptation. | 121 fast_update_frames_; // Number of frames for initial level adaptation. |
122 int64_t | 122 int64_t |
123 frame_counter_; // Number of frames seen. Used for initial adaptation. | 123 frame_counter_; // Number of frames seen. Used for initial adaptation. |
124 float max_window_dur_; // Largest search window size (seconds) | 124 float max_window_dur_; // Largest search window size (seconds) |
125 float sample_rate_; // Sampling rate. | 125 float sample_rate_; // Sampling rate. |
126 | 126 |
127 // Ring buffers to hold the speech activity history. | 127 // Ring buffers to hold the speech activity history. |
128 scoped_ptr<HistoryRing> history_; | 128 std::unique_ptr<HistoryRing> history_; |
129 | 129 |
130 // Configuration parameters. | 130 // Configuration parameters. |
131 EnergyEndpointerParams params_; | 131 EnergyEndpointerParams params_; |
132 | 132 |
133 // RMS which must be exceeded to conclude frame is speech. | 133 // RMS which must be exceeded to conclude frame is speech. |
134 float decision_threshold_; | 134 float decision_threshold_; |
135 | 135 |
136 // Flag to indicate that audio should be used to estimate environment, prior | 136 // Flag to indicate that audio should be used to estimate environment, prior |
137 // to receiving user input. | 137 // to receiving user input. |
138 bool estimating_environment_; | 138 bool estimating_environment_; |
(...skipping 13 matching lines...) Expand all Loading... |
152 // Time when mode switched from environment estimation to user input. This | 152 // Time when mode switched from environment estimation to user input. This |
153 // is used to time forced rejection of audio feedback contamination. | 153 // is used to time forced rejection of audio feedback contamination. |
154 int64_t user_input_start_time_us_; | 154 int64_t user_input_start_time_us_; |
155 | 155 |
156 DISALLOW_COPY_AND_ASSIGN(EnergyEndpointer); | 156 DISALLOW_COPY_AND_ASSIGN(EnergyEndpointer); |
157 }; | 157 }; |
158 | 158 |
159 } // namespace content | 159 } // namespace content |
160 | 160 |
161 #endif // CONTENT_BROWSER_SPEECH_ENDPOINTER_ENERGY_ENDPOINTER_H_ | 161 #endif // CONTENT_BROWSER_SPEECH_ENDPOINTER_ENERGY_ENDPOINTER_H_ |
OLD | NEW |