| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // The EnergyEndpointer class finds likely speech onset and offset points. | 5 // The EnergyEndpointer class finds likely speech onset and offset points. |
| 6 // | 6 // |
| 7 // The implementation described here is about the simplest possible. | 7 // The implementation described here is about the simplest possible. |
| 8 // It is based on timings of threshold crossings for overall signal | 8 // It is based on timings of threshold crossings for overall signal |
| 9 // RMS. It is suitable for lightweight applications. | 9 // RMS. It is suitable for lightweight applications. |
| 10 // | 10 // |
| (...skipping 21 matching lines...) Expand all Loading... |
| 32 // Audio feedback contamination can appear in the input audio, if not cut | 32 // Audio feedback contamination can appear in the input audio, if not cut |
| 33 // out or handled by echo cancellation. Audio feedback can trigger a false | 33 // out or handled by echo cancellation. Audio feedback can trigger a false |
| 34 // accept. The false accepts can be ignored by setting | 34 // accept. The false accepts can be ignored by setting |
| 35 // ep_contamination_rejection_period. | 35 // ep_contamination_rejection_period. |
| 36 | 36 |
| 37 #ifndef CONTENT_BROWSER_SPEECH_ENDPOINTER_ENERGY_ENDPOINTER_H_ | 37 #ifndef CONTENT_BROWSER_SPEECH_ENDPOINTER_ENERGY_ENDPOINTER_H_ |
| 38 #define CONTENT_BROWSER_SPEECH_ENDPOINTER_ENERGY_ENDPOINTER_H_ | 38 #define CONTENT_BROWSER_SPEECH_ENDPOINTER_ENERGY_ENDPOINTER_H_ |
| 39 | 39 |
| 40 #include <stdint.h> | 40 #include <stdint.h> |
| 41 | 41 |
| 42 #include <memory> |
| 42 #include <vector> | 43 #include <vector> |
| 43 | 44 |
| 44 #include "base/macros.h" | 45 #include "base/macros.h" |
| 45 #include "base/memory/scoped_ptr.h" | |
| 46 #include "content/browser/speech/endpointer/energy_endpointer_params.h" | 46 #include "content/browser/speech/endpointer/energy_endpointer_params.h" |
| 47 #include "content/common/content_export.h" | 47 #include "content/common/content_export.h" |
| 48 | 48 |
| 49 namespace content { | 49 namespace content { |
| 50 | 50 |
| 51 // Endpointer status codes | 51 // Endpointer status codes |
| 52 enum EpStatus { | 52 enum EpStatus { |
| 53 EP_PRE_SPEECH = 10, | 53 EP_PRE_SPEECH = 10, |
| 54 EP_POSSIBLE_ONSET, | 54 EP_POSSIBLE_ONSET, |
| 55 EP_SPEECH_PRESENT, | 55 EP_SPEECH_PRESENT, |
| (...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 118 int64_t | 118 int64_t |
| 119 endpointer_time_us_; // Time of the most recently received audio frame. | 119 endpointer_time_us_; // Time of the most recently received audio frame. |
| 120 int64_t | 120 int64_t |
| 121 fast_update_frames_; // Number of frames for initial level adaptation. | 121 fast_update_frames_; // Number of frames for initial level adaptation. |
| 122 int64_t | 122 int64_t |
| 123 frame_counter_; // Number of frames seen. Used for initial adaptation. | 123 frame_counter_; // Number of frames seen. Used for initial adaptation. |
| 124 float max_window_dur_; // Largest search window size (seconds) | 124 float max_window_dur_; // Largest search window size (seconds) |
| 125 float sample_rate_; // Sampling rate. | 125 float sample_rate_; // Sampling rate. |
| 126 | 126 |
| 127 // Ring buffers to hold the speech activity history. | 127 // Ring buffers to hold the speech activity history. |
| 128 scoped_ptr<HistoryRing> history_; | 128 std::unique_ptr<HistoryRing> history_; |
| 129 | 129 |
| 130 // Configuration parameters. | 130 // Configuration parameters. |
| 131 EnergyEndpointerParams params_; | 131 EnergyEndpointerParams params_; |
| 132 | 132 |
| 133 // RMS which must be exceeded to conclude frame is speech. | 133 // RMS which must be exceeded to conclude frame is speech. |
| 134 float decision_threshold_; | 134 float decision_threshold_; |
| 135 | 135 |
| 136 // Flag to indicate that audio should be used to estimate environment, prior | 136 // Flag to indicate that audio should be used to estimate environment, prior |
| 137 // to receiving user input. | 137 // to receiving user input. |
| 138 bool estimating_environment_; | 138 bool estimating_environment_; |
| (...skipping 13 matching lines...) Expand all Loading... |
| 152 // Time when mode switched from environment estimation to user input. This | 152 // Time when mode switched from environment estimation to user input. This |
| 153 // is used to time forced rejection of audio feedback contamination. | 153 // is used to time forced rejection of audio feedback contamination. |
| 154 int64_t user_input_start_time_us_; | 154 int64_t user_input_start_time_us_; |
| 155 | 155 |
| 156 DISALLOW_COPY_AND_ASSIGN(EnergyEndpointer); | 156 DISALLOW_COPY_AND_ASSIGN(EnergyEndpointer); |
| 157 }; | 157 }; |
| 158 | 158 |
| 159 } // namespace content | 159 } // namespace content |
| 160 | 160 |
| 161 #endif // CONTENT_BROWSER_SPEECH_ENDPOINTER_ENERGY_ENDPOINTER_H_ | 161 #endif // CONTENT_BROWSER_SPEECH_ENDPOINTER_ENERGY_ENDPOINTER_H_ |
| OLD | NEW |