OLD | NEW |
(Empty) | |
| 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 #ifndef CHROME_BROWSER_SPEECH_ENDPOINTER_ENDPOINTER_H_ |
| 6 #define CHROME_BROWSER_SPEECH_ENDPOINTER_ENDPOINTER_H_ |
| 7 |
| 8 #include "base/basictypes.h" |
| 9 #include "chrome/browser/speech/endpointer/energy_endpointer.h" |
| 10 |
// NOTE(review): Endpointer stores an EpStatus by value (old_ep_status_), which
// requires the complete type, so this forward declaration alone cannot satisfy
// it. It is also declared at global scope, outside namespace speech_input.
// Presumably the full definition comes from energy_endpointer.h and this
// declaration is redundant -- confirm against that header before removing.
| 11 class EpStatus; |
| 12 |
| 13 namespace speech_input { |
| 14 |
| 15 // A simple interface to the underlying energy-endpointer implementation. This |
| 16 // class lets callers provide audio as it is being recorded and lets them poll |
| 17 // to find when the user has stopped speaking. |
| 18 // |
| 19 // There are two events that may trigger the end of speech: |
| 20 // |
| 21 // speechInputPossiblyComplete event: |
| 22 // |
| 23 // Signals that silence/noise has been detected for a *short* amount of |
| 24 // time after some speech has been detected. It can be used for low latency |
| 25 // UI feedback. To disable it, set its silence length to a large value. |
| 26 // |
| 27 // speechInputComplete event: |
| 28 // |
| 29 // This event is intended to signal end of input and to stop recording. |
| 30 // The amount of time to wait after speech is set by |
| 31 // speech_input_complete_silence_length_us_ and optionally two other |
| 32 // parameters (see below). |
| 33 // This time can be held constant, or can change as more speech is detected. |
| 34 // In the latter case, the time changes after a set amount of time from the |
| 35 // *beginning* of speech. This is motivated by the expectation that there |
| 36 // will be two distinct types of inputs: short search queries and longer |
| 37 // dictation style input. |
| 38 // |
| 39 // Three parameters are used to define the piecewise constant timeout function. |
| 40 // The timeout length is speech_input_complete_silence_length_us_ until |
| 41 // long_speech_length_us_ has elapsed since the start of speech, after which it |
| 42 // changes to long_speech_input_complete_silence_length_us_. |
| 43 class Endpointer { |
| 44 public: |
| 45 explicit Endpointer(int sample_rate); |
| 46 |
| 47 // Start the endpointer. This should be called at the beginning of a session. |
| 48 void StartSession(); |
| 49 |
| 50 // Stop the endpointer. |
| 51 void EndSession(); |
| 52 |
| 53 // Start environment estimation. Audio will be used for environment estimation, |
| 54 // i.e. noise level estimation. |
| 55 void SetEnvironmentEstimationMode(); |
| 56 |
| 57 // Start user input. This should be called when the user indicates start of |
| 58 // input, e.g. by pressing a button. |
| 59 void SetUserInputMode(); |
| 60 |
| 61 // Process a segment of audio, which may be more than one frame. |
| 62 // The status of the last frame will be returned. |
| 63 EpStatus ProcessAudio(const int16* audio_data, int num_samples); |
| 64 |
| 65 // Get the status of the endpointer. NOTE(review): |time_us| is a non-const pointer, presumably an out-parameter for the time of that status -- confirm in the implementation. |
| 66 EpStatus Status(int64 *time_us); |
| 67 |
| 68 void set_speech_input_complete_silence_length(int64 time_us) { |
| 69 speech_input_complete_silence_length_us_ = time_us; |
| 70 } |
| 71 |
| 72 void set_long_speech_input_complete_silence_length(int64 time_us) { |
| 73 long_speech_input_complete_silence_length_us_ = time_us; |
| 74 } |
| 75 |
| 76 void set_speech_input_possibly_complete_silence_length(int64 time_us) { |
| 77 speech_input_possibly_complete_silence_length_us_ = time_us; |
| 78 } |
| 79 |
| 80 void set_long_speech_length(int64 time_us) { |
| 81 long_speech_length_us_ = time_us; |
| 82 } |
| 83 |
| 84 bool speech_input_complete() const { |
| 85 return speech_input_complete_; |
| 86 } |
| 87 |
| 88 private: |
| 89 // Reset internal states. Helper method common to the initial input utterance |
| 90 // and following input utterances. |
| 91 void Reset(); |
| 92 |
| 93 // Minimum allowable length of speech input. |
| 94 int64 speech_input_minimum_length_us_; |
| 95 |
| 96 // The speechInputPossiblyComplete event signals that silence/noise has been |
| 97 // detected for a *short* amount of time after some speech has been detected. |
| 98 // This property specifies the time period. |
| 99 int64 speech_input_possibly_complete_silence_length_us_; |
| 100 |
| 101 // The speechInputComplete event signals that silence/noise has been |
| 102 // detected for a *long* amount of time after some speech has been detected. |
| 103 // This property specifies the time period. |
| 104 int64 speech_input_complete_silence_length_us_; |
| 105 |
| 106 // Same as above, this specifies the required silence period after speech |
| 107 // detection. This period is used instead of |
| 108 // speech_input_complete_silence_length_us_ when the utterance is longer than |
| 109 // long_speech_length_us_. This parameter is optional. |
| 110 int64 long_speech_input_complete_silence_length_us_; |
| 111 |
| 112 // The period of time after which the endpointer should consider |
| 113 // long_speech_input_complete_silence_length_us_ as a valid silence period |
| 114 // instead of speech_input_complete_silence_length_us_. This parameter is |
| 115 // optional. |
| 116 int64 long_speech_length_us_; |
| 117 |
| 118 // First speech onset time, used in determination of speech complete timeout. |
| 119 int64 speech_start_time_us_; |
| 120 |
| 121 // Most recent end time, used in determination of speech complete timeout. |
| 122 int64 speech_end_time_us_; |
| 123 |
| 124 int64 audio_frame_time_us_; |
| 125 EpStatus old_ep_status_; |
| 126 bool waiting_for_speech_possibly_complete_timeout_; |
| 127 bool waiting_for_speech_complete_timeout_; |
| 128 bool speech_previously_detected_; |
| 129 bool speech_input_complete_; |
| 130 EnergyEndpointer energy_endpointer_; |
| 131 int sample_rate_; |
| 132 int32 frame_size_; |
| 133 }; |
| 134 |
| 135 } // namespace speech_input |
| 136 |
| 137 #endif // CHROME_BROWSER_SPEECH_ENDPOINTER_ENDPOINTER_H_ |
OLD | NEW |