Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(419)

Side by Side Diff: chrome/browser/speech/endpointer/endpointer.h

Issue 3117026: Add an endpointer for detecting end of speech. (Closed)
Patch Set: Merged with latest. Created 10 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #ifndef CHROME_BROWSER_SPEECH_ENDPOINTER_ENDPOINTER_H_
6 #define CHROME_BROWSER_SPEECH_ENDPOINTER_ENDPOINTER_H_
7
8 #include "base/basictypes.h"
9 #include "chrome/browser/speech/endpointer/energy_endpointer.h"
10
11 class EpStatus;
12
13 namespace speech_input {
14
15 // A simple interface to the underlying energy-endpointer implementation. This
16 // class lets callers provide audio as it is recorded and poll to find out
17 // when the user has stopped speaking.
18 //
19 // There are two events that may trigger the end of speech:
20 //
21 // speechInputPossiblyComplete event:
22 //
23 // Signals that silence/noise has been detected for a *short* amount of
24 // time after some speech has been detected. It can be used for low latency
25 // UI feedback. To disable it, set it to a large amount.
26 //
27 // speechInputComplete event:
28 //
29 // This event is intended to signal end of input and to stop recording.
30 // The amount of time to wait after speech is set by
31 // speech_input_complete_silence_length_us_ and optionally two other
32 // parameters (see below).
33 // This time can be held constant, or can change as more speech is detected.
34 // In the latter case, the time changes after a set amount of time from the
35 // *beginning* of speech. This is motivated by the expectation that there
36 // will be two distinct types of inputs: short search queries and longer
37 // dictation style input.
38 //
39 // Three parameters are used to define the piecewise constant timeout function.
40 // The timeout length is speech_input_complete_silence_length until
41 // long_speech_length, when it changes to
42 // long_speech_input_complete_silence_length.
43 class Endpointer {
44 public:
45 explicit Endpointer(int sample_rate);
46
47 // Start the endpointer. This should be called at the beginning of a session.
48 void StartSession();
49
50 // Stop the endpointer.
51 void EndSession();
52
53 // Start environment estimation. Audio will be used to estimate the
54 // environment, i.e. the noise level.
55 void SetEnvironmentEstimationMode();
56
57 // Start user input. This should be called when the user indicates start of
58 // input, e.g. by pressing a button.
59 void SetUserInputMode();
60
61 // Process a segment of audio, which may be more than one frame.
62 // The status of the last frame will be returned.
63 EpStatus ProcessAudio(const int16* audio_data, int num_samples);
64
65 // Get the status of the endpointer.
66 EpStatus Status(int64 *time_us);
67
68 void set_speech_input_complete_silence_length(int64 time_us) {
69 speech_input_complete_silence_length_us_ = time_us;
70 }
71
72 void set_long_speech_input_complete_silence_length(int64 time_us) {
73 long_speech_input_complete_silence_length_us_ = time_us;
74 }
75
76 void set_speech_input_possibly_complete_silence_length(int64 time_us) {
77 speech_input_possibly_complete_silence_length_us_ = time_us;
78 }
79
80 void set_long_speech_length(int64 time_us) {
81 long_speech_length_us_ = time_us;
82 }
83
84 bool speech_input_complete() const {
85 return speech_input_complete_;
86 }
87
88 private:
89 // Reset internal states. Helper method common to the initial input utterance
90 // and the following input utterances.
91 void Reset();
92
93 // Minimum allowable length of speech input.
94 int64 speech_input_minimum_length_us_;
95
96 // The speechInputPossiblyComplete event signals that silence/noise has been
97 // detected for a *short* amount of time after some speech has been detected.
98 // This property specifies the time period.
99 int64 speech_input_possibly_complete_silence_length_us_;
100
101 // The speechInputComplete event signals that silence/noise has been
102 // detected for a *long* amount of time after some speech has been detected.
103 // This property specifies the time period.
104 int64 speech_input_complete_silence_length_us_;
105
106 // Same as above, this specifies the required silence period after speech
107 // detection. This period is used instead of
108 // speech_input_complete_silence_length_us_ when the utterance is longer than
109 // long_speech_length_us_. This parameter is optional.
110 int64 long_speech_input_complete_silence_length_us_;
111
112 // The period of time after which the endpointer should consider
113 // long_speech_input_complete_silence_length_us_ as a valid silence period
114 // instead of speech_input_complete_silence_length_us_. This parameter is
115 // optional.
116 int64 long_speech_length_us_;
117
118 // First speech onset time, used in determination of speech complete timeout.
119 int64 speech_start_time_us_;
120
121 // Most recent end time, used in determination of speech complete timeout.
122 int64 speech_end_time_us_;
123
124 int64 audio_frame_time_us_;
125 EpStatus old_ep_status_;
126 bool waiting_for_speech_possibly_complete_timeout_;
127 bool waiting_for_speech_complete_timeout_;
128 bool speech_previously_detected_;
129 bool speech_input_complete_;
130 EnergyEndpointer energy_endpointer_;
131 int sample_rate_;
132 int32 frame_size_;
133 };
134
135 } // namespace speech_input
136
137 #endif // CHROME_BROWSER_SPEECH_ENDPOINTER_ENDPOINTER_H_
OLDNEW
« no previous file with comments | « no previous file | chrome/browser/speech/endpointer/endpointer.cc » ('j') | chrome/browser/speech/speech_recognizer.h » ('J')

Powered by Google App Engine
This is Rietveld 408576698