chrome/browser/speech/endpointer/endpointer.h - Issue 3117026: Add an endpointer for detecting end of speech.

Unified Diff: chrome/browser/speech/endpointer/endpointer.h

Issue 3117026: Add an endpointer for detecting end of speech. (Closed)

Patch Set: Merged with latest. Created 10 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: chrome/browser/speech/endpointer/endpointer.h

diff --git a/chrome/browser/speech/endpointer/endpointer.h b/chrome/browser/speech/endpointer/endpointer.h

new file mode 100644

index 0000000000000000000000000000000000000000..79b316d0283a28a23f743c1853c49dce3f282bdd

--- /dev/null

+++ b/chrome/browser/speech/endpointer/endpointer.h

@@ -0,0 +1,137 @@

+// Use of this source code is governed by a BSD-style license that can be

+// found in the LICENSE file.

+#ifndef CHROME_BROWSER_SPEECH_ENDPOINTER_ENDPOINTER_H_

+#define CHROME_BROWSER_SPEECH_ENDPOINTER_ENDPOINTER_H_

+#include "base/basictypes.h"

+#include "chrome/browser/speech/endpointer/energy_endpointer.h"

+class EpStatus;

+namespace speech_input {

+// A simple interface to the underlying energy-endpointer implementation, this

+// class lets callers provide audio as being recorded and let them poll to find

+// when the user has stopped speaking.

+//

+// There are two events that may trigger the end of speech:

+//

+// speechInputPossiblyComplete event:

+//

+// Signals that silence/noise has been detected for a *short* amount of

+// time after some speech has been detected. It can be used for low latency

+// UI feedback. To disable it, set it to a large amount.

+//

+// speechInputComplete event:

+//

+// This event is intended to signal end of input and to stop recording.

+// The amount of time to wait after speech is set by

+// speech_input_complete_silence_length_ and optionally two other

+// parameters (see below).

+// This time can be held constant, or can change as more speech is detected.

+// In the latter case, the time changes after a set amount of time from the

+// *beginning* of speech. This is motivated by the expectation that there

+// will be two distinct types of inputs: short search queries and longer

+// dictation style input.

+//

+// Three parameters are used to define the piecewise constant timeout function.

+// The timeout length is speech_input_complete_silence_length until

+// long_speech_length, when it changes to

+// long_speech_input_complete_silence_length.

+class Endpointer {

+ public:

+ explicit Endpointer(int sample_rate);

+ // Start the endpointer. This should be called at the beginning of a session.

+ void StartSession();

+ // Stop the endpointer.

+ void EndSession();

+ // Start environment estimation. Audio will be used for environment estimation

+ // i.e. noise level estimation.

+ void SetEnvironmentEstimationMode();

+ // Start user input. This should be called when the user indicates start of

+ // input, e.g. by pressing a button.

+ void SetUserInputMode();

+ // Process a segment of audio, which may be more than one frame.

+ // The status of the last frame will be returned.

+ EpStatus ProcessAudio(const int16* audio_data, int num_samples);

+ // Get the status of the endpointer.

+ EpStatus Status(int64 *time_us);

+ void set_speech_input_complete_silence_length(int64 time_us) {

+ speech_input_complete_silence_length_us_ = time_us;

+ }

+ void set_long_speech_input_complete_silence_length(int64 time_us) {

+ long_speech_input_complete_silence_length_us_ = time_us;

+ }

+ void set_speech_input_possibly_complete_silence_length(int64 time_us) {

+ speech_input_possibly_complete_silence_length_us_ = time_us;

+ }

+ void set_long_speech_length(int64 time_us) {

+ long_speech_length_us_ = time_us;

+ }

+ bool speech_input_complete() const {

+ return speech_input_complete_;

+ }

+ private:

+ // Reset internal states. Helper method common to initial input utterance

+ // and following input utternaces.

+ void Reset();

+ // Minimum allowable length of speech input.

+ int64 speech_input_minimum_length_us_;

+ // The speechInputPossiblyComplete event signals that silence/noise has been

+ // detected for a *short* amount of time after some speech has been detected.

+ // This proporty specifies the time period.

+ int64 speech_input_possibly_complete_silence_length_us_;

+ // The speechInputComplete event signals that silence/noise has been

+ // detected for a *long* amount of time after some speech has been detected.

+ // This property specifies the time period.

+ int64 speech_input_complete_silence_length_us_;

+ // Same as above, this specifies the required silence period after speech

+ // detection. This period is used instead of

+ // speech_input_complete_silence_length_ when the utterance is longer than

+ // long_speech_length_. This parameter is optional.

+ int64 long_speech_input_complete_silence_length_us_;

+ // The period of time after which the endpointer should consider

+ // long_speech_input_complete_silence_length_ as a valid silence period

+ // instead of speech_input_complete_silence_length_. This parameter is

+ // optional.

+ int64 long_speech_length_us_;

+ // First speech onset time, used in determination of speech complete timeout.

+ int64 speech_start_time_us_;

+ // Most recent end time, used in determination of speech complete timeout.

+ int64 speech_end_time_us_;

+ int64 audio_frame_time_us_;

+ EpStatus old_ep_status_;

+ bool waiting_for_speech_possibly_complete_timeout_;

+ bool waiting_for_speech_complete_timeout_;

+ bool speech_previously_detected_;

+ bool speech_input_complete_;

+ EnergyEndpointer energy_endpointer_;

+ int sample_rate_;

+ int32 frame_size_;

+};

+} // namespace speech_input

+#endif // CHROME_BROWSER_SPEECH_ENDPOINTER_ENDPOINTER_H_

« no previous file with comments | « no previous file | chrome/browser/speech/endpointer/endpointer.cc » ('j') | chrome/browser/speech/speech_recognizer.h » ('J')