| Index: chrome/browser/speech/endpointer/endpointer.h
|
| diff --git a/chrome/browser/speech/endpointer/endpointer.h b/chrome/browser/speech/endpointer/endpointer.h
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..79b316d0283a28a23f743c1853c49dce3f282bdd
|
| --- /dev/null
|
| +++ b/chrome/browser/speech/endpointer/endpointer.h
|
| @@ -0,0 +1,137 @@
|
| +// Copyright (c) 2010 The Chromium Authors. All rights reserved.
|
| +// Use of this source code is governed by a BSD-style license that can be
|
| +// found in the LICENSE file.
|
| +
|
| +#ifndef CHROME_BROWSER_SPEECH_ENDPOINTER_ENDPOINTER_H_
|
| +#define CHROME_BROWSER_SPEECH_ENDPOINTER_ENDPOINTER_H_
|
| +
|
| +#include "base/basictypes.h"
|
| +#include "chrome/browser/speech/endpointer/energy_endpointer.h"
|
| +
|
| +class EpStatus;
|
| +
|
| +namespace speech_input {
|
| +
|
| +// A simple interface to the underlying energy-endpointer implementation. This
|
| +// class lets callers provide audio as it is being recorded and lets them poll
|
| +// to find when the user has stopped speaking.
|
| +//
|
| +// There are two events that may trigger the end of speech:
|
| +//
|
| +// speechInputPossiblyComplete event:
|
| +//
|
| +// Signals that silence/noise has been detected for a *short* amount of
|
| +// time after some speech has been detected. It can be used for low latency
|
| +// UI feedback. To disable it, set its silence length to a large amount.
|
| +//
|
| +// speechInputComplete event:
|
| +//
|
| +// This event is intended to signal end of input and to stop recording.
|
| +// The amount of time to wait after speech is set by
|
| +// speech_input_complete_silence_length_us_ and optionally two other
|
| +// parameters (see below).
|
| +// This time can be held constant, or can change as more speech is detected.
|
| +// In the latter case, the time changes after a set amount of time from the
|
| +// *beginning* of speech. This is motivated by the expectation that there
|
| +// will be two distinct types of inputs: short search queries and longer
|
| +// dictation style input.
|
| +//
|
| +// Three parameters are used to define the piecewise constant timeout function.
|
| +// The timeout length is speech_input_complete_silence_length until
|
| +// long_speech_length, when it changes to
|
| +// long_speech_input_complete_silence_length.
|
| +class Endpointer {
|
| + public:
|
| + explicit Endpointer(int sample_rate);
|
| +
|
| + // Start the endpointer. This should be called at the beginning of a session.
|
| + void StartSession();
|
| +
|
| + // Stop the endpointer.
|
| + void EndSession();
|
| +
|
| + // Start environment estimation. Audio will be used for environment estimation
|
| + // i.e. noise level estimation.
|
| + void SetEnvironmentEstimationMode();
|
| +
|
| + // Start user input. This should be called when the user indicates start of
|
| + // input, e.g. by pressing a button.
|
| + void SetUserInputMode();
|
| +
|
| + // Process a segment of audio, which may be more than one frame.
|
| + // The status of the last frame will be returned.
|
| + EpStatus ProcessAudio(const int16* audio_data, int num_samples);
|
| +
|
| + // Get the status of the endpointer. time_us looks like an out-param; confirm.
|
| + EpStatus Status(int64 *time_us);
|
| +
|
| + void set_speech_input_complete_silence_length(int64 time_us) {
|
| + speech_input_complete_silence_length_us_ = time_us;
|
| + }
|
| +
|
| + void set_long_speech_input_complete_silence_length(int64 time_us) {
|
| + long_speech_input_complete_silence_length_us_ = time_us;
|
| + }
|
| +
|
| + void set_speech_input_possibly_complete_silence_length(int64 time_us) {
|
| + speech_input_possibly_complete_silence_length_us_ = time_us;
|
| + }
|
| +
|
| + void set_long_speech_length(int64 time_us) {
|
| + long_speech_length_us_ = time_us;
|
| + }
|
| +
|
| + bool speech_input_complete() const {
|
| + return speech_input_complete_;
|
| + }
|
| +
|
| + private:
|
| + // Reset internal states. Helper method common to initial input utterance
|
| + // and following input utterances.
|
| + void Reset();
|
| +
|
| + // Minimum allowable length of speech input.
|
| + int64 speech_input_minimum_length_us_;
|
| +
|
| + // The speechInputPossiblyComplete event signals that silence/noise has been
|
| + // detected for a *short* amount of time after some speech has been detected.
|
| + // This property specifies the time period.
|
| + int64 speech_input_possibly_complete_silence_length_us_;
|
| +
|
| + // The speechInputComplete event signals that silence/noise has been
|
| + // detected for a *long* amount of time after some speech has been detected.
|
| + // This property specifies the time period.
|
| + int64 speech_input_complete_silence_length_us_;
|
| +
|
| + // Same as above, this specifies the required silence period after speech
|
| + // detection. This period is used instead of
|
| + // speech_input_complete_silence_length_us_ when the utterance is longer
|
| + // than long_speech_length_us_. This parameter is optional.
|
| + int64 long_speech_input_complete_silence_length_us_;
|
| +
|
| + // The period of time after which the endpointer should consider
|
| + // long_speech_input_complete_silence_length_us_ as a valid silence period
|
| + // instead of speech_input_complete_silence_length_us_. This parameter is
|
| + // optional.
|
| + int64 long_speech_length_us_;
|
| +
|
| + // First speech onset time, used in determination of speech complete timeout.
|
| + int64 speech_start_time_us_;
|
| +
|
| + // Most recent end time, used in determination of speech complete timeout.
|
| + int64 speech_end_time_us_;
|
| +
|
| + int64 audio_frame_time_us_;
|
| + EpStatus old_ep_status_;
|
| + bool waiting_for_speech_possibly_complete_timeout_;
|
| + bool waiting_for_speech_complete_timeout_;
|
| + bool speech_previously_detected_;
|
| + bool speech_input_complete_;
|
| + EnergyEndpointer energy_endpointer_;
|
| + int sample_rate_;
|
| + int32 frame_size_;
|
| +};
|
| +
|
| +} // namespace speech_input
|
| +
|
| +#endif // CHROME_BROWSER_SPEECH_ENDPOINTER_ENDPOINTER_H_
|
|
|