chrome/browser/speech/endpointer/endpointer.h - Issue 3117026: Add an endpointer for detecting end of speech.

Side by Side Diff: chrome/browser/speech/endpointer/endpointer.h

Issue 3117026: Add an endpointer for detecting end of speech. (Closed)

Patch Set: Merged with latest. Created 10 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
(Empty)
	1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.

	2 // Use of this source code is governed by a BSD-style license that can be

	3 // found in the LICENSE file.

	4

	5 #ifndef CHROME_BROWSER_SPEECH_ENDPOINTER_ENDPOINTER_H_

	6 #define CHROME_BROWSER_SPEECH_ENDPOINTER_ENDPOINTER_H_

	7

	8 #include "base/basictypes.h"

	9 #include "chrome/browser/speech/endpointer/energy_endpointer.h"

	10

	11 class EpStatus;  // NOTE(review): Endpointer holds an EpStatus by value (old_ep_status_), which needs a complete type - presumably from energy_endpointer.h; confirm this forward declaration is still needed.

	12

	13 namespace speech_input {

	14

	15 // A simple interface to the underlying energy-endpointer implementation. This

	16 // class lets callers provide audio as it is recorded and lets them poll to

	17 // find out when the user has stopped speaking.

	18 //

	19 // There are two events that may trigger the end of speech:

	20 //

	21 // speechInputPossiblyComplete event:

	22 //

	23 // Signals that silence/noise has been detected for a short amount of

	24 // time after some speech has been detected. It can be used for low latency

	25 // UI feedback. To disable it, set its silence length to a large value.

	26 //

	27 // speechInputComplete event:

	28 //

	29 // This event is intended to signal end of input and to stop recording.

	30 // The amount of time to wait after speech is set by

	31 // speech_input_complete_silence_length_us_ and optionally two other

	32 // parameters (see below).

	33 // This time can be held constant, or can change as more speech is detected.

	34 // In the latter case, the time changes after a set amount of time from the

	35 // beginning of speech. This is motivated by the expectation that there

	36 // will be two distinct types of inputs: short search queries and longer

	37 // dictation style input.

	38 //

	39 // Three parameters are used to define the piecewise constant timeout function.

	40 // The timeout length is speech_input_complete_silence_length until

	41 // long_speech_length, when it changes to

	42 // long_speech_input_complete_silence_length.

	43 class Endpointer {

	44 public:

	45 explicit Endpointer(int sample_rate);

	46

	47 // Start the endpointer. This should be called at the beginning of a session.

	48 void StartSession();

	49

	50 // Stop the endpointer.

	51 void EndSession();

	52

	53 // Start environment estimation. Audio will be used for environment estimation
	54 // i.e. noise level estimation.

	55 void SetEnvironmentEstimationMode();

	56

	57 // Start user input. This should be called when the user indicates start of

	58 // input, e.g. by pressing a button.

	59 void SetUserInputMode();

	60

	61 // Process a segment of audio, which may be more than one frame.

	62 // The status of the last frame will be returned.

	63 EpStatus ProcessAudio(const int16* audio_data, int num_samples);

	64

	65 // Get the status of the endpointer.

	66 EpStatus Status(int64 *time_us);

	67

	68 void set_speech_input_complete_silence_length(int64 time_us) {

	69 speech_input_complete_silence_length_us_ = time_us;

	70 }

	71

	72 void set_long_speech_input_complete_silence_length(int64 time_us) {

	73 long_speech_input_complete_silence_length_us_ = time_us;

	74 }

	75

	76 void set_speech_input_possibly_complete_silence_length(int64 time_us) {

	77 speech_input_possibly_complete_silence_length_us_ = time_us;

	78 }

	79

	80 void set_long_speech_length(int64 time_us) {

	81 long_speech_length_us_ = time_us;

	82 }

	83

	84 bool speech_input_complete() const {

	85 return speech_input_complete_;

	86 }

	87

	88 private:

	89 // Reset internal states. Helper method common to initial input utterance

	90 // and following input utterances.

	91 void Reset();

	92

	93 // Minimum allowable length of speech input.

	94 int64 speech_input_minimum_length_us_;

	95

	96 // The speechInputPossiblyComplete event signals that silence/noise has been

	97 // detected for a short amount of time after some speech has been detected.

	98 // This property specifies the time period.

	99 int64 speech_input_possibly_complete_silence_length_us_;

	100

	101 // The speechInputComplete event signals that silence/noise has been

	102 // detected for a long amount of time after some speech has been detected.

	103 // This property specifies the time period.

	104 int64 speech_input_complete_silence_length_us_;

	105

	106 // Same as above, this specifies the required silence period after speech

	107 // detection. This period is used instead of

	108 // speech_input_complete_silence_length_us_ when the utterance is longer

	109 // than long_speech_length_us_. This parameter is optional.

	110 int64 long_speech_input_complete_silence_length_us_;

	111

	112 // The period of time after which the endpointer should consider

	113 // long_speech_input_complete_silence_length_us_ as a valid silence period

	114 // instead of speech_input_complete_silence_length_us_. This parameter is

	115 // optional.

	116 int64 long_speech_length_us_;

	117

	118 // First speech onset time, used in determination of speech complete timeout.

	119 int64 speech_start_time_us_;

	120

	121 // Most recent end time, used in determination of speech complete timeout.

	122 int64 speech_end_time_us_;

	123

	124 int64 audio_frame_time_us_;

	125 EpStatus old_ep_status_;

	126 bool waiting_for_speech_possibly_complete_timeout_;

	127 bool waiting_for_speech_complete_timeout_;

	128 bool speech_previously_detected_;

	129 bool speech_input_complete_;

	130 EnergyEndpointer energy_endpointer_;

	131 int sample_rate_;

	132 int32 frame_size_;

	133 };

	134

	135 } // namespace speech_input

	136

	137 #endif // CHROME_BROWSER_SPEECH_ENDPOINTER_ENDPOINTER_H_

OLD	NEW

« no previous file with comments | « no previous file | chrome/browser/speech/endpointer/endpointer.cc » ('j') | chrome/browser/speech/speech_recognizer.h » ('J')