content/browser/speech/speech_recognizer_impl.h - Issue 9835049: Speech refactoring: Reimplemented speech_recognizer as a FSM. (CL1.5)

Side by Side Diff: content/browser/speech/speech_recognizer_impl.h

Issue 9835049: Speech refactoring: Reimplemented speech_recognizer as a FSM. (CL1.5) (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Fixed according to Bulach review (+ win compile error). Created 8 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_	5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_

6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_	6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_

7	7

8 #include "base/basictypes.h"	8 #include "base/basictypes.h"

9 #include "base/memory/ref_counted.h"	9 #include "base/memory/ref_counted.h"

10 #include "base/memory/scoped_ptr.h"	10 #include "base/memory/scoped_ptr.h"

11 #include "content/browser/speech/endpointer/endpointer.h"	11 #include "content/browser/speech/endpointer/endpointer.h"

12 #include "content/browser/speech/speech_recognition_engine.h"	12 #include "content/browser/speech/speech_recognition_engine.h"

13 #include "content/public/browser/speech_recognizer.h"	13 #include "content/public/browser/speech_recognizer.h"

14 #include "content/public/common/speech_recognition_error.h"	14 #include "content/public/common/speech_recognition_error.h"

	15 #include "content/public/common/speech_recognition_result.h"

15 #include "media/audio/audio_input_controller.h"	16 #include "media/audio/audio_input_controller.h"

16 #include "net/url_request/url_request_context_getter.h"	17 #include "net/url_request/url_request_context_getter.h"

17	18

18 namespace content {	19 namespace content {

19 class SpeechRecognitionEventListener;	20 class SpeechRecognitionEventListener;

20 struct SpeechRecognitionResult;	21 struct SpeechRecognitionResult;

21 }	22 }

22	23

23 namespace media {	24 namespace media {

24 class AudioInputController;	25 class AudioInputController;

25 class AudioManager;	26 class AudioManager;

26 }	27 }

27	28

28 namespace speech {	29 namespace speech {

29	30

30 // Records audio, sends recorded audio to server and translates server response	31 // TODO(primiano) Next CL: Remove the Impl suffix and the exported

31 // to recognition result.	32 // /content/public/browser/speech_recognizer.h interface since this class should

	33 // not be visible outside (currently we need it for speech input extension API).

	34

	35 // Handles speech recognition for a session (identified by |caller_id|), taking

	36 // care of audio capture, silence detection/endpointer and interaction with the

	37 // SpeechRecognitionEngine.

32 class CONTENT_EXPORT SpeechRecognizerImpl	38 class CONTENT_EXPORT SpeechRecognizerImpl

33 : public NON_EXPORTED_BASE(content::SpeechRecognizer),	39 : public NON_EXPORTED_BASE(content::SpeechRecognizer),

34 public media::AudioInputController::EventHandler,	40 public media::AudioInputController::EventHandler,

35 public NON_EXPORTED_BASE(SpeechRecognitionEngineDelegate) {	41 public NON_EXPORTED_BASE(SpeechRecognitionEngineDelegate) {

36 public:	42 public:

37 static const int kAudioSampleRate;	43 static const int kAudioSampleRate;

38 static const ChannelLayout kChannelLayout;	44 static const ChannelLayout kChannelLayout;

39 static const int kNumBitsPerAudioSample;	45 static const int kNumBitsPerAudioSample;

40 static const int kNoSpeechTimeoutMs;	46 static const int kNoSpeechTimeoutMs;

41 static const int kEndpointerEstimationTimeMs;	47 static const int kEndpointerEstimationTimeMs;

42	48

43 SpeechRecognizerImpl(	49 SpeechRecognizerImpl(

44 content::SpeechRecognitionEventListener* listener,	50 content::SpeechRecognitionEventListener* listener,

45 int caller_id,	51 int caller_id,

46 const std::string& language,	52 SpeechRecognitionEngine* engine);

47 const std::string& grammar,

48 net::URLRequestContextGetter* context_getter,

49 bool filter_profanities,

50 const std::string& hardware_info,

51 const std::string& origin_url);

52 virtual ~SpeechRecognizerImpl();	53 virtual ~SpeechRecognizerImpl();

53	54

54 // content::SpeechRecognizer methods.	55 // content::SpeechRecognizer methods.

55 virtual void StartRecognition() OVERRIDE;	56 virtual void StartRecognition() OVERRIDE;

56 virtual void AbortRecognition() OVERRIDE;	57 virtual void AbortRecognition() OVERRIDE;

57 virtual void StopAudioCapture() OVERRIDE;	58 virtual void StopAudioCapture() OVERRIDE;

58 virtual bool IsActive() const OVERRIDE;	59 virtual bool IsActive() const OVERRIDE;

59 virtual bool IsCapturingAudio() const OVERRIDE;	60 virtual bool IsCapturingAudio() const OVERRIDE;

60 const SpeechRecognitionEngine& recognition_engine() const;	61 const SpeechRecognitionEngine& recognition_engine() const;

61	62

	63 private:

	64 friend class SpeechRecognizerImplTest;

	65

	66 enum FSMState {

	67 STATE_IDLE = 0,

	68 STATE_STARTING,

	69 STATE_ESTIMATING_ENVIRONMENT,

	70 STATE_WAITING_FOR_SPEECH,

	71 STATE_RECOGNIZING,

	72 STATE_WAITING_FINAL_RESULT,

	73 STATE_MAX_VALUE = STATE_WAITING_FINAL_RESULT
	Primiano Tucci (use gerrit) 2012/04/12 17:38:06 Renamed due to a name clash on windows (STATE_MAX defined in windows.h).
	74 };

	75

	76 enum FSMEvent {

	77 EVENT_ABORT = 0,

	78 EVENT_START,

	79 EVENT_STOP_CAPTURE,

	80 EVENT_AUDIO_DATA,

	81 EVENT_ENGINE_RESULT,

	82 EVENT_ENGINE_ERROR,

	83 EVENT_AUDIO_ERROR,

	84 EVENT_MAX_VALUE = EVENT_AUDIO_ERROR

	85 };

	86

	87 struct FSMEventArgs {

	88 explicit FSMEventArgs(FSMEvent event_value);

	89 ~FSMEventArgs();

	90

	91 FSMEvent event;

	92 int audio_error_code;

	93 scoped_refptr<AudioChunk> audio_data;

	94 content::SpeechRecognitionResult engine_result;

	95 content::SpeechRecognitionError engine_error;

	96 };

	97

	98 // Entry point for pushing any new external event into the recognizer FSM.

	99 void DispatchEvent(const FSMEventArgs& event_args);

	100

	101 // Defines the behavior of the recognizer FSM, selecting the appropriate

	102 // transition according to the current state and event.

	103 FSMState ExecuteTransitionAndGetNextState(const FSMEventArgs& args);

	104

	105 // Process a new audio chunk in the audio pipeline (endpointer, vumeter, etc).

	106 void ProcessAudioPipeline(const AudioChunk& raw_audio);

	107

	108 // The methods below handle transitions of the recognizer FSM.

	109 FSMState StartRecording(const FSMEventArgs& event_args);

	110 FSMState StartRecognitionEngine(const FSMEventArgs& event_args);

	111 FSMState WaitEnvironmentEstimationCompletion(const FSMEventArgs& event_args);

	112 FSMState DetectUserSpeechOrTimeout(const FSMEventArgs& event_args);

	113 FSMState StopCaptureAndWaitForResult(const FSMEventArgs& event_args);

	114 FSMState ProcessIntermediateResult(const FSMEventArgs& event_args);

	115 FSMState ProcessFinalResult(const FSMEventArgs& event_args);

	116 FSMState Abort(const FSMEventArgs& event_args);

	117 FSMState AbortWithError(const content::SpeechRecognitionError* error);

	118 FSMState AbortWithError(const content::SpeechRecognitionError& error);

	119 FSMState DetectEndOfSpeech(const FSMEventArgs& event_args);

	120 FSMState DoNothing(const FSMEventArgs& event_args) const;

	121 FSMState NotFeasible(const FSMEventArgs& event_args);

	122

	123 // Returns the time span of captured audio samples since the start of capture.

	124 int GetElapsedTimeMs() const;

	125

	126 // Calculates the input volume to be displayed in the UI, triggering the

	127 // OnAudioLevelsChange event accordingly.

	128 void UpdateSignalAndNoiseLevels(const float& rms, bool clip_detected);

	129

	130 void CloseAudioControllerAsynchronously();

	131 void SetAudioManagerForTesting(media::AudioManager* audio_manager);

	132

	133 // Callback called on IO thread by audio_controller->Close().

	134 void OnAudioClosed(media::AudioInputController*);

	135

62 // AudioInputController::EventHandler methods.	136 // AudioInputController::EventHandler methods.

63 virtual void OnCreated(media::AudioInputController* controller) OVERRIDE {}	137 virtual void OnCreated(media::AudioInputController* controller) OVERRIDE {}

64 virtual void OnRecording(media::AudioInputController* controller) OVERRIDE {}	138 virtual void OnRecording(media::AudioInputController* controller) OVERRIDE {}

65 virtual void OnError(media::AudioInputController* controller,	139 virtual void OnError(media::AudioInputController* controller,

66 int error_code) OVERRIDE;	140 int error_code) OVERRIDE;

67 virtual void OnData(media::AudioInputController* controller,	141 virtual void OnData(media::AudioInputController* controller,

68 const uint8* data,	142 const uint8* data, uint32 size) OVERRIDE;

69 uint32 size) OVERRIDE;

70	143

71 // SpeechRecognitionEngineDelegate methods.	144 // SpeechRecognitionEngineDelegate methods.

72 virtual void OnSpeechRecognitionEngineResult(	145 virtual void OnSpeechRecognitionEngineResult(

73 const content::SpeechRecognitionResult& result) OVERRIDE;	146 const content::SpeechRecognitionResult& result) OVERRIDE;

74 virtual void OnSpeechRecognitionEngineError(	147 virtual void OnSpeechRecognitionEngineError(

75 const content::SpeechRecognitionError& error) OVERRIDE;	148 const content::SpeechRecognitionError& error) OVERRIDE;

76	149

77 private:

78 friend class SpeechRecognizerImplTest;

79

80 void InformErrorAndAbortRecognition(

81 content::SpeechRecognitionErrorCode error);

82 void SendRecordedAudioToServer();

83

84 void HandleOnError(int error_code); // Handles OnError in the IO thread.

85

86 // Handles OnData in the IO thread.

87 void HandleOnData(scoped_refptr<AudioChunk> raw_audio);

88

89 void OnAudioClosed(media::AudioInputController*);

90

91 // Helper method which closes the audio controller and frees it asynchronously

92 // without blocking the IO thread.

93 void CloseAudioControllerAsynchronously();

94

95 void SetAudioManagerForTesting(media::AudioManager* audio_manager);

96

97 content::SpeechRecognitionEventListener* listener_;	150 content::SpeechRecognitionEventListener* listener_;

98 media::AudioManager* testing_audio_manager_;	151 media::AudioManager* testing_audio_manager_;

99 scoped_ptr<SpeechRecognitionEngine> recognition_engine_;	152 scoped_ptr<SpeechRecognitionEngine> recognition_engine_;

100 Endpointer endpointer_;	153 Endpointer endpointer_;

101 scoped_refptr<media::AudioInputController> audio_controller_;	154 scoped_refptr<media::AudioInputController> audio_controller_;

102 scoped_refptr<net::URLRequestContextGetter> context_getter_;

103 int caller_id_;	155 int caller_id_;

104 std::string language_;

105 std::string grammar_;

106 bool filter_profanities_;

107 std::string hardware_info_;

108 std::string origin_url_;

109 int num_samples_recorded_;	156 int num_samples_recorded_;

110 float audio_level_;	157 float audio_level_;

	158 bool is_dispatching_event_;

	159 FSMState state_;

111	160

112 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl);	161 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl);

113 };	162 };

114	163

115 } // namespace speech	164 } // namespace speech

116	165

117 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_	166 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_

OLD	NEW

« no previous file with comments | « no previous file | content/browser/speech/speech_recognizer_impl.cc » ('j') | no next file with comments »