content/browser/speech/speech_recognizer_impl.h - Issue 9835049: Speech refactoring: Reimplemented speech_recognizer as a FSM. (CL1.5)

Side by Side Diff: content/browser/speech/speech_recognizer_impl.h

Issue 9835049: Speech refactoring: Reimplemented speech_recognizer as a FSM. (CL1.5) (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Fixed according to Satish review. Created 8 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_	5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_

6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_	6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_

7	7

8 #include "base/basictypes.h"	8 #include "base/basictypes.h"

9 #include "base/memory/ref_counted.h"	9 #include "base/memory/ref_counted.h"

10 #include "base/memory/scoped_ptr.h"	10 #include "base/memory/scoped_ptr.h"

11 #include "content/browser/speech/endpointer/endpointer.h"	11 #include "content/browser/speech/endpointer/endpointer.h"

12 #include "content/browser/speech/speech_recognition_engine.h"	12 #include "content/browser/speech/speech_recognition_engine.h"

13 #include "content/public/browser/speech_recognizer.h"	13 #include "content/public/browser/speech_recognizer.h"

14 #include "content/public/common/speech_recognition_error.h"	14 #include "content/public/common/speech_recognition_error.h"

	15 #include "content/public/common/speech_recognition_result.h"

15 #include "media/audio/audio_input_controller.h"	16 #include "media/audio/audio_input_controller.h"

16 #include "net/url_request/url_request_context_getter.h"	17 #include "net/url_request/url_request_context_getter.h"

17	18

18 namespace content {	19 namespace content {

19 class SpeechRecognitionEventListener;	20 class SpeechRecognitionEventListener;

20 struct SpeechRecognitionResult;	21 struct SpeechRecognitionResult;

21 }	22 }

22	23

23 namespace media {	24 namespace media {

24 class AudioInputController;	25 class AudioInputController;

25 class AudioManager;	26 class AudioManager;

26 }	27 }

27	28

28 namespace speech {	29 namespace speech {

29	30

30 // Records audio, sends recorded audio to server and translates server response	31 // TODO(primiano) Next CL: Remove the Impl suffix and the exported

31 // to recognition result.	32 // /content/public/browser/speech_recognizer.h interface since this class should
	jam 2012/04/12 16:22:59 That'd be great! (The fewer interfaces in content/public, the better.) I'm curious, how would extensions use this?
	33 // not be visible outside (currently we need it for speech input extension API).

	34

	35 // Handles speech recognition for a session (identified by |caller_id|), taking

	36 // care of audio capture, silence detection/endpointer and interaction with the

	37 // SpeechRecognitionEngine.

32 class CONTENT_EXPORT SpeechRecognizerImpl	38 class CONTENT_EXPORT SpeechRecognizerImpl

33 : public NON_EXPORTED_BASE(content::SpeechRecognizer),	39 : public NON_EXPORTED_BASE(content::SpeechRecognizer),

34 public media::AudioInputController::EventHandler,	40 public media::AudioInputController::EventHandler,

35 public NON_EXPORTED_BASE(SpeechRecognitionEngineDelegate) {	41 public NON_EXPORTED_BASE(SpeechRecognitionEngineDelegate) {

36 public:	42 public:

37 static const int kAudioSampleRate;	43 static const int kAudioSampleRate;

38 static const ChannelLayout kChannelLayout;	44 static const ChannelLayout kChannelLayout;

39 static const int kNumBitsPerAudioSample;	45 static const int kNumBitsPerAudioSample;

40 static const int kNoSpeechTimeoutMs;	46 static const int kNoSpeechTimeoutMs;

41 static const int kEndpointerEstimationTimeMs;	47 static const int kEndpointerEstimationTimeMs;

42	48

43 SpeechRecognizerImpl(	49 SpeechRecognizerImpl(

44 content::SpeechRecognitionEventListener* listener,	50 content::SpeechRecognitionEventListener* listener,

45 int caller_id,	51 int caller_id,

46 const std::string& language,	52 SpeechRecognitionEngine* engine);

47 const std::string& grammar,

48 net::URLRequestContextGetter* context_getter,

49 bool filter_profanities,

50 const std::string& hardware_info,

51 const std::string& origin_url);

52 virtual ~SpeechRecognizerImpl();	53 virtual ~SpeechRecognizerImpl();

53	54

54 // content::SpeechRecognizer methods.	55 // content::SpeechRecognizer methods.

55 virtual void StartRecognition() OVERRIDE;	56 virtual void StartRecognition() OVERRIDE;

56 virtual void AbortRecognition() OVERRIDE;	57 virtual void AbortRecognition() OVERRIDE;

57 virtual void StopAudioCapture() OVERRIDE;	58 virtual void StopAudioCapture() OVERRIDE;

58 virtual bool IsActive() const OVERRIDE;	59 virtual bool IsActive() const OVERRIDE;

59 virtual bool IsCapturingAudio() const OVERRIDE;	60 virtual bool IsCapturingAudio() const OVERRIDE;

60 const SpeechRecognitionEngine& recognition_engine() const;	61 const SpeechRecognitionEngine& recognition_engine() const;

61	62

	63 private:

	64 friend class SpeechRecognizerImplTest;

	65

	66 enum FSMState {

	67 STATE_IDLE = 0,

	68 STATE_STARTING,

	69 STATE_ESTIMATING_ENVIRONMENT,

	70 STATE_WAITING_FOR_SPEECH,

	71 STATE_RECOGNIZING,

	72 STATE_WAITING_FINAL_RESULT,

	73 STATE_MAX = STATE_WAITING_FINAL_RESULT

	74 };

	75

	76 enum FSMEvent {

	77 EVENT_ABORT = 0,

	78 EVENT_START,

	79 EVENT_STOP_CAPTURE,

	80 EVENT_AUDIO_DATA,

	81 EVENT_ENGINE_RESULT,

	82 EVENT_ENGINE_ERROR,

	83 EVENT_AUDIO_ERROR,

	84 EVENT_MAX = EVENT_AUDIO_ERROR

	85 };

	86

	87 struct FSMEventArgs {

	88 explicit FSMEventArgs(FSMEvent event_value);

	89 ~FSMEventArgs();

	90

	91 FSMEvent event;

	92 int audio_error_code;

	93 scoped_refptr<AudioChunk> audio_data;

	94 content::SpeechRecognitionResult engine_result;

	95 content::SpeechRecognitionError engine_error;

	96 };

	97

	98 // Entry point for pushing any new external event into the recognizer FSM.

	99 void DispatchEvent(const FSMEventArgs& event_args);

	100

	101 // Defines the behavior of the recognizer FSM, selecting the appropriate

	102 // transition according to the current state and event.

	103 FSMState ExecuteTransitionAndGetNextState(const FSMEventArgs& args);

	104

	105 // Process a new audio chunk in the audio pipeline (endpointer, vumeter, etc).

	106 void ProcessAudioPipeline(const AudioChunk& raw_audio);

	107

	108 // The methods below handle transitions of the recognizer FSM.

	109 FSMState StartRecording(const FSMEventArgs& event_args);

	110 FSMState StartRecognitionEngine(const FSMEventArgs& event_args);

	111 FSMState WaitEnvironmentEstimationCompletion(const FSMEventArgs& event_args);

	112 FSMState DetectUserSpeechOrTimeout(const FSMEventArgs& event_args);

	113 FSMState StopCaptureAndWaitForResult(const FSMEventArgs& event_args);

	114 FSMState ProcessIntermediateResult(const FSMEventArgs& event_args);

	115 FSMState ProcessFinalResult(const FSMEventArgs& event_args);

	116 FSMState Abort(const FSMEventArgs& event_args);

	117 FSMState AbortWithError(const content::SpeechRecognitionError* error);

	118 FSMState AbortWithError(const content::SpeechRecognitionError& error);

	119 FSMState DetectEndOfSpeech(const FSMEventArgs& event_args);

	120 FSMState DoNothing(const FSMEventArgs& event_args) const;

	121 FSMState NotFeasible(const FSMEventArgs& event_args);

	122

	123 // Returns the time span of captured audio samples since the start of capture.

	124 int GetElapsedTimeMs() const;
	bulach 2012/04/12 16:20:16 nit: maybe use base::TimeDelta instead of specifying the time unit?
	Primiano Tucci (use gerrit) 2012/04/12 17:38:05 On 2012/04/12 16:20:16, bulach wrote: > nit: maybe use base::TimeDelta instead of specifying the time unit? Hmm, it is not a true time (not derived from the system clock) but an "ideal" measurement based on the number of audio frames received (which relies on the assumption that audio is captured at the specified rate), thus it is always integral and a multiple of a known base.
	125

	126 // Calculates the input volume to be displayed in the UI, triggering the

	127 // OnAudioLevelsChange event accordingly.

	128 void UpdateSignalAndNoiseLevels(const float& rms, bool clip_detected);

	129

	130 void CloseAudioControllerAsynchronously();

	131 void SetAudioManagerForTesting(media::AudioManager* audio_manager);

	132

62 // AudioInputController::EventHandler methods.	133 // AudioInputController::EventHandler methods.

63 virtual void OnCreated(media::AudioInputController* controller) OVERRIDE {}	134 virtual void OnCreated(media::AudioInputController* controller) OVERRIDE {}

64 virtual void OnRecording(media::AudioInputController* controller) OVERRIDE {}	135 virtual void OnRecording(media::AudioInputController* controller) OVERRIDE {}

65 virtual void OnError(media::AudioInputController* controller,	136 virtual void OnError(media::AudioInputController* controller,

66 int error_code) OVERRIDE;	137 int error_code) OVERRIDE;

67 virtual void OnData(media::AudioInputController* controller,	138 virtual void OnData(media::AudioInputController* controller,

68 const uint8* data,	139 const uint8* data, uint32 size) OVERRIDE;

69 uint32 size) OVERRIDE;	140

	141 // Callback called on IO thread by audio_controller->Close().

	142 void OnAudioClosed(media::AudioInputController*);
	bulach 2012/04/12 16:20:16 nit: move this method up before the virtuals.
	Primiano Tucci (use gerrit) 2012/04/12 17:38:05 On 2012/04/12 16:20:16, bulach wrote: > nit: move this method up before the virtuals. Oops, sorry, did it again! :/
70	143

71 // SpeechRecognitionEngineDelegate methods.	144 // SpeechRecognitionEngineDelegate methods.

72 virtual void OnSpeechRecognitionEngineResult(	145 virtual void OnSpeechRecognitionEngineResult(

73 const content::SpeechRecognitionResult& result) OVERRIDE;	146 const content::SpeechRecognitionResult& result) OVERRIDE;

74 virtual void OnSpeechRecognitionEngineError(	147 virtual void OnSpeechRecognitionEngineError(

75 const content::SpeechRecognitionError& error) OVERRIDE;	148 const content::SpeechRecognitionError& error) OVERRIDE;

76	149

77 private:

78 friend class SpeechRecognizerImplTest;

79

80 void InformErrorAndAbortRecognition(

81 content::SpeechRecognitionErrorCode error);

82 void SendRecordedAudioToServer();

83

84 void HandleOnError(int error_code); // Handles OnError in the IO thread.

85

86 // Handles OnData in the IO thread.

87 void HandleOnData(scoped_refptr<AudioChunk> raw_audio);

88

89 void OnAudioClosed(media::AudioInputController*);

90

91 // Helper method which closes the audio controller and frees it asynchronously

92 // without blocking the IO thread.

93 void CloseAudioControllerAsynchronously();

94

95 void SetAudioManagerForTesting(media::AudioManager* audio_manager);

96

97 content::SpeechRecognitionEventListener* listener_;	150 content::SpeechRecognitionEventListener* listener_;

98 media::AudioManager* testing_audio_manager_;	151 media::AudioManager* testing_audio_manager_;

99 scoped_ptr<SpeechRecognitionEngine> recognition_engine_;	152 scoped_ptr<SpeechRecognitionEngine> recognition_engine_;

100 Endpointer endpointer_;	153 Endpointer endpointer_;

101 scoped_refptr<media::AudioInputController> audio_controller_;	154 scoped_refptr<media::AudioInputController> audio_controller_;

102 scoped_refptr<net::URLRequestContextGetter> context_getter_;

103 int caller_id_;	155 int caller_id_;

104 std::string language_;

105 std::string grammar_;

106 bool filter_profanities_;

107 std::string hardware_info_;

108 std::string origin_url_;

109 int num_samples_recorded_;	156 int num_samples_recorded_;

110 float audio_level_;	157 float audio_level_;

	158 bool is_dispatching_event_;

	159 FSMState state_;

111	160

112 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl);	161 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl);

113 };	162 };

114	163

115 } // namespace speech	164 } // namespace speech

116	165

117 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_	166 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_

OLD	NEW

« no previous file with comments | « no previous file | content/browser/speech/speech_recognizer_impl.cc » ('j') | content/browser/speech/speech_recognizer_impl.cc » ('J')