content/browser/speech/speech_recognizer_impl.h - Issue 9835049: Speech refactoring: Reimplemented speech_recognizer as a FSM. (CL1.5)

Side by Side Diff: content/browser/speech/speech_recognizer_impl.h

Issue 9835049: Speech refactoring: Reimplemented speech_recognizer as a FSM. (CL1.5) (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Fixed according to Bulach review. Created 8 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_	5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_

6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_	6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_

7	7

8 #include "base/basictypes.h"	8 #include "base/basictypes.h"

9 #include "base/memory/ref_counted.h"	9 #include "base/memory/ref_counted.h"

10 #include "base/memory/scoped_ptr.h"	10 #include "base/memory/scoped_ptr.h"

11 #include "content/browser/speech/endpointer/endpointer.h"	11 #include "content/browser/speech/endpointer/endpointer.h"

12 #include "content/browser/speech/speech_recognition_engine.h"	12 #include "content/browser/speech/speech_recognition_engine.h"

13 #include "content/public/browser/speech_recognizer.h"	13 #include "content/public/browser/speech_recognizer.h"

14 #include "content/public/common/speech_recognition_error.h"	14 #include "content/public/common/speech_recognition_error.h"

	15 #include "content/public/common/speech_recognition_result.h"

15 #include "media/audio/audio_input_controller.h"	16 #include "media/audio/audio_input_controller.h"

16 #include "net/url_request/url_request_context_getter.h"	17 #include "net/url_request/url_request_context_getter.h"

17	18

18 namespace content {	19 namespace content {

19 class SpeechRecognitionEventListener;	20 class SpeechRecognitionEventListener;

20 struct SpeechRecognitionResult;	21 struct SpeechRecognitionResult;

21 }	22 }

22	23

23 namespace media {	24 namespace media {

24 class AudioInputController;	25 class AudioInputController;

25 class AudioManager;	26 class AudioManager;

26 }	27 }

27	28

28 namespace speech {	29 namespace speech {

	30 // TODO(primiano) Next CL: Remove the Impl suffix and the exported
	Satish 2012/04/12 08:58:33: Add a newline above full-length comments such as this. Primiano Tucci (use gerrit) 2012/04/12 12:56:48: Done.
	31 // /content/public/browser/speech_recognizer.h interface since this class should

	32 // not be visible outside (currently we need it for speech input extension API).

29	33

30 // Records audio, sends recorded audio to server and translates server response	34 // Handles speech recognition for a session (identified by |caller_id|), taking

31 // to recognition result.	35 // care of audio capture, silence detection/endpointer and interaction with the

	36 // SpeechRecognitionEngine.

32 class CONTENT_EXPORT SpeechRecognizerImpl	37 class CONTENT_EXPORT SpeechRecognizerImpl

33 : public NON_EXPORTED_BASE(content::SpeechRecognizer),	38 : public NON_EXPORTED_BASE(content::SpeechRecognizer),

34 public media::AudioInputController::EventHandler,	39 public media::AudioInputController::EventHandler,

35 public NON_EXPORTED_BASE(SpeechRecognitionEngineDelegate) {	40 public NON_EXPORTED_BASE(SpeechRecognitionEngineDelegate) {

36 public:	41 public:

37 static const int kAudioSampleRate;	42 static const int kAudioSampleRate;

38 static const ChannelLayout kChannelLayout;	43 static const ChannelLayout kChannelLayout;

39 static const int kNumBitsPerAudioSample;	44 static const int kNumBitsPerAudioSample;

40 static const int kNoSpeechTimeoutMs;	45 static const int kNoSpeechTimeoutMs;

41 static const int kEndpointerEstimationTimeMs;	46 static const int kEndpointerEstimationTimeMs;

42	47

43 SpeechRecognizerImpl(	48 SpeechRecognizerImpl(

44 content::SpeechRecognitionEventListener* listener,	49 content::SpeechRecognitionEventListener* listener,

45 int caller_id,	50 int caller_id,

46 const std::string& language,	51 SpeechRecognitionEngine* engine);

47 const std::string& grammar,

48 net::URLRequestContextGetter* context_getter,

49 bool filter_profanities,

50 const std::string& hardware_info,

51 const std::string& origin_url);

52 virtual ~SpeechRecognizerImpl();	52 virtual ~SpeechRecognizerImpl();

53	53

54 // content::SpeechRecognizer methods.	54 // content::SpeechRecognizer methods.

55 virtual void StartRecognition() OVERRIDE;	55 virtual void StartRecognition() OVERRIDE;

56 virtual void AbortRecognition() OVERRIDE;	56 virtual void AbortRecognition() OVERRIDE;

57 virtual void StopAudioCapture() OVERRIDE;	57 virtual void StopAudioCapture() OVERRIDE;

58 virtual bool IsActive() const OVERRIDE;	58 virtual bool IsActive() const OVERRIDE;

59 virtual bool IsCapturingAudio() const OVERRIDE;	59 virtual bool IsCapturingAudio() const OVERRIDE;

60 const SpeechRecognitionEngine& recognition_engine() const;	60 const SpeechRecognitionEngine& recognition_engine() const;

61	61

	62 private:

	63 friend class SpeechRecognizerImplTest;

	64

	65 enum FSMState {

	66 STATE_IDLE = 0,

	67 STATE_STARTING,

	68 STATE_ESTIMATING_ENVIRONMENT,

	69 STATE_WAITING_FOR_SPEECH,

	70 STATE_RECOGNIZING,

	71 STATE_WAITING_FINAL_RESULT,

	72 STATE_MAX = STATE_WAITING_FINAL_RESULT

	73 };

	74

	75 enum FSMEvent {

	76 EVENT_ABORT = 0,

	77 EVENT_START,

	78 EVENT_STOP_CAPTURE,

	79 EVENT_AUDIO_DATA,

	80 EVENT_ENGINE_RESULT,

	81 EVENT_ENGINE_ERROR,

	82 EVENT_AUDIO_ERROR,

	83 EVENT_MAX = EVENT_AUDIO_ERROR

	84 };

	85

	86 struct FSMEventArgs {

	87 explicit FSMEventArgs(FSMEvent event_value);

	88 ~FSMEventArgs();

	89

	90 FSMEvent event;

	91 int audio_error_code;

	92 scoped_refptr<AudioChunk> audio_data;

	93 content::SpeechRecognitionResult engine_result;

	94 content::SpeechRecognitionError engine_error;

	95 };

	96

	97 void DispatchEvent(const FSMEventArgs& event_args);

	98 FSMState ExecuteTransitionAndGetNextState(const FSMEventArgs& args);

	99 void ProcessAudioPipeline(const AudioChunk& raw_audio);

	100 FSMState StartRecording(const FSMEventArgs& event_args);

	101 FSMState StartRecognitionEngine(const FSMEventArgs& event_args);

	102 FSMState WaitEnvironmentEstimationCompletion(const FSMEventArgs& event_args);

	103 FSMState DetectUserSpeechOrTimeout(const FSMEventArgs& event_args);

	104 FSMState StopCaptureAndWaitForResult(const FSMEventArgs& event_args);

	105 FSMState ProcessIntermediateResult(const FSMEventArgs& event_args);

	106 FSMState ProcessFinalResult(const FSMEventArgs& event_args);

	107 FSMState Abort(const FSMEventArgs& event_args);

	108 FSMState AbortWithError(const content::SpeechRecognitionError* error);

	109 FSMState AbortWithError(const content::SpeechRecognitionError& error);

	110 FSMState DetectEndOfSpeech(const FSMEventArgs& event_args);

	111 FSMState DoNothing(const FSMEventArgs& event_args) const;

	112 FSMState NotFeasible(const FSMEventArgs& event_args);

	113 int GetElapsedTimeMs() const;
	Satish 2012/04/12 08:58:33: Can we logically separate the above methods from the ones below and add comments explaining that the above ones handle the FSM transitions? Also add individual comments for the ones from this line onward (e.g. what does GetElapsedTimeMs calculate, what does UpdateSignalAndNoiseLevels update, etc.). Primiano Tucci (use gerrit) 2012/04/12 12:56:48: Done.
	114 void UpdateSignalAndNoiseLevels(const float& rms, bool clip_detected);

	115 void CloseAudioControllerAsynchronously();

	116 void SetAudioManagerForTesting(media::AudioManager* audio_manager);

	117

62 // AudioInputController::EventHandler methods.	118 // AudioInputController::EventHandler methods.

63 virtual void OnCreated(media::AudioInputController* controller) OVERRIDE {}	119 virtual void OnCreated(media::AudioInputController* controller) OVERRIDE {}

64 virtual void OnRecording(media::AudioInputController* controller) OVERRIDE {}	120 virtual void OnRecording(media::AudioInputController* controller) OVERRIDE {}

65 virtual void OnError(media::AudioInputController* controller,	121 virtual void OnError(media::AudioInputController* controller,

66 int error_code) OVERRIDE;	122 int error_code) OVERRIDE;

67 virtual void OnData(media::AudioInputController* controller,	123 virtual void OnData(media::AudioInputController* controller,

68 const uint8* data,	124 const uint8* data, uint32 size) OVERRIDE;

69 uint32 size) OVERRIDE;	125

	126 // Callback called on IO thread by audio_controller->Close().

	127 void OnAudioClosed(media::AudioInputController*);

70	128

71 // SpeechRecognitionEngineDelegate methods.	129 // SpeechRecognitionEngineDelegate methods.

72 virtual void OnSpeechRecognitionEngineResult(	130 virtual void OnSpeechRecognitionEngineResult(

73 const content::SpeechRecognitionResult& result) OVERRIDE;	131 const content::SpeechRecognitionResult& result) OVERRIDE;

74 virtual void OnSpeechRecognitionEngineError(	132 virtual void OnSpeechRecognitionEngineError(

75 const content::SpeechRecognitionError& error) OVERRIDE;	133 const content::SpeechRecognitionError& error) OVERRIDE;

76	134

77 private:

78 friend class SpeechRecognizerImplTest;

79

80 void InformErrorAndAbortRecognition(

81 content::SpeechRecognitionErrorCode error);

82 void SendRecordedAudioToServer();

83

84 void HandleOnError(int error_code); // Handles OnError in the IO thread.

85

86 // Handles OnData in the IO thread.

87 void HandleOnData(scoped_refptr<AudioChunk> raw_audio);

88

89 void OnAudioClosed(media::AudioInputController*);

90

91 // Helper method which closes the audio controller and frees it asynchronously

92 // without blocking the IO thread.

93 void CloseAudioControllerAsynchronously();

94

95 void SetAudioManagerForTesting(media::AudioManager* audio_manager);

96

97 content::SpeechRecognitionEventListener* listener_;	135 content::SpeechRecognitionEventListener* listener_;

98 media::AudioManager* testing_audio_manager_;	136 media::AudioManager* testing_audio_manager_;

99 scoped_ptr<SpeechRecognitionEngine> recognition_engine_;	137 scoped_ptr<SpeechRecognitionEngine> recognition_engine_;

100 Endpointer endpointer_;	138 Endpointer endpointer_;

101 scoped_refptr<media::AudioInputController> audio_controller_;	139 scoped_refptr<media::AudioInputController> audio_controller_;

102 scoped_refptr<net::URLRequestContextGetter> context_getter_;

103 int caller_id_;	140 int caller_id_;

104 std::string language_;

105 std::string grammar_;

106 bool filter_profanities_;

107 std::string hardware_info_;

108 std::string origin_url_;

109 int num_samples_recorded_;	141 int num_samples_recorded_;

110 float audio_level_;	142 float audio_level_;

	143 bool in_event_dispatching_;
	Satish 2012/04/12 08:58:33: Probably reword as 'is_dispatching_event_'. Primiano Tucci (use gerrit) 2012/04/12 12:56:48: Done.
	144 FSMState state_;

111	145

112 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl);	146 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl);

113 };	147 };

114	148

115 } // namespace speech	149 } // namespace speech

116	150

117 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_	151 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_

OLD	NEW

« no previous file with comments | « no previous file | content/browser/speech/speech_recognizer_impl.cc » ('j') | no next file with comments »