content/browser/speech/speech_recognizer_impl.h - Issue 9835049: Speech refactoring: Reimplemented speech_recognizer as a FSM. (CL1.5)

Side by Side Diff: content/browser/speech/speech_recognizer_impl.h

Issue 9835049: Speech refactoring: Reimplemented speech_recognizer as a FSM. (CL1.5) (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Rebased from master due to renames in media:: package. Created 8 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_	5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_

6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_	6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_

7	7

8 #include "base/basictypes.h"	8 #include "base/basictypes.h"

9 #include "base/memory/ref_counted.h"	9 #include "base/memory/ref_counted.h"

10 #include "base/memory/scoped_ptr.h"	10 #include "base/memory/scoped_ptr.h"

11 #include "content/browser/speech/endpointer/endpointer.h"	11 #include "content/browser/speech/endpointer/endpointer.h"

12 #include "content/browser/speech/speech_recognition_engine.h"	12 #include "content/browser/speech/speech_recognition_engine.h"

13 #include "content/public/browser/speech_recognizer.h"	13 #include "content/public/browser/speech_recognizer.h"

14 #include "content/public/common/speech_recognition_error.h"	14 #include "content/public/common/speech_recognition_error.h"

	15 #include "content/public/common/speech_recognition_result.h"

15 #include "media/audio/audio_input_controller.h"	16 #include "media/audio/audio_input_controller.h"

16 #include "net/url_request/url_request_context_getter.h"	17 #include "net/url_request/url_request_context_getter.h"

17	18

18 namespace content {	19 namespace content {

19 class SpeechRecognitionEventListener;	20 class SpeechRecognitionEventListener;

20 struct SpeechRecognitionResult;	21 struct SpeechRecognitionResult;

21 }	22 }

22	23

23 namespace media {	24 namespace media {

24 class AudioInputController;	25 class AudioInputController;

25 class AudioManager;	26 class AudioManager;

26 }	27 }

27	28

28 namespace speech {	29 namespace speech {

	30 // TODO(primiano) Next CL: Remove the Impl suffix and the exported

	31 // /content/public/browser/speech_recognizer.h interface since this class should

	32 // not be visible outside (currently we need it for speech input extension API).

29	33

30 // Records audio, sends recorded audio to server and translates server response	34 // Handles speech recognition for a session (identified by |caller_id|), taking

31 // to recognition result.	35 // care of audio capture, silence detection/endpointer and interaction with the

	36 // SpeechRecognitionEngine.

32 class CONTENT_EXPORT SpeechRecognizerImpl	37 class CONTENT_EXPORT SpeechRecognizerImpl

33 : public NON_EXPORTED_BASE(content::SpeechRecognizer),	38 : public NON_EXPORTED_BASE(content::SpeechRecognizer),

34 public media::AudioInputController::EventHandler,	39 public media::AudioInputController::EventHandler,

35 public NON_EXPORTED_BASE(SpeechRecognitionEngineDelegate) {	40 public NON_EXPORTED_BASE(SpeechRecognitionEngineDelegate) {

36 public:	41 public:

37 static const int kAudioSampleRate;	42 static const int kAudioSampleRate;

38 static const ChannelLayout kChannelLayout;	43 static const ChannelLayout kChannelLayout;

39 static const int kNumBitsPerAudioSample;	44 static const int kNumBitsPerAudioSample;

40 static const int kNoSpeechTimeoutMs;	45 static const int kNoSpeechTimeoutMs;

41 static const int kEndpointerEstimationTimeMs;	46 static const int kEndpointerEstimationTimeMs;

42	47

43 SpeechRecognizerImpl(	48 SpeechRecognizerImpl(

44 content::SpeechRecognitionEventListener* listener,	49 content::SpeechRecognitionEventListener* listener,

45 int caller_id,	50 int caller_id,

46 const std::string& language,	51 SpeechRecognitionEngine* engine);

47 const std::string& grammar,

48 net::URLRequestContextGetter* context_getter,

49 bool filter_profanities,

50 const std::string& hardware_info,

51 const std::string& origin_url);

52 virtual ~SpeechRecognizerImpl();	52 virtual ~SpeechRecognizerImpl();

53	53

54 // content::SpeechRecognizer methods.	54 // content::SpeechRecognizer methods.

55 virtual void StartRecognition() OVERRIDE;	55 virtual void StartRecognition() OVERRIDE;

56 virtual void AbortRecognition() OVERRIDE;	56 virtual void AbortRecognition() OVERRIDE;

57 virtual void StopAudioCapture() OVERRIDE;	57 virtual void StopAudioCapture() OVERRIDE;

58 virtual bool IsActive() const OVERRIDE;	58 virtual bool IsActive() const OVERRIDE;

59 virtual bool IsCapturingAudio() const OVERRIDE;	59 virtual bool IsCapturingAudio() const OVERRIDE;

60 const SpeechRecognitionEngine& recognition_engine() const;	60 const SpeechRecognitionEngine& recognition_engine() const;

61	61

	62 private:

	63 friend class SpeechRecognizerImplTest;

	64

	65 enum FSMState {

	66 STATE_IDLE = 0,

	67 STATE_STARTING,

	68 STATE_ESTIMATING_ENVIRONMENT,

	69 STATE_WAITING_FOR_SPEECH,

	70 STATE_RECOGNIZING,

	71 STATE_WAITING_FINAL_RESULT,

	72 STATE_MAX = STATE_WAITING_FINAL_RESULT

	73 };

	74

	75 enum FSMEvent {

	76 EVENT_ABORT = 0,

	77 EVENT_START,

	78 EVENT_STOP_CAPTURE,

	79 EVENT_AUDIO_DATA,

	80 EVENT_ENGINE_RESULT,

	81 EVENT_ENGINE_ERROR,

	82 EVENT_AUDIO_ERROR,

	83 EVENT_MAX = EVENT_AUDIO_ERROR

	84 };

	85

	86 struct FSMEventArgs {

	87 FSMEvent event;

	88 int audio_error_code;

	89 scoped_refptr<AudioChunk> audio_data;

	90 content::SpeechRecognitionResult engine_result;

	91 content::SpeechRecognitionError engine_error;

	92 FSMEventArgs(FSMEvent event_value);
	bulach 2012/04/04 15:38:17: nits: move the constructor / destructor before the members, and add a \n after the dtor. Also, mark the ctor explicit.
	Primiano Tucci (use gerrit) 2012/04/11 10:05:41: On 2012/04/04 15:38:17, bulach wrote: > nits: move the constructor / destructor before the members, and add a \n after the dtor.. also, mark the ctor explicit. Done.
	93 ~FSMEventArgs();

	94 };

	95

62 // AudioInputController::EventHandler methods.	96 // AudioInputController::EventHandler methods.

63 virtual void OnCreated(media::AudioInputController* controller) OVERRIDE {}	97 virtual void OnCreated(media::AudioInputController* controller) OVERRIDE {}

64 virtual void OnRecording(media::AudioInputController* controller) OVERRIDE {}	98 virtual void OnRecording(media::AudioInputController* controller) OVERRIDE {}

65 virtual void OnError(media::AudioInputController* controller,	99 virtual void OnError(media::AudioInputController* controller,

66 int error_code) OVERRIDE;	100 int error_code) OVERRIDE;

67 virtual void OnData(media::AudioInputController* controller,	101 virtual void OnData(media::AudioInputController* controller,

68 const uint8* data,	102 const uint8* data, uint32 size) OVERRIDE;

69 uint32 size) OVERRIDE;	103

	104 // Callback called on IO thread by audio_controller->Close().

	105 void OnAudioClosed(media::AudioInputController*);

70	106

71 // SpeechRecognitionEngineDelegate methods.	107 // SpeechRecognitionEngineDelegate methods.

72 virtual void OnSpeechRecognitionEngineResult(	108 virtual void OnSpeechRecognitionEngineResult(

73 const content::SpeechRecognitionResult& result) OVERRIDE;	109 const content::SpeechRecognitionResult& result) OVERRIDE;

74 virtual void OnSpeechRecognitionEngineError(	110 virtual void OnSpeechRecognitionEngineError(

75 const content::SpeechRecognitionError& error) OVERRIDE;	111 const content::SpeechRecognitionError& error) OVERRIDE;

76	112

77 private:	113 void DispatchEvent(const FSMEventArgs& event_args);

78 friend class SpeechRecognizerImplTest;	114 FSMState ExecuteTransitionAndGetNextState(const FSMEventArgs& args);

79	115 void ProcessAudioPipeline(const AudioChunk& raw_audio);

80 void InformErrorAndAbortRecognition(	116 FSMState StartRecording(const FSMEventArgs& event_args);

81 content::SpeechRecognitionErrorCode error);	117 FSMState StartRecognitionEngine(const FSMEventArgs& event_args);

82 void SendRecordedAudioToServer();	118 FSMState WaitEnvironmentEstimationCompletion(const FSMEventArgs& event_args);

83	119 FSMState DetectUserSpeechOrTimeout(const FSMEventArgs& event_args);

84 void HandleOnError(int error_code); // Handles OnError in the IO thread.	120 FSMState StopCaptureAndWaitForResult(const FSMEventArgs& event_args);

85	121 FSMState ProcessIntermediateResult(const FSMEventArgs& event_args);

86 // Handles OnData in the IO thread.	122 FSMState ProcessFinalResult(const FSMEventArgs& event_args);

87 void HandleOnData(scoped_refptr<AudioChunk> raw_audio);	123 FSMState Abort(const FSMEventArgs& event_args);

88	124 FSMState AbortWithError(const content::SpeechRecognitionError* error);

89 void OnAudioClosed(media::AudioInputController*);	125 FSMState AbortWithError(const content::SpeechRecognitionError& error);

90	126 FSMState DetectEndOfSpeech(const FSMEventArgs& event_args);

91 // Helper method which closes the audio controller and frees it asynchronously	127 FSMState DoNothing(const FSMEventArgs& event_args) const;

92 // without blocking the IO thread.	128 int GetElapsedTimeMs() const;

	129 void UpdateSignalAndNoiseLevels(const float& rms, bool clip_detected);

93 void CloseAudioControllerAsynchronously();	130 void CloseAudioControllerAsynchronously();
	bulach 2012/04/04 15:38:17: nits: we tend to declare all non-virtuals first, then all virtuals afterwards.
	Primiano Tucci (use gerrit) 2012/04/11 10:05:41: On 2012/04/04 15:38:17, bulach wrote: > nits: we tend to declare all non-virtuals first, then all virtuals afterwards.. Done.
94	131

95 void SetAudioManagerForTesting(media::AudioManager* audio_manager);	132 void SetAudioManagerForTesting(media::AudioManager* audio_manager);

96	133

97 content::SpeechRecognitionEventListener* listener_;	134 content::SpeechRecognitionEventListener* listener_;

98 media::AudioManager* testing_audio_manager_;	135 media::AudioManager* testing_audio_manager_;

99 scoped_ptr<SpeechRecognitionEngine> recognition_engine_;	136 scoped_ptr<SpeechRecognitionEngine> recognition_engine_;

100 Endpointer endpointer_;	137 Endpointer endpointer_;

101 scoped_refptr<media::AudioInputController> audio_controller_;	138 scoped_refptr<media::AudioInputController> audio_controller_;

102 scoped_refptr<net::URLRequestContextGetter> context_getter_;

103 int caller_id_;	139 int caller_id_;

104 std::string language_;

105 std::string grammar_;

106 bool filter_profanities_;

107 std::string hardware_info_;

108 std::string origin_url_;

109 int num_samples_recorded_;	140 int num_samples_recorded_;

110 float audio_level_;	141 float audio_level_;

	142 bool in_event_dispatching_;

	143 FSMState state_;

111	144

112 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl);	145 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl);

113 };	146 };

114	147

115 } // namespace speech	148 } // namespace speech

116	149

117 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_	150 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_

OLD	NEW

« no previous file with comments | « no previous file | content/browser/speech/speech_recognizer_impl.cc » ('j') | content/browser/speech/speech_recognizer_impl.cc » ('J')