content/browser/speech/speech_recognizer_impl.h - Issue 9835049: Speech refactoring: Reimplemented speech_recognizer as a FSM. (CL1.5)

Side by Side Diff: content/browser/speech/speech_recognizer_impl.h

Issue 9835049: Speech refactoring: Reimplemented speech_recognizer as a FSM. (CL1.5) (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Minor style fixes. Created 8 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_	5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_

6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_	6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_

7	7

8 #include "base/basictypes.h"	8 #include "base/basictypes.h"

9 #include "base/memory/ref_counted.h"	9 #include "base/memory/ref_counted.h"

10 #include "base/memory/scoped_ptr.h"	10 #include "base/memory/scoped_ptr.h"

11 #include "content/browser/speech/endpointer/endpointer.h"	11 #include "content/browser/speech/endpointer/endpointer.h"

12 #include "content/browser/speech/speech_recognition_engine.h"	12 #include "content/browser/speech/speech_recognition_engine.h"

13 #include "content/public/browser/speech_recognizer.h"	13 #include "content/public/browser/speech_recognizer.h"

14 #include "content/public/common/speech_recognition_error.h"	14 #include "content/public/common/speech_recognition_error.h"

	15 #include "content/public/common/speech_recognition_result.h"

15 #include "media/audio/audio_input_controller.h"	16 #include "media/audio/audio_input_controller.h"

16 #include "net/url_request/url_request_context_getter.h"	17 #include "net/url_request/url_request_context_getter.h"

17	18

18 namespace content {	19 namespace content {

19 class SpeechRecognitionEventListener;	20 class SpeechRecognitionEventListener;

20 struct SpeechRecognitionResult;

21 }

22

23 namespace media {

24 class AudioInputController;

25 }	21 }

26	22

27 namespace speech {	23 namespace speech {

	24 // TODO(primiano) Next CL: Remove the Impl suffix and the exported

	25 // /content/public/browser/speech_recognizer.h interface since this class should

	26 // not be visible outside (currently we need it for speech input extension API).

28	27

29 // Records audio, sends recorded audio to server and translates server response	28 // Handles speech recognition for a session (identified by |caller_id|), taking

30 // to recognition result.	29 // care of audio capture, silence detection/endpointer and interaction with the

	30 // SpeechRecognitionEngine.

31 class CONTENT_EXPORT SpeechRecognizerImpl	31 class CONTENT_EXPORT SpeechRecognizerImpl

32 : public NON_EXPORTED_BASE(content::SpeechRecognizer),	32 : public NON_EXPORTED_BASE(content::SpeechRecognizer),

33 public media::AudioInputController::EventHandler,	33 public media::AudioInputController::EventHandler,

34 public NON_EXPORTED_BASE(SpeechRecognitionEngineDelegate) {	34 public NON_EXPORTED_BASE(SpeechRecognitionEngineDelegate) {

35 public:	35 public:

36 static const int kAudioSampleRate;	36 static const int kAudioSampleRate;

37 static const ChannelLayout kChannelLayout;	37 static const ChannelLayout kChannelLayout;

38 static const int kNumBitsPerAudioSample;	38 static const int kNumBitsPerAudioSample;

39 static const int kNoSpeechTimeoutMs;	39 static const int kNoSpeechTimeoutMs;

40 static const int kEndpointerEstimationTimeMs;	40 static const int kEndpointerEstimationTimeMs;

41	41

42 SpeechRecognizerImpl(	42 SpeechRecognizerImpl(

43 content::SpeechRecognitionEventListener* listener,	43 content::SpeechRecognitionEventListener* listener,

44 int caller_id,	44 int caller_id,

45 const std::string& language,	45 SpeechRecognitionEngine* engine);

46 const std::string& grammar,

47 net::URLRequestContextGetter* context_getter,

48 bool filter_profanities,

49 const std::string& hardware_info,

50 const std::string& origin_url);

51 virtual ~SpeechRecognizerImpl();	46 virtual ~SpeechRecognizerImpl();

52	47

53 // content::SpeechRecognizer methods.	48 // content::SpeechRecognizer methods.

54 virtual void StartRecognition() OVERRIDE;	49 virtual void StartRecognition() OVERRIDE;

55 virtual void AbortRecognition() OVERRIDE;	50 virtual void AbortRecognition() OVERRIDE;

56 virtual void StopAudioCapture() OVERRIDE;	51 virtual void StopAudioCapture() OVERRIDE;

57 virtual bool IsActive() const OVERRIDE;	52 virtual bool IsActive() const OVERRIDE;

58 virtual bool IsCapturingAudio() const OVERRIDE;	53 virtual bool IsCapturingAudio() const OVERRIDE;

59 const SpeechRecognitionEngine& recognition_engine() const;	54 const SpeechRecognitionEngine& recognition_engine() const;

60	55

	56 private:

	57 friend class SpeechRecognizerImplTest;

	58

	59 enum FSMState {

	60 STATE_IDLE = 0,

	61 STATE_STARTING,

	62 STATE_ESTIMATING_ENVIRONMENT,

	63 STATE_WAITING_FOR_SPEECH,

	64 STATE_RECOGNIZING,

	65 STATE_WAITING_FINAL_RESULT,

	66 STATE_MAX = STATE_WAITING_FINAL_RESULT

	67 };

	68

	69 enum FSMEvent {

	70 EVENT_ABORT = 0,

	71 EVENT_START,

	72 EVENT_STOP_CAPTURE,

	73 EVENT_AUDIO_DATA,

	74 EVENT_ENGINE_RESULT,

	75 EVENT_ENGINE_ERROR,

	76 EVENT_AUDIO_ERROR,

	77 EVENT_MAX = EVENT_AUDIO_ERROR

	78 };

	79

	80 struct FSMEventArgs {

	81 FSMEvent event;

	82 int audio_error_code;

	83 scoped_refptr<AudioChunk> audio_data;

	84 content::SpeechRecognitionResult engine_result;

	85 content::SpeechRecognitionError engine_error;

	86 FSMEventArgs();

	87 ~FSMEventArgs();

	88 };

	89

	90 typedef base::Callback<FSMState(const FSMEventArgs&)> TransitionFunction;

	91

61 // AudioInputController::EventHandler methods.	92 // AudioInputController::EventHandler methods.

62 virtual void OnCreated(media::AudioInputController* controller) OVERRIDE {}	93 virtual void OnCreated(media::AudioInputController* controller) OVERRIDE {}

63 virtual void OnRecording(media::AudioInputController* controller) OVERRIDE {}	94 virtual void OnRecording(media::AudioInputController* controller) OVERRIDE {}

64 virtual void OnError(media::AudioInputController* controller,	95 virtual void OnError(media::AudioInputController* controller,

65 int error_code) OVERRIDE;	96 int error_code) OVERRIDE;

66 virtual void OnData(media::AudioInputController* controller,	97 virtual void OnData(media::AudioInputController* controller,

67 const uint8* data,	98 const uint8* data, uint32 size) OVERRIDE;

68 uint32 size) OVERRIDE;	99 // Callback called on IO thread by audio_controller->Close().
	hans 2012/04/02 16:05:59 nit: maybe put a blank line before this, since it's not part of the EventHandler methods. Primiano Tucci (use gerrit) 2012/04/03 10:16:39 On 2012/04/02 16:05:59, hans wrote: > nit: maybe put a blank line before this, since it's not part of the EventHandler methods. Done.
	100 void OnAudioClosed(media::AudioInputController*);

69	101

70 // SpeechRecognitionEngineDelegate methods.	102 // SpeechRecognitionEngineDelegate methods.

71 virtual void OnSpeechRecognitionEngineResult(	103 virtual void OnSpeechRecognitionEngineResult(

72 const content::SpeechRecognitionResult& result) OVERRIDE;	104 const content::SpeechRecognitionResult& result) OVERRIDE;

73 virtual void OnSpeechRecognitionEngineError(	105 virtual void OnSpeechRecognitionEngineError(

74 const content::SpeechRecognitionError& error) OVERRIDE;	106 const content::SpeechRecognitionError& error) OVERRIDE;

75	107

76 private:	108 void InitializeFSM();

77 friend class SpeechRecognizerImplTest;	109 void DispatchEvent(FSMEvent, FSMEventArgs);

78	110 void ProcessAudioPipeline(const AudioChunk& raw_audio);

79 void InformErrorAndAbortRecognition(	111 FSMState StartRecording(const FSMEventArgs& event_args);

80 content::SpeechRecognitionErrorCode error);	112 FSMState StartRecognitionEngine(const FSMEventArgs& event_args);

81 void SendRecordedAudioToServer();	113 FSMState WaitEnvironmentEstimationCompletion(const FSMEventArgs& event_args);

82	114 FSMState DetectUserSpeechOrTimeout(const FSMEventArgs& event_args);

83 void HandleOnError(int error_code); // Handles OnError in the IO thread.	115 FSMState StopCaptureAndWaitResult(const FSMEventArgs& event_args);

84	116 FSMState ProcessIntermediateResult(const FSMEventArgs& event_args);

85 // Handles OnData in the IO thread.	117 FSMState ProcessFinalResult(const FSMEventArgs& event_args);

86 void HandleOnData(scoped_refptr<AudioChunk> raw_audio);	118 FSMState Abort(const FSMEventArgs& event_args);

87	119 FSMState AbortWithError(const content::SpeechRecognitionError* error);

88 void OnAudioClosed(media::AudioInputController*);	120 FSMState AbortWithError(const content::SpeechRecognitionError& error);

89	121 FSMState DetectEndOfSpeech(const FSMEventArgs& event_args);

90 // Helper method which closes the audio controller and frees it asynchronously	122 FSMState DoNothing(const FSMEventArgs& event_args) const;

91 // without blocking the IO thread.	123 int GetElapsedTimeMs() const;

	124 void UpdateSignalAndNoiseLevels(const float& rms, bool clip_detected);

92 void CloseAudioControllerAsynchronously();	125 void CloseAudioControllerAsynchronously();

93

94 void SetAudioManagerForTesting(AudioManager* audio_manager);	126 void SetAudioManagerForTesting(AudioManager* audio_manager);

95	127

96 content::SpeechRecognitionEventListener* listener_;	128 content::SpeechRecognitionEventListener* listener_;

97 AudioManager* testing_audio_manager_;	129 AudioManager* testing_audio_manager_;

98 scoped_ptr<SpeechRecognitionEngine> recognition_engine_;	130 scoped_ptr<SpeechRecognitionEngine> recognition_engine_;

99 Endpointer endpointer_;	131 Endpointer endpointer_;

100 scoped_refptr<media::AudioInputController> audio_controller_;	132 scoped_refptr<media::AudioInputController> audio_controller_;

101 scoped_refptr<net::URLRequestContextGetter> context_getter_;

102 int caller_id_;	133 int caller_id_;

103 std::string language_;

104 std::string grammar_;

105 bool filter_profanities_;

106 std::string hardware_info_;

107 std::string origin_url_;

108 int num_samples_recorded_;	134 int num_samples_recorded_;

109 float audio_level_;	135 float audio_level_;

	136 bool in_event_dispatching_;

	137 FSMState state_;

	138 TransitionFunction fsm[STATE_MAX + 1][EVENT_MAX + 1];

	139 const TransitionFunction kUnfeasibleTransition;

110	140

111 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl);	141 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl);

112 };	142 };

113	143

114 } // namespace speech	144 } // namespace speech

115	145

116 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_	146 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_

OLD	NEW

« no previous file with comments | « no previous file | content/browser/speech/speech_recognizer_impl.cc » ('j') | content/browser/speech/speech_recognizer_impl.cc » ('J')