content/browser/speech/speech_recognizer_impl.h - Issue 9835049: Speech refactoring: Reimplemented speech_recognizer as a FSM. (CL1.5)

Side by Side Diff: content/browser/speech/speech_recognizer_impl.h

Issue 9835049: Speech refactoring: Reimplemented speech_recognizer as a FSM. (CL1.5) (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Rebased from master. Created 8 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_	5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_

6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_	6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_

7	7

8 #include "base/basictypes.h"	8 #include "base/basictypes.h"

	9 #include "base/memory/ref_counted.h"

9 #include "base/memory/scoped_ptr.h"	10 #include "base/memory/scoped_ptr.h"

10 #include "content/browser/speech/endpointer/endpointer.h"	11 #include "content/browser/speech/endpointer/endpointer.h"

11 #include "content/browser/speech/speech_recognition_engine.h"	12 #include "content/browser/speech/speech_recognition_engine.h"

12 #include "content/public/browser/speech_recognizer.h"	13 #include "content/public/browser/speech_recognizer.h"

13 #include "content/public/common/speech_recognition_error.h"	14 #include "content/public/common/speech_recognition_error.h"

	15 #include "content/public/common/speech_recognition_result.h"

14 #include "media/audio/audio_input_controller.h"	16 #include "media/audio/audio_input_controller.h"

15 #include "net/url_request/url_request_context_getter.h"	17 #include "net/url_request/url_request_context_getter.h"

16	18

17 namespace content {	19 namespace content {

18 class SpeechRecognitionEventListener;	20 class SpeechRecognitionEventListener;

19 struct SpeechRecognitionResult;

20 }

21

22 namespace media {

23 class AudioInputController;

24 }	21 }

25	22

26 namespace speech {	23 namespace speech {

	24 // TODO(primiano) Next CL: Remove the Impl suffix and the exported

	25 // /content/public/browser/speech_recognizer.h interface since this class should

	26 // not be visible outside (currently we need it for speech input extension API).

27	27

28 // Records audio, sends recorded audio to server and translates server response	28 // Handles speech recognition for a session (identified by |caller_id|), taking

29 // to recognition result.	29 // care of audio capture, silence detection/endpointer and interaction with the

	30 // SpeechRecognitionEngine.

30 class CONTENT_EXPORT SpeechRecognizerImpl	31 class CONTENT_EXPORT SpeechRecognizerImpl

31 : public NON_EXPORTED_BASE(content::SpeechRecognizer),	32 : public NON_EXPORTED_BASE(content::SpeechRecognizer),

32 public media::AudioInputController::EventHandler,	33 public media::AudioInputController::EventHandler,

33 public NON_EXPORTED_BASE(SpeechRecognitionEngineDelegate) {	34 public NON_EXPORTED_BASE(SpeechRecognitionEngineDelegate) {

34 public:	35 public:

35 static const int kAudioSampleRate;	36 static const int kAudioSampleRate;

36 static const ChannelLayout kChannelLayout;	37 static const ChannelLayout kChannelLayout;

37 static const int kNumBitsPerAudioSample;	38 static const int kNumBitsPerAudioSample;

38 static const int kNoSpeechTimeoutMs;	39 static const int kNoSpeechTimeoutMs;

39 static const int kEndpointerEstimationTimeMs;	40 static const int kEndpointerEstimationTimeMs;

40	41

41 SpeechRecognizerImpl(	42 SpeechRecognizerImpl(

42 content::SpeechRecognitionEventListener* listener,	43 content::SpeechRecognitionEventListener* listener,

43 int caller_id,	44 int caller_id,

44 const std::string& language,	45 SpeechRecognitionEngine* engine);

45 const std::string& grammar,

46 net::URLRequestContextGetter* context_getter,

47 bool filter_profanities,

48 const std::string& hardware_info,

49 const std::string& origin_url);

50 virtual ~SpeechRecognizerImpl();	46 virtual ~SpeechRecognizerImpl();

51	47

52 // content::SpeechRecognizer methods.	48 // content::SpeechRecognizer methods.

53 virtual void StartRecognition() OVERRIDE;	49 virtual void StartRecognition() OVERRIDE;

54 virtual void AbortRecognition() OVERRIDE;	50 virtual void AbortRecognition() OVERRIDE;

55 virtual void StopAudioCapture() OVERRIDE;	51 virtual void StopAudioCapture() OVERRIDE;

56 virtual bool IsActive() const OVERRIDE;	52 virtual bool IsActive() const OVERRIDE;

57 virtual bool IsCapturingAudio() const OVERRIDE;	53 virtual bool IsCapturingAudio() const OVERRIDE;

58 const SpeechRecognitionEngine& recognition_engine() const;	54 const SpeechRecognitionEngine& recognition_engine() const;

59	55

	56 private:

	57 friend class SpeechRecognizerImplTest;

	58

	59 enum FSMState {

	60 kIdle = 0,
	Satish 2012/03/27 09:47:42: enum values should be MACRO_STYLE; see http://dev.chromium.org/developers/coding-style (in the Naming section). Primiano Tucci (use gerrit) 2012/03/28 13:24:44: Done.
	61 kStartingRecognition,

	62 kEstimatingEnvironment,

	63 kWaitingForSpeech,

	64 kRecognizingSpeech,

	65 kWaitingFinalResult,

	66 kMaxState = kWaitingFinalResult

	67 };

	68

	69 enum FSMEvent {

	70 kAbortRequest = 0,
	Satish 2012/03/27 09:47:42: Seems like we can drop the 'Request' suffix in these 3, as the rest don't use it. Primiano Tucci (use gerrit) 2012/03/28 13:24:44: I added the suffix because their names should represent events. Removing that suffix would make them sound like actions, not events. Furthermore, I wonder whether a name clash might occur if they were renamed to just ABORT and START.
	71 kStartRequest,

	72 kStopCaptureRequest,

	73 kAudioData,

	74 kRecognitionResult,

	75 kRecognitionError,

	76 kAudioError,

	77 kMaxEvent = kAudioError

	78 };

	79

	80 struct FSMEventArgs {

	81 int audio_error_code;

	82 AudioChunk* audio_data;

	83 content::SpeechRecognitionResult speech_result;

	84 content::SpeechRecognitionError error;
	Satish 2012/03/27 09:47:42: change to speech_error. Primiano Tucci (use gerrit) 2012/03/28 13:24:44: Done.
	85 FSMEventArgs();

	86 };

	87

60 // AudioInputController::EventHandler methods.	88 // AudioInputController::EventHandler methods.

61 virtual void OnCreated(media::AudioInputController* controller) OVERRIDE {}	89 virtual void OnCreated(media::AudioInputController* controller) OVERRIDE {}

62 virtual void OnRecording(media::AudioInputController* controller) OVERRIDE {}	90 virtual void OnRecording(media::AudioInputController* controller) OVERRIDE {}

63 virtual void OnError(media::AudioInputController* controller,	91 virtual void OnError(media::AudioInputController* controller,

64 int error_code) OVERRIDE;	92 int error_code) OVERRIDE;

65 virtual void OnData(media::AudioInputController* controller,	93 virtual void OnData(media::AudioInputController* controller,

66 const uint8* data,	94 const uint8* data, uint32 size) OVERRIDE;

67 uint32 size) OVERRIDE;

68	95

69 // SpeechRecognitionEngineDelegate methods.	96 // SpeechRecognitionEngineDelegate methods.

70 virtual void OnSpeechRecognitionEngineResult(	97 virtual void OnSpeechRecognitionEngineResult(

71 const content::SpeechRecognitionResult& result) OVERRIDE;	98 const content::SpeechRecognitionResult& result) OVERRIDE;

72 virtual void OnSpeechRecognitionEngineError(	99 virtual void OnSpeechRecognitionEngineError(

73 const content::SpeechRecognitionError& error) OVERRIDE;	100 const content::SpeechRecognitionError& error) OVERRIDE;

74	101

75 private:	102 void DispatchEvent(FSMEvent event, FSMEventArgs);

76 friend class SpeechRecognizerImplTest;	103 void ProcessAudioPipeline();

77	104 FSMState ProcessEvent(FSMEvent event);

78 void InformErrorAndAbortRecognition(	105 FSMState InitializeAndStartRecording();
	Satish 2012/03/27 09:47:42: rename to StartRecording. Primiano Tucci (use gerrit) 2012/03/28 13:24:44: Done.
79 content::SpeechRecognitionErrorCode error);	106 FSMState StartSpeechRecognition();
	Satish 2012/03/27 09:47:42: rename to StartRecognitionEngine. Primiano Tucci (use gerrit) 2012/03/28 13:24:44: Done.
80 void SendRecordedAudioToServer();	107 FSMState EnvironmentEstimation();

81	108 FSMState DetectUserSpeechOrTimeout();

82 void HandleOnError(int error_code); // Handles OnError in the IO thread.	109 FSMState StopCaptureAndWaitForResult();

83	110 FSMState ProcessIntermediateRecognitionResult();

84 // Handles OnData in the IO thread. Takes ownership of |raw_audio|.	111 FSMState ProcessFinalRecognitionResult();

85 void HandleOnData(AudioChunk* raw_audio);	112 FSMState Abort();

86	113 FSMState Abort(const content::SpeechRecognitionError& error);

87 // Helper method which closes the audio controller and blocks until done.	114 FSMState Abort(bool has_error, const content::SpeechRecognitionError& error);

	115 FSMState DetectEndOfSpeech();

	116 FSMState DoNothing() const;

	117 int GetElapsedTimeMs() const;

	118 void UpdateSignalAndNoiseLevels(const float& rms);

88 void CloseAudioControllerSynchronously();	119 void CloseAudioControllerSynchronously();

89

90 void SetAudioManagerForTesting(AudioManager* audio_manager);	120 void SetAudioManagerForTesting(AudioManager* audio_manager);

91	121

92 content::SpeechRecognitionEventListener* listener_;	122 content::SpeechRecognitionEventListener* listener_;

93 AudioManager* testing_audio_manager_;	123 AudioManager* testing_audio_manager_;

94 scoped_ptr<SpeechRecognitionEngine> recognition_engine_;	124 scoped_ptr<SpeechRecognitionEngine> recognition_engine_;

95 Endpointer endpointer_;	125 Endpointer endpointer_;

96 scoped_refptr<media::AudioInputController> audio_controller_;	126 scoped_refptr<media::AudioInputController> audio_controller_;

97 scoped_refptr<net::URLRequestContextGetter> context_getter_;

98 int caller_id_;	127 int caller_id_;

99 std::string language_;

100 std::string grammar_;

101 bool filter_profanities_;

102 std::string hardware_info_;

103 std::string origin_url_;

104 int num_samples_recorded_;	128 int num_samples_recorded_;

	129 bool clipper_detected_clip_;

105 float audio_level_;	130 float audio_level_;

	131 float rms_;

	132 int event_dispatch_nesting_level_;

	133 FSMState state_;

	134 FSMEvent event_;

	135 FSMEventArgs* event_args_;

106	136

107 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl);	137 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl);

108 };	138 };

109	139

110 } // namespace speech	140 } // namespace speech

111	141

112 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_	142 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_

OLD	NEW

« no previous file with comments | « no previous file | content/browser/speech/speech_recognizer_impl.cc » ('j') | content/browser/speech/speech_recognizer_impl.cc » ('J')