content/browser/speech/speech_recognizer_impl.h - Issue 9663066: Refactoring of chrome speech recognition architecture (CL1.3)

Side by Side Diff: content/browser/speech/speech_recognizer_impl.h

Issue 9663066: Refactoring of chrome speech recognition architecture (CL1.3) (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Fixed according to (partial) Satish review. Created 8 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

« content/browser/speech/speech_recognition_manager_impl.cc ('K') | « content/browser/speech/speech_recognition_request_unittest.cc ('k') | content/browser/speech/speech_recognizer_impl.cc » ('j') | content/public/common/speech_recognition_result.h » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_	5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_

6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_	6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_

7	7

8 #include <list>	8 #include "base/basictypes.h"

9 #include <utility>	9 #include "base/memory/ref_counted.h"

10

11 #include "base/compiler_specific.h"

12 #include "base/memory/scoped_ptr.h"	10 #include "base/memory/scoped_ptr.h"

13 #include "content/browser/speech/audio_encoder.h"

14 #include "content/browser/speech/endpointer/endpointer.h"	11 #include "content/browser/speech/endpointer/endpointer.h"

15 #include "content/browser/speech/speech_recognition_request.h"	12 #include "content/browser/speech/speech_recognition_engine.h"

16 #include "content/public/browser/speech_recognizer.h"	13 #include "content/public/browser/speech_recognizer.h"

17 #include "content/public/common/speech_recognition_result.h"	14 #include "content/public/common/speech_recognition_result.h"

18 #include "media/audio/audio_input_controller.h"	15 #include "media/audio/audio_input_controller.h"

	16 #include "net/url_request/url_request_context_getter.h"

19	17

20 class AudioManager;	18 namespace media {

	19 class AudioInputController;
	Satish 2012/03/21 13:29:48 2 spaces to 1 space
	20 }

21	21

22 namespace content {	22 namespace content {

	23 struct SpeechRecognitionError;

23 class SpeechRecognitionEventListener;	24 class SpeechRecognitionEventListener;

24 }	25 }

25	26

26 namespace speech {	27 namespace speech {

	28 // TODO(primiano) Next CL: Remove the Impl suffix and the exported
	Satish 2012/03/21 13:29:48 add newline above
	29 // /content/public/browser/speech_recognizer.h interface since this class should

	30 // not be visible outside (currently we need it for speech input extension API).

27	31

28 // Records audio, sends recorded audio to server and translates server response	32 // Handles speech recognition for a session (identified by |caller_id|), taking

29 // to recognition result.	33 // care of audio capture, silence detection/endpointer and interaction with the

	34 // SpeechRecognitionEngine.

30 class CONTENT_EXPORT SpeechRecognizerImpl	35 class CONTENT_EXPORT SpeechRecognizerImpl

31 : NON_EXPORTED_BASE(public content::SpeechRecognizer),	36 : public NON_EXPORTED_BASE(content::SpeechRecognizer),

32 public media::AudioInputController::EventHandler,	37 public media::AudioInputController::EventHandler,

33 public SpeechRecognitionRequestDelegate {	38 public SpeechRecognitionEngineDelegate {

34 public:	39 public:

35 static const int kAudioSampleRate;	40 static const int kAudioSampleRate;

36 static const int kAudioPacketIntervalMs; // Duration of each audio packet.

37 static const ChannelLayout kChannelLayout;	41 static const ChannelLayout kChannelLayout;

38 static const int kNumBitsPerAudioSample;	42 static const int kNumBitsPerAudioSample;

39 static const int kNoSpeechTimeoutSec;	43 static const int kNoSpeechTimeoutMs;

40 static const int kEndpointerEstimationTimeMs;	44 static const int kEndpointerEstimationTimeMs;

41	45

42 SpeechRecognizerImpl(content::SpeechRecognitionEventListener* listener,	46 SpeechRecognizerImpl(

43 int caller_id,	47 content::SpeechRecognitionEventListener* listener,

44 const std::string& language,	48 int caller_id,

45 const std::string& grammar,	49 SpeechRecognitionEngine* engine);

46 net::URLRequestContextGetter* context_getter,

47 bool filter_profanities,

48 const std::string& hardware_info,

49 const std::string& origin_url);

50

51 virtual ~SpeechRecognizerImpl();	50 virtual ~SpeechRecognizerImpl();

52	51

53 // content::SpeechRecognizer methods.	52 // content::SpeechRecognizer methods.

54 virtual bool StartRecognition() OVERRIDE;	53 virtual void StartRecognition() OVERRIDE;

55 virtual void AbortRecognition() OVERRIDE;	54 virtual void AbortRecognition() OVERRIDE;

56 virtual void StopAudioCapture() OVERRIDE;	55 virtual void StopAudioCapture() OVERRIDE;

57 virtual bool IsActive() const OVERRIDE;	56 virtual bool IsActive() const OVERRIDE;

58 virtual bool IsCapturingAudio() const OVERRIDE;	57 virtual bool IsCapturingAudio() const OVERRIDE;

	58 const SpeechRecognitionEngine& recognition_engine() const;

	59

	60 private:

	61 friend class SpeechRecognizerImplTest;

	62

	63 enum FSMState {
	Satish 2012/03/21 13:29:48 Could the FSM changes be moved to a separate CL, as that is conceptually separate from the rest of the renames and cleanup? It would also be easier to review. Primiano Tucci (use gerrit) 2012/03/22 11:20:41 On 2012/03/21 13:29:48, Satish wrote: > could the FSM changes be moved to a separate CL as that is conceptually separate > from the rest of renames and cleanup? Also would be easier to review Done.
	64 kIdle = 0,

	65 kStartingRecognition,

	66 kEstimatingEnvironment,

	67 kWaitingForSpeech,

	68 kRecognizingSpeech,

	69 kWaitingFinalResult,

	70 kMaxState = kWaitingFinalResult

	71 };

	72

	73 enum FSMEvent {

	74 kAbortRequest = 0,

	75 kStartRequest,

	76 kStopCaptureRequest,

	77 kAudioData,

	78 kRecognitionResult,

	79 kRecognitionError,

	80 kAudioError,

	81 kMaxEvent = kAudioError

	82 };

	83

	84 struct FSMEventArgs {

	85 int audio_error_code;

	86 AudioChunk* audio_data;

	87 content::SpeechRecognitionResult speech_result;

	88 content::SpeechRecognitionError error;

	89 FSMEventArgs();

	90 };

59	91

60 // AudioInputController::EventHandler methods.	92 // AudioInputController::EventHandler methods.

61 virtual void OnCreated(media::AudioInputController* controller) OVERRIDE {}	93 virtual void OnCreated(media::AudioInputController* controller) OVERRIDE {}

62 virtual void OnRecording(media::AudioInputController* controller) OVERRIDE {}	94 virtual void OnRecording(media::AudioInputController* controller) OVERRIDE {}

63 virtual void OnError(media::AudioInputController* controller,	95 virtual void OnError(media::AudioInputController* controller,

64 int error_code) OVERRIDE;	96 int error_code) OVERRIDE;

65 virtual void OnData(media::AudioInputController* controller,	97 virtual void OnData(media::AudioInputController* controller,

66 const uint8* data,	98 const uint8* data, uint32 size) OVERRIDE;

67 uint32 size) OVERRIDE;

68	99

69 // SpeechRecognitionRequest::Delegate methods.	100 // SpeechRecognitionEngineDelegate methods.

70 virtual void SetRecognitionResult(	101 virtual void OnSpeechEngineResult(

71 const content::SpeechRecognitionResult& result) OVERRIDE;	102 const content::SpeechRecognitionResult& result) OVERRIDE;

	103 virtual void OnSpeechEngineError(

	104 const content::SpeechRecognitionError& error) OVERRIDE;

72	105

73 private:	106 void DispatchEvent(FSMEvent event, FSMEventArgs);

74 friend class SpeechRecognizerImplTest;	107 void ProcessAudioPipeline();

75	108 FSMState ProcessEvent(FSMEvent event);

76 void InformErrorAndAbortRecognition(	109 FSMState InitializeAndStartRecording();

77 content::SpeechRecognitionErrorCode error);	110 FSMState StartSpeechRecognition();

78 void SendRecordedAudioToServer();	111 FSMState EnvironmentEstimation();

79	112 FSMState DetectUserSpeechOrTimeout();

80 void HandleOnError(int error_code); // Handles OnError in the IO thread.	113 FSMState StopCaptureAndWaitForResult();

81	114 FSMState ProcessIntermediateRecognitionResult();

82 // Handles OnData in the IO thread. Takes ownership of |raw_audio|.	115 FSMState ProcessFinalRecognitionResult();

83 void HandleOnData(AudioChunk* raw_audio);	116 FSMState Abort();

84	117 FSMState Abort(const content::SpeechRecognitionError& error);

85 // Helper method which closes the audio controller and blocks until done.	118 FSMState Abort(bool has_error, const content::SpeechRecognitionError& error);

	119 FSMState DetectEndOfSpeech();

	120 FSMState DoNothing() const;

	121 int GetElapsedTimeMs() const;

	122 void UpdateSignalAndNoiseLevels(const float& rms);

86 void CloseAudioControllerSynchronously();	123 void CloseAudioControllerSynchronously();

87

88 void SetAudioManagerForTesting(AudioManager* audio_manager);	124 void SetAudioManagerForTesting(AudioManager* audio_manager);

89	125

90 content::SpeechRecognitionEventListener* listener_;	126 content::SpeechRecognitionEventListener* listener_;

	127 AudioManager* testing_audio_manager_;

	128 scoped_ptr<SpeechRecognitionEngine> recognition_engine_;

	129 Endpointer endpointer_;

	130 scoped_refptr<media::AudioInputController> audio_controller_;

91 int caller_id_;	131 int caller_id_;

92 std::string language_;

93 std::string grammar_;

94 bool filter_profanities_;

95 std::string hardware_info_;

96 std::string origin_url_;

97

98 scoped_ptr<SpeechRecognitionRequest> request_;

99 scoped_refptr<media::AudioInputController> audio_controller_;

100 scoped_refptr<net::URLRequestContextGetter> context_getter_;

101 AudioEncoder::Codec codec_;

102 scoped_ptr<AudioEncoder> encoder_;

103 Endpointer endpointer_;

104 int num_samples_recorded_;	132 int num_samples_recorded_;

	133 bool clipper_detected_clip_;

105 float audio_level_;	134 float audio_level_;

106 AudioManager* audio_manager_;	135 float rms_;

	136 int event_dispatch_nesting_level_;

	137 FSMState state_;

	138 FSMEvent event_;

	139 FSMEventArgs* event_args_;

107	140

108 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl);	141 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl);

109 };	142 };

110	143

111 } // namespace speech	144 } // namespace speech

112	145

113 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_	146 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_

OLD	NEW