content/browser/speech/speech_recognizer_impl.h - Issue 9663066: Refactoring of chrome speech recognition architecture (CL1.3)

Side by Side Diff: content/browser/speech/speech_recognizer_impl.h

Issue 9663066: Refactoring of chrome speech recognition architecture (CL1.3) (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Rebased from master. Created 8 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

« content/browser/speech/speech_recognition_manager_impl.cc ('K') | « content/browser/speech/speech_recognition_request_unittest.cc ('k') | content/browser/speech/speech_recognizer_impl.cc » ('j') | content/browser/speech/speech_recognizer_impl.cc » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_	5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_

6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_	6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_

7	7

8 #include <list>	8 #include "base/basictypes.h"

9 #include <utility>	9 #include "base/memory/ref_counted.h"

10

11 #include "base/compiler_specific.h"

12 #include "base/memory/scoped_ptr.h"	10 #include "base/memory/scoped_ptr.h"

13 #include "content/browser/speech/audio_encoder.h"

14 #include "content/browser/speech/endpointer/endpointer.h"	11 #include "content/browser/speech/endpointer/endpointer.h"

15 #include "content/browser/speech/speech_recognition_request.h"	12 #include "content/browser/speech/speech_recognition_engine.h"

16 #include "content/public/browser/speech_recognizer.h"	13 #include "content/public/browser/speech_recognizer.h"

17 #include "content/public/common/speech_recognition_result.h"	14 #include "content/public/common/speech_recognition_result.h"

18 #include "media/audio/audio_input_controller.h"	15 #include "media/audio/audio_input_controller.h"

	16 #include "net/url_request/url_request_context_getter.h"

19	17

20 class AudioManager;	18 namespace media {

	19 class AudioInputController;

	20 }

21	21

22 namespace content {	22 namespace content {

	23 struct SpeechRecognitionError;

23 class SpeechRecognitionEventListener;	24 class SpeechRecognitionEventListener;

24 }	25 }

25	26

26 namespace speech {	27 namespace speech {

	28 // TODO(primiano) Current CL: check CONTENT_EXPORT and NON_EXPORTED_BASE
	hans 2012/03/16 11:12:56 should this be addressed before this cl is landed? Primiano Tucci (use gerrit) 2012/03/16 15:03:42 On 2012/03/16 11:12:56, hans wrote: > should this be addressed before this cl is landed? Done.
	29 // TODO(primiano) Next CL: Remove the Impl suffix and the exported

	30 // /content/public/browser/speech_recognizer.h interface since this class should

	31 // not be visible outside (currently we need it for speech input extension API).

27	32

28 // Records audio, sends recorded audio to server and translates server response	33 // Handles speech recognition for a session (identified by |caller_id|), taking

29 // to recognition result.	34 // care of audio capture, silence detection/endpointer and interaction with the

	35 // SpeechRecognitionEngine (that must be injected through set_delegate(...)).
	hans 2012/03/16 11:12:56 i'm confused by the last bit here.. "SpeechRecognitionEngine (that must be injected through set_delegate)" isn't the SpeechRecognitionEngine passed into the constructor? Primiano Tucci (use gerrit) 2012/03/16 15:03:42 On 2012/03/16 11:12:56, hans wrote: > i'm confused by the last bit here.. "SpeechRecognitionEngine (that must be > injected through set_delegate)" > > isn't the SpeechRecognitionEngine passed into the constructor? Uh, definitely right. Probably it was just a wrong copy/paste.
30 class CONTENT_EXPORT SpeechRecognizerImpl	36 class CONTENT_EXPORT SpeechRecognizerImpl

31 : NON_EXPORTED_BASE(public content::SpeechRecognizer),	37 : NON_EXPORTED_BASE(public content::SpeechRecognizer),

32 public media::AudioInputController::EventHandler,	38 public media::AudioInputController::EventHandler,

33 public SpeechRecognitionRequestDelegate {	39 public SpeechRecognitionEngineDelegate {

34 public:	40 public:

35 static const int kAudioSampleRate;	41 static const int kAudioSampleRate;

36 static const int kAudioPacketIntervalMs; // Duration of each audio packet.

37 static const ChannelLayout kChannelLayout;	42 static const ChannelLayout kChannelLayout;

38 static const int kNumBitsPerAudioSample;	43 static const int kNumBitsPerAudioSample;

39 static const int kNoSpeechTimeoutSec;	44 static const int kNoSpeechTimeoutMs;

40 static const int kEndpointerEstimationTimeMs;	45 static const int kEndpointerEstimationTimeMs;

41	46

42 SpeechRecognizerImpl(content::SpeechRecognitionEventListener* listener,	47 SpeechRecognizerImpl(

43 int caller_id,	48 content::SpeechRecognitionEventListener* listener,

44 const std::string& language,	49 int caller_id,

45 const std::string& grammar,	50 SpeechRecognitionEngine* engine);

46 net::URLRequestContextGetter* context_getter,

47 bool filter_profanities,

48 const std::string& hardware_info,

49 const std::string& origin_url);

50

51 virtual ~SpeechRecognizerImpl();	51 virtual ~SpeechRecognizerImpl();

52	52

53 // content::SpeechRecognizer methods.	53 // content::SpeechRecognizer methods.

54 virtual bool StartRecognition() OVERRIDE;	54 virtual void StartRecognition() OVERRIDE;

55 virtual void AbortRecognition() OVERRIDE;	55 virtual void AbortRecognition() OVERRIDE;

56 virtual void StopAudioCapture() OVERRIDE;	56 virtual void StopAudioCapture() OVERRIDE;

57 virtual bool IsActive() const OVERRIDE;	57 virtual bool IsActive() const OVERRIDE;

58 virtual bool IsCapturingAudio() const OVERRIDE;	58 virtual bool IsCapturingAudio() const OVERRIDE;

	59 const SpeechRecognitionEngine& recognition_engine() const;

	60

	61 private:

	62 friend class SpeechRecognizerImplTest;

	63

	64 enum FSMState {

	65 kIdle = 0,

	66 kStartingRecognition,

	67 kEstimatingEnvironment,

	68 kWaitingForSpeech,

	69 kRecognizingSpeech,

	70 kWaitingFinalResult,

	71 kMaxState = kWaitingFinalResult

	72 };

	73

	74 enum FSMEvent {

	75 kAbortRequest = 0,

	76 kStartRequest,

	77 kStopCaptureRequest,

	78 kAudioData,

	79 kRecognitionResult,

	80 kRecognitionError,

	81 kAudioError,

	82 kMaxEvent = kAudioError

	83 };

	84

	85 struct FSMEventArgs {

	86 int audio_error_code;

	87 AudioChunk* audio_data;

	88 content::SpeechRecognitionResult speech_result;

	89 content::SpeechRecognitionError error;

	90 FSMEventArgs();

	91 };

59	92

60 // AudioInputController::EventHandler methods.	93 // AudioInputController::EventHandler methods.

61 virtual void OnCreated(media::AudioInputController* controller) OVERRIDE {}	94 virtual void OnCreated(media::AudioInputController* controller) OVERRIDE {}

62 virtual void OnRecording(media::AudioInputController* controller) OVERRIDE {}	95 virtual void OnRecording(media::AudioInputController* controller) OVERRIDE {}

63 virtual void OnError(media::AudioInputController* controller,	96 virtual void OnError(media::AudioInputController* controller,

64 int error_code) OVERRIDE;	97 int error_code) OVERRIDE;

65 virtual void OnData(media::AudioInputController* controller,	98 virtual void OnData(media::AudioInputController* controller,

66 const uint8* data,	99 const uint8* data, uint32 size) OVERRIDE;

67 uint32 size) OVERRIDE;

68	100

69 // SpeechRecognitionRequest::Delegate methods.	101 // SpeechRecognitionEngineDelegate methods.

70 virtual void SetRecognitionResult(	102 virtual void OnSpeechEngineResult(

71 const content::SpeechRecognitionResult& result) OVERRIDE;	103 const content::SpeechRecognitionResult& result) OVERRIDE;

	104 virtual void OnSpeechEngineError(

	105 const content::SpeechRecognitionError& error) OVERRIDE;

72	106

73 private:	107 void DispatchEvent(FSMEvent event, FSMEventArgs);

74 friend class SpeechRecognizerImplTest;	108 void ProcessAudioPipeline();

75	109 FSMState ProcessEvent(FSMEvent event);

76 void InformErrorAndAbortRecognition(	110 FSMState InitializeAndStartRecording();

77 content::SpeechRecognitionErrorCode error);	111 FSMState StartSpeechRecognition();

78 void SendRecordedAudioToServer();	112 FSMState EnvironmentEstimation();

79	113 FSMState DetectUserSpeechOrTimeout();

80 void HandleOnError(int error_code); // Handles OnError in the IO thread.	114 FSMState StopCaptureAndWaitForResult();

81	115 FSMState ProcessIntermediateRecognitionResult();

82 // Handles OnData in the IO thread. Takes ownership of |raw_audio|.	116 FSMState ProcessFinalRecognitionResult();

83 void HandleOnData(AudioChunk* raw_audio);	117 FSMState Abort();

84	118 FSMState Abort(const content::SpeechRecognitionError& error);

85 // Helper method which closes the audio controller and blocks until done.	119 FSMState Abort(bool has_error, const content::SpeechRecognitionError& error);

	120 FSMState DetectEndOfSpeech();

	121 FSMState DoNothing() const;

	122 int GetElapsedTimeMs() const;

	123 void UpdateSignalAndNoiseLevels(const float& rms);

86 void CloseAudioControllerSynchronously();	124 void CloseAudioControllerSynchronously();

87

88 void SetAudioManagerForTesting(AudioManager* audio_manager);	125 void SetAudioManagerForTesting(AudioManager* audio_manager);

89	126

90 content::SpeechRecognitionEventListener* listener_;	127 content::SpeechRecognitionEventListener* listener_;

	128 AudioManager* testing_audio_manager_;

	129 scoped_ptr<SpeechRecognitionEngine> recognition_engine_;

	130 Endpointer endpointer_;

	131 scoped_refptr<media::AudioInputController> audio_controller_;

91 int caller_id_;	132 int caller_id_;

92 std::string language_;

93 std::string grammar_;

94 bool filter_profanities_;

95 std::string hardware_info_;

96 std::string origin_url_;

97

98 scoped_ptr<SpeechRecognitionRequest> request_;

99 scoped_refptr<media::AudioInputController> audio_controller_;

100 scoped_refptr<net::URLRequestContextGetter> context_getter_;

101 AudioEncoder::Codec codec_;

102 scoped_ptr<AudioEncoder> encoder_;

103 Endpointer endpointer_;

104 int num_samples_recorded_;	133 int num_samples_recorded_;

	134 bool clipper_detected_clip_;

105 float audio_level_;	135 float audio_level_;

106 AudioManager* audio_manager_;	136 float rms_;

	137 int event_dispatch_nesting_level_;

	138 FSMState state_;

	139 FSMEvent event_;

	140 FSMEventArgs* event_args_;

107	141

108 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl);	142 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl);

109 };	143 };

110	144

111 } // namespace speech	145 } // namespace speech

112	146

113 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_	147 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_

OLD	NEW