Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ | 5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ |
| 6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ | 6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ |
| 7 | 7 |
| 8 #include "base/basictypes.h" | 8 #include "base/basictypes.h" |
| 9 #include "base/memory/ref_counted.h" | 9 #include "base/memory/ref_counted.h" |
| 10 #include "base/memory/scoped_ptr.h" | 10 #include "base/memory/scoped_ptr.h" |
| 11 #include "content/browser/speech/endpointer/endpointer.h" | 11 #include "content/browser/speech/endpointer/endpointer.h" |
| 12 #include "content/browser/speech/speech_recognition_engine.h" | 12 #include "content/browser/speech/speech_recognition_engine.h" |
| 13 #include "content/public/browser/speech_recognizer.h" | 13 #include "content/public/browser/speech_recognizer.h" |
| 14 #include "content/public/common/speech_recognition_error.h" | 14 #include "content/public/common/speech_recognition_error.h" |
| 15 #include "content/public/common/speech_recognition_result.h" | |
| 15 #include "media/audio/audio_input_controller.h" | 16 #include "media/audio/audio_input_controller.h" |
| 16 #include "net/url_request/url_request_context_getter.h" | 17 #include "net/url_request/url_request_context_getter.h" |
| 17 | 18 |
| 18 namespace content { | 19 namespace content { |
| 19 class SpeechRecognitionEventListener; | 20 class SpeechRecognitionEventListener; |
| 20 struct SpeechRecognitionResult; | |
| 21 } | |
| 22 | |
| 23 namespace media { | |
| 24 class AudioInputController; | |
| 25 } | 21 } |
| 26 | 22 |
| 27 namespace speech { | 23 namespace speech { |
| 24 // TODO(primiano) Next CL: Remove the Impl suffix and the exported | |
| 25 // /content/public/browser/speech_recognizer.h interface since this class should | |
| 26 // not be visible outside (currently we need it for speech input extension API). | |
| 28 | 27 |
| 29 // Records audio, sends recorded audio to server and translates server response | 28 // Handles speech recognition for a session (identified by |caller_id|), taking |
| 30 // to recognition result. | 29 // care of audio capture, silence detection/endpointer and interaction with the |
| 30 // SpeechRecognitionEngine. | |
| 31 class CONTENT_EXPORT SpeechRecognizerImpl | 31 class CONTENT_EXPORT SpeechRecognizerImpl |
| 32 : public NON_EXPORTED_BASE(content::SpeechRecognizer), | 32 : public NON_EXPORTED_BASE(content::SpeechRecognizer), |
| 33 public media::AudioInputController::EventHandler, | 33 public media::AudioInputController::EventHandler, |
| 34 public NON_EXPORTED_BASE(SpeechRecognitionEngineDelegate) { | 34 public NON_EXPORTED_BASE(SpeechRecognitionEngineDelegate) { |
| 35 public: | 35 public: |
| 36 static const int kAudioSampleRate; | 36 static const int kAudioSampleRate; |
| 37 static const ChannelLayout kChannelLayout; | 37 static const ChannelLayout kChannelLayout; |
| 38 static const int kNumBitsPerAudioSample; | 38 static const int kNumBitsPerAudioSample; |
| 39 static const int kNoSpeechTimeoutMs; | 39 static const int kNoSpeechTimeoutMs; |
| 40 static const int kEndpointerEstimationTimeMs; | 40 static const int kEndpointerEstimationTimeMs; |
| 41 | 41 |
| 42 SpeechRecognizerImpl( | 42 SpeechRecognizerImpl( |
| 43 content::SpeechRecognitionEventListener* listener, | 43 content::SpeechRecognitionEventListener* listener, |
| 44 int caller_id, | 44 int caller_id, |
| 45 const std::string& language, | 45 SpeechRecognitionEngine* engine); |
| 46 const std::string& grammar, | |
| 47 net::URLRequestContextGetter* context_getter, | |
| 48 bool filter_profanities, | |
| 49 const std::string& hardware_info, | |
| 50 const std::string& origin_url); | |
| 51 virtual ~SpeechRecognizerImpl(); | 46 virtual ~SpeechRecognizerImpl(); |
| 52 | 47 |
| 53 // content::SpeechRecognizer methods. | 48 // content::SpeechRecognizer methods. |
| 54 virtual void StartRecognition() OVERRIDE; | 49 virtual void StartRecognition() OVERRIDE; |
| 55 virtual void AbortRecognition() OVERRIDE; | 50 virtual void AbortRecognition() OVERRIDE; |
| 56 virtual void StopAudioCapture() OVERRIDE; | 51 virtual void StopAudioCapture() OVERRIDE; |
| 57 virtual bool IsActive() const OVERRIDE; | 52 virtual bool IsActive() const OVERRIDE; |
| 58 virtual bool IsCapturingAudio() const OVERRIDE; | 53 virtual bool IsCapturingAudio() const OVERRIDE; |
| 59 const SpeechRecognitionEngine& recognition_engine() const; | 54 const SpeechRecognitionEngine& recognition_engine() const; |
| 60 | 55 |
| 56 private: | |
| 57 friend class SpeechRecognizerImplTest; | |
| 58 | |
| 59 enum FSMState { | |
| 60 STATE_IDLE = 0, | |
| 61 STATE_STARTING, | |
| 62 STATE_ESTIMATING_ENVIRONMENT, | |
| 63 STATE_WAITING_FOR_SPEECH, | |
| 64 STATE_RECOGNIZING, | |
| 65 STATE_WAITING_FINAL_RESULT, | |
| 66 STATE_MAX = STATE_WAITING_FINAL_RESULT | |
| 67 }; | |
| 68 | |
| 69 enum FSMEvent { | |
| 70 EVENT_ABORT = 0, | |
| 71 EVENT_START, | |
| 72 EVENT_STOP_CAPTURE, | |
| 73 EVENT_AUDIO_DATA, | |
| 74 EVENT_ENGINE_RESULT, | |
| 75 EVENT_ENGINE_ERROR, | |
| 76 EVENT_AUDIO_ERROR, | |
| 77 EVENT_MAX = EVENT_AUDIO_ERROR | |
| 78 }; | |
| 79 | |
| 80 struct FSMEventArgs { | |
| 81 FSMEvent event; | |
| 82 int audio_error_code; | |
| 83 scoped_refptr<AudioChunk> audio_data; | |
| 84 content::SpeechRecognitionResult engine_result; | |
| 85 content::SpeechRecognitionError engine_error; | |
| 86 FSMEventArgs(); | |
| 87 ~FSMEventArgs(); | |
| 88 }; | |
| 89 | |
| 90 typedef base::Callback<FSMState(const FSMEventArgs&)> TransitionFunction; | |
| 91 | |
| 61 // AudioInputController::EventHandler methods. | 92 // AudioInputController::EventHandler methods. |
| 62 virtual void OnCreated(media::AudioInputController* controller) OVERRIDE {} | 93 virtual void OnCreated(media::AudioInputController* controller) OVERRIDE {} |
| 63 virtual void OnRecording(media::AudioInputController* controller) OVERRIDE {} | 94 virtual void OnRecording(media::AudioInputController* controller) OVERRIDE {} |
| 64 virtual void OnError(media::AudioInputController* controller, | 95 virtual void OnError(media::AudioInputController* controller, |
| 65 int error_code) OVERRIDE; | 96 int error_code) OVERRIDE; |
| 66 virtual void OnData(media::AudioInputController* controller, | 97 virtual void OnData(media::AudioInputController* controller, |
| 67 const uint8* data, | 98 const uint8* data, uint32 size) OVERRIDE; |
| 68 uint32 size) OVERRIDE; | 99 // Callback called on IO thread by audio_controller->Close(). |
|
hans
2012/04/02 16:05:59
nit: maybe put a blank line before this, since it'
Primiano Tucci (use gerrit)
2012/04/03 10:16:39
Done.
| |
| 100 void OnAudioClosed(media::AudioInputController*); | |
| 69 | 101 |
| 70 // SpeechRecognitionEngineDelegate methods. | 102 // SpeechRecognitionEngineDelegate methods. |
| 71 virtual void OnSpeechRecognitionEngineResult( | 103 virtual void OnSpeechRecognitionEngineResult( |
| 72 const content::SpeechRecognitionResult& result) OVERRIDE; | 104 const content::SpeechRecognitionResult& result) OVERRIDE; |
| 73 virtual void OnSpeechRecognitionEngineError( | 105 virtual void OnSpeechRecognitionEngineError( |
| 74 const content::SpeechRecognitionError& error) OVERRIDE; | 106 const content::SpeechRecognitionError& error) OVERRIDE; |
| 75 | 107 |
| 76 private: | 108 void InitializeFSM(); |
| 77 friend class SpeechRecognizerImplTest; | 109 void DispatchEvent(FSMEvent, FSMEventArgs); |
| 78 | 110 void ProcessAudioPipeline(const AudioChunk& raw_audio); |
| 79 void InformErrorAndAbortRecognition( | 111 FSMState StartRecording(const FSMEventArgs& event_args); |
| 80 content::SpeechRecognitionErrorCode error); | 112 FSMState StartRecognitionEngine(const FSMEventArgs& event_args); |
| 81 void SendRecordedAudioToServer(); | 113 FSMState WaitEnvironmentEstimationCompletion(const FSMEventArgs& event_args); |
| 82 | 114 FSMState DetectUserSpeechOrTimeout(const FSMEventArgs& event_args); |
| 83 void HandleOnError(int error_code); // Handles OnError in the IO thread. | 115 FSMState StopCaptureAndWaitResult(const FSMEventArgs& event_args); |
| 84 | 116 FSMState ProcessIntermediateResult(const FSMEventArgs& event_args); |
| 85 // Handles OnData in the IO thread. | 117 FSMState ProcessFinalResult(const FSMEventArgs& event_args); |
| 86 void HandleOnData(scoped_refptr<AudioChunk> raw_audio); | 118 FSMState Abort(const FSMEventArgs& event_args); |
| 87 | 119 FSMState AbortWithError(const content::SpeechRecognitionError* error); |
| 88 void OnAudioClosed(media::AudioInputController*); | 120 FSMState AbortWithError(const content::SpeechRecognitionError& error); |
| 89 | 121 FSMState DetectEndOfSpeech(const FSMEventArgs& event_args); |
| 90 // Helper method which closes the audio controller and frees it asynchronously | 122 FSMState DoNothing(const FSMEventArgs& event_args) const; |
| 91 // without blocking the IO thread. | 123 int GetElapsedTimeMs() const; |
| 124 void UpdateSignalAndNoiseLevels(const float& rms, bool clip_detected); | |
| 92 void CloseAudioControllerAsynchronously(); | 125 void CloseAudioControllerAsynchronously(); |
| 93 | |
| 94 void SetAudioManagerForTesting(AudioManager* audio_manager); | 126 void SetAudioManagerForTesting(AudioManager* audio_manager); |
| 95 | 127 |
| 96 content::SpeechRecognitionEventListener* listener_; | 128 content::SpeechRecognitionEventListener* listener_; |
| 97 AudioManager* testing_audio_manager_; | 129 AudioManager* testing_audio_manager_; |
| 98 scoped_ptr<SpeechRecognitionEngine> recognition_engine_; | 130 scoped_ptr<SpeechRecognitionEngine> recognition_engine_; |
| 99 Endpointer endpointer_; | 131 Endpointer endpointer_; |
| 100 scoped_refptr<media::AudioInputController> audio_controller_; | 132 scoped_refptr<media::AudioInputController> audio_controller_; |
| 101 scoped_refptr<net::URLRequestContextGetter> context_getter_; | |
| 102 int caller_id_; | 133 int caller_id_; |
| 103 std::string language_; | |
| 104 std::string grammar_; | |
| 105 bool filter_profanities_; | |
| 106 std::string hardware_info_; | |
| 107 std::string origin_url_; | |
| 108 int num_samples_recorded_; | 134 int num_samples_recorded_; |
| 109 float audio_level_; | 135 float audio_level_; |
| 136 bool in_event_dispatching_; | |
| 137 FSMState state_; | |
| 138 TransitionFunction fsm[STATE_MAX + 1][EVENT_MAX + 1]; | |
| 139 const TransitionFunction kUnfeasibleTransition; | |
| 110 | 140 |
| 111 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl); | 141 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl); |
| 112 }; | 142 }; |
| 113 | 143 |
| 114 } // namespace speech | 144 } // namespace speech |
| 115 | 145 |
| 116 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ | 146 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ |
| OLD | NEW |