Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ | 5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ |
| 6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ | 6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ |
| 7 | 7 |
| 8 #include "base/basictypes.h" | 8 #include "base/basictypes.h" |
| 9 #include "base/memory/ref_counted.h" | 9 #include "base/memory/ref_counted.h" |
| 10 #include "base/memory/scoped_ptr.h" | 10 #include "base/memory/scoped_ptr.h" |
| 11 #include "content/browser/speech/endpointer/endpointer.h" | 11 #include "content/browser/speech/endpointer/endpointer.h" |
| 12 #include "content/browser/speech/speech_recognition_engine.h" | 12 #include "content/browser/speech/speech_recognition_engine.h" |
| 13 #include "content/public/browser/speech_recognizer.h" | 13 #include "content/public/browser/speech_recognizer.h" |
| 14 #include "content/public/common/speech_recognition_error.h" | 14 #include "content/public/common/speech_recognition_error.h" |
| 15 #include "content/public/common/speech_recognition_result.h" | |
| 15 #include "media/audio/audio_input_controller.h" | 16 #include "media/audio/audio_input_controller.h" |
| 16 #include "net/url_request/url_request_context_getter.h" | 17 #include "net/url_request/url_request_context_getter.h" |
| 17 | 18 |
| 18 namespace content { | 19 namespace content { |
| 19 class SpeechRecognitionEventListener; | 20 class SpeechRecognitionEventListener; |
| 20 struct SpeechRecognitionResult; | 21 struct SpeechRecognitionResult; |
| 21 } | 22 } |
| 22 | 23 |
| 23 namespace media { | 24 namespace media { |
| 24 class AudioInputController; | 25 class AudioInputController; |
| 25 class AudioManager; | 26 class AudioManager; |
| 26 } | 27 } |
| 27 | 28 |
| 28 namespace speech { | 29 namespace speech { |
| 29 | 30 |
| 30 // Records audio, sends recorded audio to server and translates server response | 31 // TODO(primiano) Next CL: Remove the Impl suffix and the exported |
| 31 // to recognition result. | 32 // /content/public/browser/speech_recognizer.h interface since this class should |
| 33 // not be visible outside (currently we need it for speech input extension API). | |
| 34 | |
| 35 // Handles speech recognition for a session (identified by |caller_id|), taking | |
| 36 // care of audio capture, silence detection/endpointer and interaction with the | |
| 37 // SpeechRecognitionEngine. | |
| 32 class CONTENT_EXPORT SpeechRecognizerImpl | 38 class CONTENT_EXPORT SpeechRecognizerImpl |
| 33 : public NON_EXPORTED_BASE(content::SpeechRecognizer), | 39 : public NON_EXPORTED_BASE(content::SpeechRecognizer), |
| 34 public media::AudioInputController::EventHandler, | 40 public media::AudioInputController::EventHandler, |
| 35 public NON_EXPORTED_BASE(SpeechRecognitionEngineDelegate) { | 41 public NON_EXPORTED_BASE(SpeechRecognitionEngineDelegate) { |
| 36 public: | 42 public: |
| 37 static const int kAudioSampleRate; | 43 static const int kAudioSampleRate; |
| 38 static const ChannelLayout kChannelLayout; | 44 static const ChannelLayout kChannelLayout; |
| 39 static const int kNumBitsPerAudioSample; | 45 static const int kNumBitsPerAudioSample; |
| 40 static const int kNoSpeechTimeoutMs; | 46 static const int kNoSpeechTimeoutMs; |
| 41 static const int kEndpointerEstimationTimeMs; | 47 static const int kEndpointerEstimationTimeMs; |
| 42 | 48 |
| 43 SpeechRecognizerImpl( | 49 SpeechRecognizerImpl( |
| 44 content::SpeechRecognitionEventListener* listener, | 50 content::SpeechRecognitionEventListener* listener, |
| 45 int caller_id, | 51 int caller_id, |
| 46 const std::string& language, | 52 SpeechRecognitionEngine* engine); |
| 47 const std::string& grammar, | |
| 48 net::URLRequestContextGetter* context_getter, | |
| 49 bool filter_profanities, | |
| 50 const std::string& hardware_info, | |
| 51 const std::string& origin_url); | |
| 52 virtual ~SpeechRecognizerImpl(); | 53 virtual ~SpeechRecognizerImpl(); |
| 53 | 54 |
| 54 // content::SpeechRecognizer methods. | 55 // content::SpeechRecognizer methods. |
| 55 virtual void StartRecognition() OVERRIDE; | 56 virtual void StartRecognition() OVERRIDE; |
| 56 virtual void AbortRecognition() OVERRIDE; | 57 virtual void AbortRecognition() OVERRIDE; |
| 57 virtual void StopAudioCapture() OVERRIDE; | 58 virtual void StopAudioCapture() OVERRIDE; |
| 58 virtual bool IsActive() const OVERRIDE; | 59 virtual bool IsActive() const OVERRIDE; |
| 59 virtual bool IsCapturingAudio() const OVERRIDE; | 60 virtual bool IsCapturingAudio() const OVERRIDE; |
| 60 const SpeechRecognitionEngine& recognition_engine() const; | 61 const SpeechRecognitionEngine& recognition_engine() const; |
| 61 | 62 |
| 63 private: | |
| 64 friend class SpeechRecognizerImplTest; | |
| 65 | |
| 66 enum FSMState { | |
| 67 STATE_IDLE = 0, | |
| 68 STATE_STARTING, | |
| 69 STATE_ESTIMATING_ENVIRONMENT, | |
| 70 STATE_WAITING_FOR_SPEECH, | |
| 71 STATE_RECOGNIZING, | |
| 72 STATE_WAITING_FINAL_RESULT, | |
| 73 STATE_MAX_VALUE = STATE_WAITING_FINAL_RESULT | |
|
Primiano Tucci (use gerrit)
2012/04/12 17:38:06
Renamed due to a name clash on Windows (STATE_MAX…) [comment truncated in the source page]
| |
| 74 }; | |
| 75 | |
| 76 enum FSMEvent { | |
| 77 EVENT_ABORT = 0, | |
| 78 EVENT_START, | |
| 79 EVENT_STOP_CAPTURE, | |
| 80 EVENT_AUDIO_DATA, | |
| 81 EVENT_ENGINE_RESULT, | |
| 82 EVENT_ENGINE_ERROR, | |
| 83 EVENT_AUDIO_ERROR, | |
| 84 EVENT_MAX_VALUE = EVENT_AUDIO_ERROR | |
| 85 }; | |
| 86 | |
| 87 struct FSMEventArgs { | |
| 88 explicit FSMEventArgs(FSMEvent event_value); | |
| 89 ~FSMEventArgs(); | |
| 90 | |
| 91 FSMEvent event; | |
| 92 int audio_error_code; | |
| 93 scoped_refptr<AudioChunk> audio_data; | |
| 94 content::SpeechRecognitionResult engine_result; | |
| 95 content::SpeechRecognitionError engine_error; | |
| 96 }; | |
| 97 | |
| 98 // Entry point for pushing any new external event into the recognizer FSM. | |
| 99 void DispatchEvent(const FSMEventArgs& event_args); | |
| 100 | |
| 101 // Defines the behavior of the recognizer FSM, selecting the appropriate | |
| 102 // transition according to the current state and event. | |
| 103 FSMState ExecuteTransitionAndGetNextState(const FSMEventArgs& args); | |
| 104 | |
| 105 // Process a new audio chunk in the audio pipeline (endpointer, vumeter, etc). | |
| 106 void ProcessAudioPipeline(const AudioChunk& raw_audio); | |
| 107 | |
| 108 // The methods below handle transitions of the recognizer FSM. | |
| 109 FSMState StartRecording(const FSMEventArgs& event_args); | |
| 110 FSMState StartRecognitionEngine(const FSMEventArgs& event_args); | |
| 111 FSMState WaitEnvironmentEstimationCompletion(const FSMEventArgs& event_args); | |
| 112 FSMState DetectUserSpeechOrTimeout(const FSMEventArgs& event_args); | |
| 113 FSMState StopCaptureAndWaitForResult(const FSMEventArgs& event_args); | |
| 114 FSMState ProcessIntermediateResult(const FSMEventArgs& event_args); | |
| 115 FSMState ProcessFinalResult(const FSMEventArgs& event_args); | |
| 116 FSMState Abort(const FSMEventArgs& event_args); | |
| 117 FSMState AbortWithError(const content::SpeechRecognitionError* error); | |
| 118 FSMState AbortWithError(const content::SpeechRecognitionError& error); | |
| 119 FSMState DetectEndOfSpeech(const FSMEventArgs& event_args); | |
| 120 FSMState DoNothing(const FSMEventArgs& event_args) const; | |
| 121 FSMState NotFeasible(const FSMEventArgs& event_args); | |
| 122 | |
| 123 // Returns the time span of captured audio samples since the start of capture. | |
| 124 int GetElapsedTimeMs() const; | |
| 125 | |
| 126 // Calculates the input volume to be displayed in the UI, triggering the | |
| 127 // OnAudioLevelsChange event accordingly. | |
| 128 void UpdateSignalAndNoiseLevels(const float& rms, bool clip_detected); | |
| 129 | |
| 130 void CloseAudioControllerAsynchronously(); | |
| 131 void SetAudioManagerForTesting(media::AudioManager* audio_manager); | |
| 132 | |
| 133 // Callback called on IO thread by audio_controller->Close(). | |
| 134 void OnAudioClosed(media::AudioInputController*); | |
| 135 | |
| 62 // AudioInputController::EventHandler methods. | 136 // AudioInputController::EventHandler methods. |
| 63 virtual void OnCreated(media::AudioInputController* controller) OVERRIDE {} | 137 virtual void OnCreated(media::AudioInputController* controller) OVERRIDE {} |
| 64 virtual void OnRecording(media::AudioInputController* controller) OVERRIDE {} | 138 virtual void OnRecording(media::AudioInputController* controller) OVERRIDE {} |
| 65 virtual void OnError(media::AudioInputController* controller, | 139 virtual void OnError(media::AudioInputController* controller, |
| 66 int error_code) OVERRIDE; | 140 int error_code) OVERRIDE; |
| 67 virtual void OnData(media::AudioInputController* controller, | 141 virtual void OnData(media::AudioInputController* controller, |
| 68 const uint8* data, | 142 const uint8* data, uint32 size) OVERRIDE; |
| 69 uint32 size) OVERRIDE; | |
| 70 | 143 |
| 71 // SpeechRecognitionEngineDelegate methods. | 144 // SpeechRecognitionEngineDelegate methods. |
| 72 virtual void OnSpeechRecognitionEngineResult( | 145 virtual void OnSpeechRecognitionEngineResult( |
| 73 const content::SpeechRecognitionResult& result) OVERRIDE; | 146 const content::SpeechRecognitionResult& result) OVERRIDE; |
| 74 virtual void OnSpeechRecognitionEngineError( | 147 virtual void OnSpeechRecognitionEngineError( |
| 75 const content::SpeechRecognitionError& error) OVERRIDE; | 148 const content::SpeechRecognitionError& error) OVERRIDE; |
| 76 | 149 |
| 77 private: | |
| 78 friend class SpeechRecognizerImplTest; | |
| 79 | |
| 80 void InformErrorAndAbortRecognition( | |
| 81 content::SpeechRecognitionErrorCode error); | |
| 82 void SendRecordedAudioToServer(); | |
| 83 | |
| 84 void HandleOnError(int error_code); // Handles OnError in the IO thread. | |
| 85 | |
| 86 // Handles OnData in the IO thread. | |
| 87 void HandleOnData(scoped_refptr<AudioChunk> raw_audio); | |
| 88 | |
| 89 void OnAudioClosed(media::AudioInputController*); | |
| 90 | |
| 91 // Helper method which closes the audio controller and frees it asynchronously | |
| 92 // without blocking the IO thread. | |
| 93 void CloseAudioControllerAsynchronously(); | |
| 94 | |
| 95 void SetAudioManagerForTesting(media::AudioManager* audio_manager); | |
| 96 | |
| 97 content::SpeechRecognitionEventListener* listener_; | 150 content::SpeechRecognitionEventListener* listener_; |
| 98 media::AudioManager* testing_audio_manager_; | 151 media::AudioManager* testing_audio_manager_; |
| 99 scoped_ptr<SpeechRecognitionEngine> recognition_engine_; | 152 scoped_ptr<SpeechRecognitionEngine> recognition_engine_; |
| 100 Endpointer endpointer_; | 153 Endpointer endpointer_; |
| 101 scoped_refptr<media::AudioInputController> audio_controller_; | 154 scoped_refptr<media::AudioInputController> audio_controller_; |
| 102 scoped_refptr<net::URLRequestContextGetter> context_getter_; | |
| 103 int caller_id_; | 155 int caller_id_; |
| 104 std::string language_; | |
| 105 std::string grammar_; | |
| 106 bool filter_profanities_; | |
| 107 std::string hardware_info_; | |
| 108 std::string origin_url_; | |
| 109 int num_samples_recorded_; | 156 int num_samples_recorded_; |
| 110 float audio_level_; | 157 float audio_level_; |
| 158 bool is_dispatching_event_; | |
| 159 FSMState state_; | |
| 111 | 160 |
| 112 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl); | 161 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl); |
| 113 }; | 162 }; |
| 114 | 163 |
| 115 } // namespace speech | 164 } // namespace speech |
| 116 | 165 |
| 117 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ | 166 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ |
| OLD | NEW |