| OLD | NEW |
| 1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ | 5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ |
| 6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ | 6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ |
| 7 | 7 |
| 8 #include <memory> | 8 #include <memory> |
| 9 #include <string> | 9 #include <string> |
| 10 | 10 |
| 11 #include "base/macros.h" | 11 #include "base/macros.h" |
| 12 #include "base/memory/weak_ptr.h" |
| 12 #include "content/browser/speech/endpointer/endpointer.h" | 13 #include "content/browser/speech/endpointer/endpointer.h" |
| 13 #include "content/browser/speech/speech_recognition_engine.h" | 14 #include "content/browser/speech/speech_recognition_engine.h" |
| 14 #include "content/browser/speech/speech_recognizer.h" | 15 #include "content/browser/speech/speech_recognizer.h" |
| 15 #include "content/public/common/speech_recognition_error.h" | 16 #include "content/public/common/speech_recognition_error.h" |
| 16 #include "content/public/common/speech_recognition_result.h" | 17 #include "content/public/common/speech_recognition_result.h" |
| 17 #include "media/audio/audio_input_controller.h" | 18 #include "media/audio/audio_input_controller.h" |
| 18 #include "media/audio/audio_logging.h" | 19 #include "media/audio/audio_logging.h" |
| 19 #include "net/url_request/url_request_context_getter.h" | 20 #include "net/url_request/url_request_context_getter.h" |
| 20 | 21 |
| 21 namespace media { | 22 namespace media { |
| 22 class AudioBus; | 23 class AudioBus; |
| 23 class AudioManager; | 24 class AudioSystem; |
| 24 } | 25 } |
| 25 | 26 |
| 26 namespace content { | 27 namespace content { |
| 27 | 28 |
| 28 class SpeechRecognitionEventListener; | 29 class SpeechRecognitionEventListener; |
| 29 | 30 |
| 30 // Handles speech recognition for a session (identified by |session_id|), taking | 31 // Handles speech recognition for a session (identified by |session_id|), taking |
| 31 // care of audio capture, silence detection/endpointer and interaction with the | 32 // care of audio capture, silence detection/endpointer and interaction with the |
| 32 // SpeechRecognitionEngine. | 33 // SpeechRecognitionEngine. |
| 33 class CONTENT_EXPORT SpeechRecognizerImpl | 34 class CONTENT_EXPORT SpeechRecognizerImpl |
| 34 : public SpeechRecognizer, | 35 : public SpeechRecognizer, |
| 35 public media::AudioInputController::EventHandler, | 36 public media::AudioInputController::EventHandler, |
| 36 public media::AudioInputController::SyncWriter, | 37 public media::AudioInputController::SyncWriter, |
| 37 public NON_EXPORTED_BASE(SpeechRecognitionEngine::Delegate) { | 38 public NON_EXPORTED_BASE(SpeechRecognitionEngine::Delegate) { |
| 38 public: | 39 public: |
| 39 static const int kAudioSampleRate; | 40 static const int kAudioSampleRate; |
| 40 static const media::ChannelLayout kChannelLayout; | 41 static const media::ChannelLayout kChannelLayout; |
| 41 static const int kNumBitsPerAudioSample; | 42 static const int kNumBitsPerAudioSample; |
| 42 static const int kNoSpeechTimeoutMs; | 43 static const int kNoSpeechTimeoutMs; |
| 43 static const int kEndpointerEstimationTimeMs; | 44 static const int kEndpointerEstimationTimeMs; |
| 44 | 45 |
| 45 static void SetAudioManagerForTesting(media::AudioManager* audio_manager); | 46 static void SetAudioSystemForTesting(media::AudioSystem* audio_system); |
| 46 | 47 |
| 47 SpeechRecognizerImpl(SpeechRecognitionEventListener* listener, | 48 SpeechRecognizerImpl(SpeechRecognitionEventListener* listener, |
| 49 media::AudioSystem* audio_system, |
| 48 int session_id, | 50 int session_id, |
| 49 bool continuous, | 51 bool continuous, |
| 50 bool provisional_results, | 52 bool provisional_results, |
| 51 SpeechRecognitionEngine* engine); | 53 SpeechRecognitionEngine* engine); |
| 52 | 54 |
| 53 void StartRecognition(const std::string& device_id) override; | 55 void StartRecognition(const std::string& device_id) override; |
| 54 void AbortRecognition() override; | 56 void AbortRecognition() override; |
| 55 void StopAudioCapture() override; | 57 void StopAudioCapture() override; |
| 56 bool IsActive() const override; | 58 bool IsActive() const override; |
| 57 bool IsCapturingAudio() const override; | 59 bool IsCapturingAudio() const override; |
| 58 const SpeechRecognitionEngine& recognition_engine() const; | 60 const SpeechRecognitionEngine& recognition_engine() const; |
| 59 | 61 |
| 60 private: | 62 private: |
| 61 friend class SpeechRecognizerTest; | 63 friend class SpeechRecognizerTest; |
| 62 | 64 |
| 63 enum FSMState { | 65 enum FSMState { |
| 64 STATE_IDLE = 0, | 66 STATE_IDLE = 0, |
| 67 STATE_PREPARING, |
| 65 STATE_STARTING, | 68 STATE_STARTING, |
| 66 STATE_ESTIMATING_ENVIRONMENT, | 69 STATE_ESTIMATING_ENVIRONMENT, |
| 67 STATE_WAITING_FOR_SPEECH, | 70 STATE_WAITING_FOR_SPEECH, |
| 68 STATE_RECOGNIZING, | 71 STATE_RECOGNIZING, |
| 69 STATE_WAITING_FINAL_RESULT, | 72 STATE_WAITING_FINAL_RESULT, |
| 70 STATE_ENDED, | 73 STATE_ENDED, |
| 71 STATE_MAX_VALUE = STATE_ENDED | 74 STATE_MAX_VALUE = STATE_ENDED |
| 72 }; | 75 }; |
| 73 | 76 |
| 74 enum FSMEvent { | 77 enum FSMEvent { |
| 75 EVENT_ABORT = 0, | 78 EVENT_ABORT = 0, |
| 79 EVENT_PREPARE, |
| 76 EVENT_START, | 80 EVENT_START, |
| 77 EVENT_STOP_CAPTURE, | 81 EVENT_STOP_CAPTURE, |
| 78 EVENT_AUDIO_DATA, | 82 EVENT_AUDIO_DATA, |
| 79 EVENT_ENGINE_RESULT, | 83 EVENT_ENGINE_RESULT, |
| 80 EVENT_ENGINE_ERROR, | 84 EVENT_ENGINE_ERROR, |
| 81 EVENT_AUDIO_ERROR, | 85 EVENT_AUDIO_ERROR, |
| 82 EVENT_MAX_VALUE = EVENT_AUDIO_ERROR | 86 EVENT_MAX_VALUE = EVENT_AUDIO_ERROR |
| 83 }; | 87 }; |
| 84 | 88 |
| 85 struct FSMEventArgs { | 89 struct FSMEventArgs { |
| (...skipping 12 matching lines...) Expand all Loading... |
| 98 // Entry point for pushing any new external event into the recognizer FSM. | 102 // Entry point for pushing any new external event into the recognizer FSM. |
| 99 void DispatchEvent(const FSMEventArgs& event_args); | 103 void DispatchEvent(const FSMEventArgs& event_args); |
| 100 | 104 |
| 101 // Defines the behavior of the recognizer FSM, selecting the appropriate | 105 // Defines the behavior of the recognizer FSM, selecting the appropriate |
| 102 // transition according to the current state and event. | 106 // transition according to the current state and event. |
| 103 FSMState ExecuteTransitionAndGetNextState(const FSMEventArgs& args); | 107 FSMState ExecuteTransitionAndGetNextState(const FSMEventArgs& args); |
| 104 | 108 |
| 105 // Process a new audio chunk in the audio pipeline (endpointer, vumeter, etc). | 109 // Process a new audio chunk in the audio pipeline (endpointer, vumeter, etc). |
| 106 void ProcessAudioPipeline(const AudioChunk& raw_audio); | 110 void ProcessAudioPipeline(const AudioChunk& raw_audio); |
| 107 | 111 |
| 112 // Callback from AudioSystem, which supplies the audio parameters. |
| 113 void OnDeviceInfo(const media::AudioParameters& params); |
| 114 |
| 108 // The methods below handle transitions of the recognizer FSM. | 115 // The methods below handle transitions of the recognizer FSM. |
| 116 FSMState PrepareRecognition(const FSMEventArgs&); |
| 109 FSMState StartRecording(const FSMEventArgs& event_args); | 117 FSMState StartRecording(const FSMEventArgs& event_args); |
| 110 FSMState StartRecognitionEngine(const FSMEventArgs& event_args); | 118 FSMState StartRecognitionEngine(const FSMEventArgs& event_args); |
| 111 FSMState WaitEnvironmentEstimationCompletion(const FSMEventArgs& event_args); | 119 FSMState WaitEnvironmentEstimationCompletion(const FSMEventArgs& event_args); |
| 112 FSMState DetectUserSpeechOrTimeout(const FSMEventArgs& event_args); | 120 FSMState DetectUserSpeechOrTimeout(const FSMEventArgs& event_args); |
| 113 FSMState StopCaptureAndWaitForResult(const FSMEventArgs& event_args); | 121 FSMState StopCaptureAndWaitForResult(const FSMEventArgs& event_args); |
| 114 FSMState ProcessIntermediateResult(const FSMEventArgs& event_args); | 122 FSMState ProcessIntermediateResult(const FSMEventArgs& event_args); |
| 115 FSMState ProcessFinalResult(const FSMEventArgs& event_args); | 123 FSMState ProcessFinalResult(const FSMEventArgs& event_args); |
| 116 FSMState AbortSilently(const FSMEventArgs& event_args); | 124 FSMState AbortSilently(const FSMEventArgs& event_args); |
| 117 FSMState AbortWithError(const FSMEventArgs& event_args); | 125 FSMState AbortWithError(const FSMEventArgs& event_args); |
| 118 FSMState Abort(const SpeechRecognitionError& error); | 126 FSMState Abort(const SpeechRecognitionError& error); |
| (...skipping 27 matching lines...) Expand all Loading... |
| 146 uint32_t hardware_delay_bytes) override; | 154 uint32_t hardware_delay_bytes) override; |
| 147 void Close() override; | 155 void Close() override; |
| 148 | 156 |
| 149 // SpeechRecognitionEngine::Delegate methods. | 157 // SpeechRecognitionEngine::Delegate methods. |
| 150 void OnSpeechRecognitionEngineResults( | 158 void OnSpeechRecognitionEngineResults( |
| 151 const SpeechRecognitionResults& results) override; | 159 const SpeechRecognitionResults& results) override; |
| 152 void OnSpeechRecognitionEngineEndOfUtterance() override; | 160 void OnSpeechRecognitionEngineEndOfUtterance() override; |
| 153 void OnSpeechRecognitionEngineError( | 161 void OnSpeechRecognitionEngineError( |
| 154 const SpeechRecognitionError& error) override; | 162 const SpeechRecognitionError& error) override; |
| 155 | 163 |
| 156 static media::AudioManager* audio_manager_for_tests_; | 164 media::AudioSystem* GetAudioSystem(); |
| 157 | 165 |
| 166 // Substitutes the real audio system in browser tests. |
| 167 static media::AudioSystem* audio_system_for_tests_; |
| 168 media::AudioSystem* audio_system_; |
| 158 std::unique_ptr<SpeechRecognitionEngine> recognition_engine_; | 169 std::unique_ptr<SpeechRecognitionEngine> recognition_engine_; |
| 159 Endpointer endpointer_; | 170 Endpointer endpointer_; |
| 160 scoped_refptr<media::AudioInputController> audio_controller_; | 171 scoped_refptr<media::AudioInputController> audio_controller_; |
| 161 std::unique_ptr<media::AudioLog> audio_log_; | 172 std::unique_ptr<media::AudioLog> audio_log_; |
| 162 int num_samples_recorded_; | 173 int num_samples_recorded_; |
| 163 float audio_level_; | 174 float audio_level_; |
| 164 bool is_dispatching_event_; | 175 bool is_dispatching_event_; |
| 165 bool provisional_results_; | 176 bool provisional_results_; |
| 166 bool end_of_utterance_; | 177 bool end_of_utterance_; |
| 167 FSMState state_; | 178 FSMState state_; |
| 168 std::string device_id_; | 179 std::string device_id_; |
| 180 media::AudioParameters device_params_; |
| 169 | 181 |
| 170 class OnDataConverter; | 182 class OnDataConverter; |
| 171 | 183 |
| 172 // Converts data between native input format and a WebSpeech-specific | 184 // Converts data between native input format and a WebSpeech-specific |
| 173 // output format. | 185 // output format. |
| 174 std::unique_ptr<SpeechRecognizerImpl::OnDataConverter> audio_converter_; | 186 std::unique_ptr<SpeechRecognizerImpl::OnDataConverter> audio_converter_; |
| 175 | 187 |
| 188 base::WeakPtrFactory<SpeechRecognizerImpl> weak_ptr_factory_; |
| 176 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl); | 189 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl); |
| 177 }; | 190 }; |
| 178 | 191 |
| 179 } // namespace content | 192 } // namespace content |
| 180 | 193 |
| 181 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ | 194 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ |
| OLD | NEW |