| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_H_ | 5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_H_ |
| 6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_H_ | 6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_H_ |
| 7 | 7 |
| 8 #include "base/basictypes.h" | 8 #include "base/basictypes.h" |
| 9 #include "base/memory/ref_counted.h" | 9 #include "base/memory/ref_counted.h" |
| 10 #include "base/memory/scoped_ptr.h" | 10 #include "base/memory/scoped_ptr.h" |
| 11 #include "content/browser/speech/endpointer/endpointer.h" | 11 #include "content/browser/speech/endpointer/endpointer.h" |
| 12 #include "content/browser/speech/speech_recognition_engine.h" | 12 #include "content/browser/speech/speech_recognition_engine.h" |
| 13 #include "content/public/common/speech_recognition_error.h" | 13 #include "content/public/common/speech_recognition_error.h" |
| 14 #include "content/public/common/speech_recognition_result.h" | 14 #include "content/public/common/speech_recognition_result.h" |
| 15 #include "media/audio/audio_input_controller.h" | 15 #include "media/audio/audio_input_controller.h" |
| 16 #include "net/url_request/url_request_context_getter.h" | 16 #include "net/url_request/url_request_context_getter.h" |
| 17 | 17 |
| 18 namespace content { | |
| 19 class SpeechRecognitionEventListener; | |
| 20 } | |
| 21 | |
| 22 namespace media { | 18 namespace media { |
| 23 class AudioManager; | 19 class AudioManager; |
| 24 } | 20 } |
| 25 | 21 |
| 26 namespace speech { | 22 namespace content { |
| 23 |
| 24 class SpeechRecognitionEventListener; |
| 25 |
| 27 // Handles speech recognition for a session (identified by |session_id|), taking | 26 // Handles speech recognition for a session (identified by |session_id|), taking |
| 28 // care of audio capture, silence detection/endpointer and interaction with the | 27 // care of audio capture, silence detection/endpointer and interaction with the |
| 29 // SpeechRecognitionEngine. | 28 // SpeechRecognitionEngine. |
| 30 class CONTENT_EXPORT SpeechRecognizer | 29 class CONTENT_EXPORT SpeechRecognizer |
| 31 : public base::RefCountedThreadSafe<SpeechRecognizer>, | 30 : public base::RefCountedThreadSafe<SpeechRecognizer>, |
| 32 public media::AudioInputController::EventHandler, | 31 public media::AudioInputController::EventHandler, |
| 33 public NON_EXPORTED_BASE(SpeechRecognitionEngineDelegate) { | 32 public NON_EXPORTED_BASE(SpeechRecognitionEngineDelegate) { |
| 34 public: | 33 public: |
| 35 static const int kAudioSampleRate; | 34 static const int kAudioSampleRate; |
| 36 static const media::ChannelLayout kChannelLayout; | 35 static const media::ChannelLayout kChannelLayout; |
| 37 static const int kNumBitsPerAudioSample; | 36 static const int kNumBitsPerAudioSample; |
| 38 static const int kNoSpeechTimeoutMs; | 37 static const int kNoSpeechTimeoutMs; |
| 39 static const int kEndpointerEstimationTimeMs; | 38 static const int kEndpointerEstimationTimeMs; |
| 40 | 39 |
| 41 static void SetAudioManagerForTests(media::AudioManager* audio_manager); | 40 static void SetAudioManagerForTests(media::AudioManager* audio_manager); |
| 42 | 41 |
| 43 SpeechRecognizer( | 42 SpeechRecognizer(SpeechRecognitionEventListener* listener, |
| 44 content::SpeechRecognitionEventListener* listener, | 43 int session_id, |
| 45 int session_id, | 44 bool is_single_shot, |
| 46 bool is_single_shot, | 45 SpeechRecognitionEngine* engine); |
| 47 SpeechRecognitionEngine* engine); | |
| 48 | 46 |
| 49 void StartRecognition(); | 47 void StartRecognition(); |
| 50 void AbortRecognition(); | 48 void AbortRecognition(); |
| 51 void StopAudioCapture(); | 49 void StopAudioCapture(); |
| 52 bool IsActive() const; | 50 bool IsActive() const; |
| 53 bool IsCapturingAudio() const; | 51 bool IsCapturingAudio() const; |
| 54 const SpeechRecognitionEngine& recognition_engine() const; | 52 const SpeechRecognitionEngine& recognition_engine() const; |
| 55 | 53 |
| 56 private: | 54 private: |
| 57 friend class base::RefCountedThreadSafe<SpeechRecognizer>; | 55 friend class base::RefCountedThreadSafe<SpeechRecognizer>; |
| (...skipping 20 matching lines...) Expand all Loading... |
| 78 EVENT_MAX_VALUE = EVENT_AUDIO_ERROR | 76 EVENT_MAX_VALUE = EVENT_AUDIO_ERROR |
| 79 }; | 77 }; |
| 80 | 78 |
| 81 struct FSMEventArgs { | 79 struct FSMEventArgs { |
| 82 explicit FSMEventArgs(FSMEvent event_value); | 80 explicit FSMEventArgs(FSMEvent event_value); |
| 83 ~FSMEventArgs(); | 81 ~FSMEventArgs(); |
| 84 | 82 |
| 85 FSMEvent event; | 83 FSMEvent event; |
| 86 int audio_error_code; | 84 int audio_error_code; |
| 87 scoped_refptr<AudioChunk> audio_data; | 85 scoped_refptr<AudioChunk> audio_data; |
| 88 content::SpeechRecognitionResult engine_result; | 86 SpeechRecognitionResult engine_result; |
| 89 content::SpeechRecognitionError engine_error; | 87 SpeechRecognitionError engine_error; |
| 90 }; | 88 }; |
| 91 | 89 |
| 92 virtual ~SpeechRecognizer(); | 90 virtual ~SpeechRecognizer(); |
| 93 | 91 |
| 94 // Entry point for pushing any new external event into the recognizer FSM. | 92 // Entry point for pushing any new external event into the recognizer FSM. |
| 95 void DispatchEvent(const FSMEventArgs& event_args); | 93 void DispatchEvent(const FSMEventArgs& event_args); |
| 96 | 94 |
| 97 // Defines the behavior of the recognizer FSM, selecting the appropriate | 95 // Defines the behavior of the recognizer FSM, selecting the appropriate |
| 98 // transition according to the current state and event. | 96 // transition according to the current state and event. |
| 99 FSMState ExecuteTransitionAndGetNextState(const FSMEventArgs& args); | 97 FSMState ExecuteTransitionAndGetNextState(const FSMEventArgs& args); |
| 100 | 98 |
| 101 // Process a new audio chunk in the audio pipeline (endpointer, vumeter, etc). | 99 // Process a new audio chunk in the audio pipeline (endpointer, vumeter, etc). |
| 102 void ProcessAudioPipeline(const AudioChunk& raw_audio); | 100 void ProcessAudioPipeline(const AudioChunk& raw_audio); |
| 103 | 101 |
| 104 // The methods below handle transitions of the recognizer FSM. | 102 // The methods below handle transitions of the recognizer FSM. |
| 105 FSMState StartRecording(const FSMEventArgs& event_args); | 103 FSMState StartRecording(const FSMEventArgs& event_args); |
| 106 FSMState StartRecognitionEngine(const FSMEventArgs& event_args); | 104 FSMState StartRecognitionEngine(const FSMEventArgs& event_args); |
| 107 FSMState WaitEnvironmentEstimationCompletion(const FSMEventArgs& event_args); | 105 FSMState WaitEnvironmentEstimationCompletion(const FSMEventArgs& event_args); |
| 108 FSMState DetectUserSpeechOrTimeout(const FSMEventArgs& event_args); | 106 FSMState DetectUserSpeechOrTimeout(const FSMEventArgs& event_args); |
| 109 FSMState StopCaptureAndWaitForResult(const FSMEventArgs& event_args); | 107 FSMState StopCaptureAndWaitForResult(const FSMEventArgs& event_args); |
| 110 FSMState ProcessIntermediateResult(const FSMEventArgs& event_args); | 108 FSMState ProcessIntermediateResult(const FSMEventArgs& event_args); |
| 111 FSMState ProcessFinalResult(const FSMEventArgs& event_args); | 109 FSMState ProcessFinalResult(const FSMEventArgs& event_args); |
| 112 FSMState AbortSilently(const FSMEventArgs& event_args); | 110 FSMState AbortSilently(const FSMEventArgs& event_args); |
| 113 FSMState AbortWithError(const FSMEventArgs& event_args); | 111 FSMState AbortWithError(const FSMEventArgs& event_args); |
| 114 FSMState Abort(const content::SpeechRecognitionError& error); | 112 FSMState Abort(const SpeechRecognitionError& error); |
| 115 FSMState DetectEndOfSpeech(const FSMEventArgs& event_args); | 113 FSMState DetectEndOfSpeech(const FSMEventArgs& event_args); |
| 116 FSMState DoNothing(const FSMEventArgs& event_args) const; | 114 FSMState DoNothing(const FSMEventArgs& event_args) const; |
| 117 FSMState NotFeasible(const FSMEventArgs& event_args); | 115 FSMState NotFeasible(const FSMEventArgs& event_args); |
| 118 | 116 |
| 119 // Returns the time span of captured audio samples since the start of capture. | 117 // Returns the time span of captured audio samples since the start of capture. |
| 120 int GetElapsedTimeMs() const; | 118 int GetElapsedTimeMs() const; |
| 121 | 119 |
| 122 // Calculates the input volume to be displayed in the UI, triggering the | 120 // Calculates the input volume to be displayed in the UI, triggering the |
| 123 // OnAudioLevelsChange event accordingly. | 121 // OnAudioLevelsChange event accordingly. |
| 124 void UpdateSignalAndNoiseLevels(const float& rms, bool clip_detected); | 122 void UpdateSignalAndNoiseLevels(const float& rms, bool clip_detected); |
| 125 | 123 |
| 126 void CloseAudioControllerAsynchronously(); | 124 void CloseAudioControllerAsynchronously(); |
| 127 | 125 |
| 128 // Callback called on IO thread by audio_controller->Close(). | 126 // Callback called on IO thread by audio_controller->Close(). |
| 129 void OnAudioClosed(media::AudioInputController*); | 127 void OnAudioClosed(media::AudioInputController*); |
| 130 | 128 |
| 131 // AudioInputController::EventHandler methods. | 129 // AudioInputController::EventHandler methods. |
| 132 virtual void OnCreated(media::AudioInputController* controller) OVERRIDE {} | 130 virtual void OnCreated(media::AudioInputController* controller) OVERRIDE {} |
| 133 virtual void OnRecording(media::AudioInputController* controller) OVERRIDE {} | 131 virtual void OnRecording(media::AudioInputController* controller) OVERRIDE {} |
| 134 virtual void OnError(media::AudioInputController* controller, | 132 virtual void OnError(media::AudioInputController* controller, |
| 135 int error_code) OVERRIDE; | 133 int error_code) OVERRIDE; |
| 136 virtual void OnData(media::AudioInputController* controller, | 134 virtual void OnData(media::AudioInputController* controller, |
| 137 const uint8* data, uint32 size) OVERRIDE; | 135 const uint8* data, uint32 size) OVERRIDE; |
| 138 | 136 |
| 139 // SpeechRecognitionEngineDelegate methods. | 137 // SpeechRecognitionEngineDelegate methods. |
| 140 virtual void OnSpeechRecognitionEngineResult( | 138 virtual void OnSpeechRecognitionEngineResult( |
| 141 const content::SpeechRecognitionResult& result) OVERRIDE; | 139 const SpeechRecognitionResult& result) OVERRIDE; |
| 142 virtual void OnSpeechRecognitionEngineError( | 140 virtual void OnSpeechRecognitionEngineError( |
| 143 const content::SpeechRecognitionError& error) OVERRIDE; | 141 const SpeechRecognitionError& error) OVERRIDE; |
| 144 | 142 |
| 145 static media::AudioManager* audio_manager_for_tests_; | 143 static media::AudioManager* audio_manager_for_tests_; |
| 146 | 144 |
| 147 content::SpeechRecognitionEventListener* listener_; | 145 SpeechRecognitionEventListener* listener_; |
| 148 scoped_ptr<SpeechRecognitionEngine> recognition_engine_; | 146 scoped_ptr<SpeechRecognitionEngine> recognition_engine_; |
| 149 Endpointer endpointer_; | 147 Endpointer endpointer_; |
| 150 scoped_refptr<media::AudioInputController> audio_controller_; | 148 scoped_refptr<media::AudioInputController> audio_controller_; |
| 151 int session_id_; | 149 int session_id_; |
| 152 int num_samples_recorded_; | 150 int num_samples_recorded_; |
| 153 float audio_level_; | 151 float audio_level_; |
| 154 bool is_dispatching_event_; | 152 bool is_dispatching_event_; |
| 155 bool is_single_shot_; | 153 bool is_single_shot_; |
| 156 FSMState state_; | 154 FSMState state_; |
| 157 | 155 |
| 158 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizer); | 156 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizer); |
| 159 }; | 157 }; |
| 160 | 158 |
| 161 } // namespace speech | 159 } // namespace content |
| 162 | 160 |
| 163 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_H_ | 161 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_H_ |
| OLD | NEW |