OLD | NEW |
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ | 5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ |
6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ | 6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ |
7 | 7 |
8 #include <memory> | 8 #include <memory> |
9 | 9 |
10 #include "base/macros.h" | 10 #include "base/macros.h" |
| 11 #include "base/memory/weak_ptr.h" |
11 #include "content/browser/speech/endpointer/endpointer.h" | 12 #include "content/browser/speech/endpointer/endpointer.h" |
12 #include "content/browser/speech/speech_recognition_engine.h" | 13 #include "content/browser/speech/speech_recognition_engine.h" |
13 #include "content/browser/speech/speech_recognizer.h" | 14 #include "content/browser/speech/speech_recognizer.h" |
14 #include "content/public/common/speech_recognition_error.h" | 15 #include "content/public/common/speech_recognition_error.h" |
15 #include "content/public/common/speech_recognition_result.h" | 16 #include "content/public/common/speech_recognition_result.h" |
16 #include "media/audio/audio_input_controller.h" | 17 #include "media/audio/audio_input_controller.h" |
17 #include "media/audio/audio_logging.h" | 18 #include "media/audio/audio_logging.h" |
18 #include "net/url_request/url_request_context_getter.h" | 19 #include "net/url_request/url_request_context_getter.h" |
19 | 20 |
20 namespace media { | 21 namespace media { |
21 class AudioBus; | 22 class AudioBus; |
22 class AudioManager; | 23 class AudioSystem; |
23 } | 24 } |
24 | 25 |
25 namespace content { | 26 namespace content { |
26 | 27 |
27 class SpeechRecognitionEventListener; | 28 class SpeechRecognitionEventListener; |
28 | 29 |
29 // Handles speech recognition for a session (identified by |session_id|), taking | 30 // Handles speech recognition for a session (identified by |session_id|), taking |
30 // care of audio capture, silence detection/endpointer and interaction with the | 31 // care of audio capture, silence detection/endpointer and interaction with the |
31 // SpeechRecognitionEngine. | 32 // SpeechRecognitionEngine. |
32 class CONTENT_EXPORT SpeechRecognizerImpl | 33 class CONTENT_EXPORT SpeechRecognizerImpl |
33 : public SpeechRecognizer, | 34 : public SpeechRecognizer, |
34 public media::AudioInputController::EventHandler, | 35 public media::AudioInputController::EventHandler, |
35 public media::AudioInputController::SyncWriter, | 36 public media::AudioInputController::SyncWriter, |
36 public NON_EXPORTED_BASE(SpeechRecognitionEngine::Delegate) { | 37 public NON_EXPORTED_BASE(SpeechRecognitionEngine::Delegate) { |
37 public: | 38 public: |
38 static const int kAudioSampleRate; | 39 static const int kAudioSampleRate; |
39 static const media::ChannelLayout kChannelLayout; | 40 static const media::ChannelLayout kChannelLayout; |
40 static const int kNumBitsPerAudioSample; | 41 static const int kNumBitsPerAudioSample; |
41 static const int kNoSpeechTimeoutMs; | 42 static const int kNoSpeechTimeoutMs; |
42 static const int kEndpointerEstimationTimeMs; | 43 static const int kEndpointerEstimationTimeMs; |
43 | 44 |
44 static void SetAudioManagerForTesting(media::AudioManager* audio_manager); | 45 static void SetAudioSystemForTesting(media::AudioSystem* audio_system); |
45 | 46 |
46 SpeechRecognizerImpl(SpeechRecognitionEventListener* listener, | 47 SpeechRecognizerImpl(SpeechRecognitionEventListener* listener, |
| 48 media::AudioSystem* audio_system, |
47 int session_id, | 49 int session_id, |
48 bool continuous, | 50 bool continuous, |
49 bool provisional_results, | 51 bool provisional_results, |
50 SpeechRecognitionEngine* engine); | 52 SpeechRecognitionEngine* engine); |
51 | 53 |
52 void StartRecognition(const std::string& device_id) override; | 54 void StartRecognition(const std::string& device_id) override; |
53 void AbortRecognition() override; | 55 void AbortRecognition() override; |
54 void StopAudioCapture() override; | 56 void StopAudioCapture() override; |
55 bool IsActive() const override; | 57 bool IsActive() const override; |
56 bool IsCapturingAudio() const override; | 58 bool IsCapturingAudio() const override; |
57 const SpeechRecognitionEngine& recognition_engine() const; | 59 const SpeechRecognitionEngine& recognition_engine() const; |
58 | 60 |
59 private: | 61 private: |
60 friend class SpeechRecognizerTest; | 62 friend class SpeechRecognizerTest; |
61 | 63 |
62 enum FSMState { | 64 enum FSMState { |
63 STATE_IDLE = 0, | 65 STATE_IDLE = 0, |
| 66 STATE_PREPARING, |
64 STATE_STARTING, | 67 STATE_STARTING, |
65 STATE_ESTIMATING_ENVIRONMENT, | 68 STATE_ESTIMATING_ENVIRONMENT, |
66 STATE_WAITING_FOR_SPEECH, | 69 STATE_WAITING_FOR_SPEECH, |
67 STATE_RECOGNIZING, | 70 STATE_RECOGNIZING, |
68 STATE_WAITING_FINAL_RESULT, | 71 STATE_WAITING_FINAL_RESULT, |
69 STATE_ENDED, | 72 STATE_ENDED, |
70 STATE_MAX_VALUE = STATE_ENDED | 73 STATE_MAX_VALUE = STATE_ENDED |
71 }; | 74 }; |
72 | 75 |
73 enum FSMEvent { | 76 enum FSMEvent { |
74 EVENT_ABORT = 0, | 77 EVENT_ABORT = 0, |
| 78 EVENT_PREPARE, |
75 EVENT_START, | 79 EVENT_START, |
76 EVENT_STOP_CAPTURE, | 80 EVENT_STOP_CAPTURE, |
77 EVENT_AUDIO_DATA, | 81 EVENT_AUDIO_DATA, |
78 EVENT_ENGINE_RESULT, | 82 EVENT_ENGINE_RESULT, |
79 EVENT_ENGINE_ERROR, | 83 EVENT_ENGINE_ERROR, |
80 EVENT_AUDIO_ERROR, | 84 EVENT_AUDIO_ERROR, |
81 EVENT_MAX_VALUE = EVENT_AUDIO_ERROR | 85 EVENT_MAX_VALUE = EVENT_AUDIO_ERROR |
82 }; | 86 }; |
83 | 87 |
84 struct FSMEventArgs { | 88 struct FSMEventArgs { |
(...skipping 12 matching lines: OLD 85-96, NEW 89-100...) |
97 // Entry point for pushing any new external event into the recognizer FSM. | 101 // Entry point for pushing any new external event into the recognizer FSM. |
98 void DispatchEvent(const FSMEventArgs& event_args); | 102 void DispatchEvent(const FSMEventArgs& event_args); |
99 | 103 |
100 // Defines the behavior of the recognizer FSM, selecting the appropriate | 104 // Defines the behavior of the recognizer FSM, selecting the appropriate |
101 // transition according to the current state and event. | 105 // transition according to the current state and event. |
102 FSMState ExecuteTransitionAndGetNextState(const FSMEventArgs& args); | 106 FSMState ExecuteTransitionAndGetNextState(const FSMEventArgs& args); |
103 | 107 |
104 // Process a new audio chunk in the audio pipeline (endpointer, vumeter, etc). | 108 // Process a new audio chunk in the audio pipeline (endpointer, vumeter, etc). |
105 void ProcessAudioPipeline(const AudioChunk& raw_audio); | 109 void ProcessAudioPipeline(const AudioChunk& raw_audio); |
106 | 110 |
| 111 // Callback from AudioSystem. |
| 112 void OnDeviceInfo(const media::AudioParameters& params); |
| 113 |
107 // The methods below handle transitions of the recognizer FSM. | 114 // The methods below handle transitions of the recognizer FSM. |
| 115 FSMState PrepareRecognition(const FSMEventArgs&); |
108 FSMState StartRecording(const FSMEventArgs& event_args); | 116 FSMState StartRecording(const FSMEventArgs& event_args); |
109 FSMState StartRecognitionEngine(const FSMEventArgs& event_args); | 117 FSMState StartRecognitionEngine(const FSMEventArgs& event_args); |
110 FSMState WaitEnvironmentEstimationCompletion(const FSMEventArgs& event_args); | 118 FSMState WaitEnvironmentEstimationCompletion(const FSMEventArgs& event_args); |
111 FSMState DetectUserSpeechOrTimeout(const FSMEventArgs& event_args); | 119 FSMState DetectUserSpeechOrTimeout(const FSMEventArgs& event_args); |
112 FSMState StopCaptureAndWaitForResult(const FSMEventArgs& event_args); | 120 FSMState StopCaptureAndWaitForResult(const FSMEventArgs& event_args); |
113 FSMState ProcessIntermediateResult(const FSMEventArgs& event_args); | 121 FSMState ProcessIntermediateResult(const FSMEventArgs& event_args); |
114 FSMState ProcessFinalResult(const FSMEventArgs& event_args); | 122 FSMState ProcessFinalResult(const FSMEventArgs& event_args); |
115 FSMState AbortSilently(const FSMEventArgs& event_args); | 123 FSMState AbortSilently(const FSMEventArgs& event_args); |
116 FSMState AbortWithError(const FSMEventArgs& event_args); | 124 FSMState AbortWithError(const FSMEventArgs& event_args); |
117 FSMState Abort(const SpeechRecognitionError& error); | 125 FSMState Abort(const SpeechRecognitionError& error); |
(...skipping 27 matching lines: OLD 118-144, NEW 126-152...) |
145 uint32_t hardware_delay_bytes) override; | 153 uint32_t hardware_delay_bytes) override; |
146 void Close() override; | 154 void Close() override; |
147 | 155 |
148 // SpeechRecognitionEngineDelegate methods. | 156 // SpeechRecognitionEngineDelegate methods. |
149 void OnSpeechRecognitionEngineResults( | 157 void OnSpeechRecognitionEngineResults( |
150 const SpeechRecognitionResults& results) override; | 158 const SpeechRecognitionResults& results) override; |
151 void OnSpeechRecognitionEngineEndOfUtterance() override; | 159 void OnSpeechRecognitionEngineEndOfUtterance() override; |
152 void OnSpeechRecognitionEngineError( | 160 void OnSpeechRecognitionEngineError( |
153 const SpeechRecognitionError& error) override; | 161 const SpeechRecognitionError& error) override; |
154 | 162 |
155 static media::AudioManager* audio_manager_for_tests_; | 163 media::AudioSystem* GetAudioSystem(); |
156 | 164 |
| 165 static media::AudioSystem* audio_system_for_tests_; |
| 166 media::AudioSystem* audio_system_; |
157 std::unique_ptr<SpeechRecognitionEngine> recognition_engine_; | 167 std::unique_ptr<SpeechRecognitionEngine> recognition_engine_; |
158 Endpointer endpointer_; | 168 Endpointer endpointer_; |
159 scoped_refptr<media::AudioInputController> audio_controller_; | 169 scoped_refptr<media::AudioInputController> audio_controller_; |
160 std::unique_ptr<media::AudioLog> audio_log_; | 170 std::unique_ptr<media::AudioLog> audio_log_; |
161 int num_samples_recorded_; | 171 int num_samples_recorded_; |
162 float audio_level_; | 172 float audio_level_; |
163 bool is_dispatching_event_; | 173 bool is_dispatching_event_; |
164 bool provisional_results_; | 174 bool provisional_results_; |
165 bool end_of_utterance_; | 175 bool end_of_utterance_; |
166 FSMState state_; | 176 FSMState state_; |
167 std::string device_id_; | 177 std::string device_id_; |
| 178 media::AudioParameters device_params_; |
168 | 179 |
169 class OnDataConverter; | 180 class OnDataConverter; |
170 | 181 |
171 // Converts data between native input format and a WebSpeech specific | 182 // Converts data between native input format and a WebSpeech specific |
172 // output format. | 183 // output format. |
173 std::unique_ptr<SpeechRecognizerImpl::OnDataConverter> audio_converter_; | 184 std::unique_ptr<SpeechRecognizerImpl::OnDataConverter> audio_converter_; |
174 | 185 |
| 186 base::WeakPtrFactory<SpeechRecognizerImpl> weak_ptr_factory_; |
175 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl); | 187 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl); |
176 }; | 188 }; |
177 | 189 |
178 } // namespace content | 190 } // namespace content |
179 | 191 |
180 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ | 192 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ |
OLD | NEW |