Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(443)

Side by Side Diff: content/browser/speech/speech_recognizer_impl.h

Issue 2675713002: Switch Speech Recognition to asynchronous callback-based AudioManager interactions. (Closed)
Patch Set: Created 3 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ 5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_
6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ 6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_
7 7
8 #include <memory> 8 #include <memory>
9 9
10 #include "base/macros.h" 10 #include "base/macros.h"
11 #include "base/memory/weak_ptr.h"
11 #include "content/browser/speech/endpointer/endpointer.h" 12 #include "content/browser/speech/endpointer/endpointer.h"
12 #include "content/browser/speech/speech_recognition_engine.h" 13 #include "content/browser/speech/speech_recognition_engine.h"
13 #include "content/browser/speech/speech_recognizer.h" 14 #include "content/browser/speech/speech_recognizer.h"
14 #include "content/public/common/speech_recognition_error.h" 15 #include "content/public/common/speech_recognition_error.h"
15 #include "content/public/common/speech_recognition_result.h" 16 #include "content/public/common/speech_recognition_result.h"
16 #include "media/audio/audio_input_controller.h" 17 #include "media/audio/audio_input_controller.h"
17 #include "media/audio/audio_logging.h" 18 #include "media/audio/audio_logging.h"
18 #include "net/url_request/url_request_context_getter.h" 19 #include "net/url_request/url_request_context_getter.h"
19 20
20 namespace media { 21 namespace media {
21 class AudioBus; 22 class AudioBus;
22 class AudioManager; 23 class AudioSystem;
23 } 24 }
24 25
25 namespace content { 26 namespace content {
26 27
27 class SpeechRecognitionEventListener; 28 class SpeechRecognitionEventListener;
28 29
29 // Handles speech recognition for a session (identified by |session_id|), taking 30 // Handles speech recognition for a session (identified by |session_id|), taking
30 // care of audio capture, silence detection/endpointer and interaction with the 31 // care of audio capture, silence detection/endpointer and interaction with the
31 // SpeechRecognitionEngine. 32 // SpeechRecognitionEngine.
32 class CONTENT_EXPORT SpeechRecognizerImpl 33 class CONTENT_EXPORT SpeechRecognizerImpl
33 : public SpeechRecognizer, 34 : public SpeechRecognizer,
34 public media::AudioInputController::EventHandler, 35 public media::AudioInputController::EventHandler,
35 public media::AudioInputController::SyncWriter, 36 public media::AudioInputController::SyncWriter,
36 public NON_EXPORTED_BASE(SpeechRecognitionEngine::Delegate) { 37 public NON_EXPORTED_BASE(SpeechRecognitionEngine::Delegate) {
37 public: 38 public:
38 static const int kAudioSampleRate; 39 static const int kAudioSampleRate;
39 static const media::ChannelLayout kChannelLayout; 40 static const media::ChannelLayout kChannelLayout;
40 static const int kNumBitsPerAudioSample; 41 static const int kNumBitsPerAudioSample;
41 static const int kNoSpeechTimeoutMs; 42 static const int kNoSpeechTimeoutMs;
42 static const int kEndpointerEstimationTimeMs; 43 static const int kEndpointerEstimationTimeMs;
43 44
44 static void SetAudioManagerForTesting(media::AudioManager* audio_manager); 45 static void SetAudioSystemForTesting(media::AudioSystem* audio_system);
45 46
46 SpeechRecognizerImpl(SpeechRecognitionEventListener* listener, 47 SpeechRecognizerImpl(SpeechRecognitionEventListener* listener,
48 media::AudioSystem* audio_system,
47 int session_id, 49 int session_id,
48 bool continuous, 50 bool continuous,
49 bool provisional_results, 51 bool provisional_results,
50 SpeechRecognitionEngine* engine); 52 SpeechRecognitionEngine* engine);
51 53
52 void StartRecognition(const std::string& device_id) override; 54 void StartRecognition(const std::string& device_id) override;
53 void AbortRecognition() override; 55 void AbortRecognition() override;
54 void StopAudioCapture() override; 56 void StopAudioCapture() override;
55 bool IsActive() const override; 57 bool IsActive() const override;
56 bool IsCapturingAudio() const override; 58 bool IsCapturingAudio() const override;
57 const SpeechRecognitionEngine& recognition_engine() const; 59 const SpeechRecognitionEngine& recognition_engine() const;
58 60
59 private: 61 private:
60 friend class SpeechRecognizerTest; 62 friend class SpeechRecognizerTest;
61 63
62 enum FSMState { 64 enum FSMState {
63 STATE_IDLE = 0, 65 STATE_IDLE = 0,
66 STATE_PREPARING,
64 STATE_STARTING, 67 STATE_STARTING,
65 STATE_ESTIMATING_ENVIRONMENT, 68 STATE_ESTIMATING_ENVIRONMENT,
66 STATE_WAITING_FOR_SPEECH, 69 STATE_WAITING_FOR_SPEECH,
67 STATE_RECOGNIZING, 70 STATE_RECOGNIZING,
68 STATE_WAITING_FINAL_RESULT, 71 STATE_WAITING_FINAL_RESULT,
69 STATE_ENDED, 72 STATE_ENDED,
70 STATE_MAX_VALUE = STATE_ENDED 73 STATE_MAX_VALUE = STATE_ENDED
71 }; 74 };
72 75
73 enum FSMEvent { 76 enum FSMEvent {
74 EVENT_ABORT = 0, 77 EVENT_ABORT = 0,
78 EVENT_PREPARE,
75 EVENT_START, 79 EVENT_START,
76 EVENT_STOP_CAPTURE, 80 EVENT_STOP_CAPTURE,
77 EVENT_AUDIO_DATA, 81 EVENT_AUDIO_DATA,
78 EVENT_ENGINE_RESULT, 82 EVENT_ENGINE_RESULT,
79 EVENT_ENGINE_ERROR, 83 EVENT_ENGINE_ERROR,
80 EVENT_AUDIO_ERROR, 84 EVENT_AUDIO_ERROR,
81 EVENT_MAX_VALUE = EVENT_AUDIO_ERROR 85 EVENT_MAX_VALUE = EVENT_AUDIO_ERROR
82 }; 86 };
83 87
84 struct FSMEventArgs { 88 struct FSMEventArgs {
(...skipping 12 matching lines...) Expand all
97 // Entry point for pushing any new external event into the recognizer FSM. 101 // Entry point for pushing any new external event into the recognizer FSM.
98 void DispatchEvent(const FSMEventArgs& event_args); 102 void DispatchEvent(const FSMEventArgs& event_args);
99 103
100 // Defines the behavior of the recognizer FSM, selecting the appropriate 104 // Defines the behavior of the recognizer FSM, selecting the appropriate
101 // transition according to the current state and event. 105 // transition according to the current state and event.
102 FSMState ExecuteTransitionAndGetNextState(const FSMEventArgs& args); 106 FSMState ExecuteTransitionAndGetNextState(const FSMEventArgs& args);
103 107
104 // Process a new audio chunk in the audio pipeline (endpointer, vumeter, etc). 108 // Process a new audio chunk in the audio pipeline (endpointer, vumeter, etc).
105 void ProcessAudioPipeline(const AudioChunk& raw_audio); 109 void ProcessAudioPipeline(const AudioChunk& raw_audio);
106 110
111 // Callback from AudioSystem.
112 void OnDeviceInfo(const media::AudioParameters& params);
113
107 // The methods below handle transitions of the recognizer FSM. 114 // The methods below handle transitions of the recognizer FSM.
115 FSMState PrepareRecognition(const FSMEventArgs&);
108 FSMState StartRecording(const FSMEventArgs& event_args); 116 FSMState StartRecording(const FSMEventArgs& event_args);
109 FSMState StartRecognitionEngine(const FSMEventArgs& event_args); 117 FSMState StartRecognitionEngine(const FSMEventArgs& event_args);
110 FSMState WaitEnvironmentEstimationCompletion(const FSMEventArgs& event_args); 118 FSMState WaitEnvironmentEstimationCompletion(const FSMEventArgs& event_args);
111 FSMState DetectUserSpeechOrTimeout(const FSMEventArgs& event_args); 119 FSMState DetectUserSpeechOrTimeout(const FSMEventArgs& event_args);
112 FSMState StopCaptureAndWaitForResult(const FSMEventArgs& event_args); 120 FSMState StopCaptureAndWaitForResult(const FSMEventArgs& event_args);
113 FSMState ProcessIntermediateResult(const FSMEventArgs& event_args); 121 FSMState ProcessIntermediateResult(const FSMEventArgs& event_args);
114 FSMState ProcessFinalResult(const FSMEventArgs& event_args); 122 FSMState ProcessFinalResult(const FSMEventArgs& event_args);
115 FSMState AbortSilently(const FSMEventArgs& event_args); 123 FSMState AbortSilently(const FSMEventArgs& event_args);
116 FSMState AbortWithError(const FSMEventArgs& event_args); 124 FSMState AbortWithError(const FSMEventArgs& event_args);
117 FSMState Abort(const SpeechRecognitionError& error); 125 FSMState Abort(const SpeechRecognitionError& error);
(...skipping 27 matching lines...) Expand all
145 uint32_t hardware_delay_bytes) override; 153 uint32_t hardware_delay_bytes) override;
146 void Close() override; 154 void Close() override;
147 155
148 // SpeechRecognitionEngine::Delegate methods. 156 // SpeechRecognitionEngine::Delegate methods.
149 void OnSpeechRecognitionEngineResults( 157 void OnSpeechRecognitionEngineResults(
150 const SpeechRecognitionResults& results) override; 158 const SpeechRecognitionResults& results) override;
151 void OnSpeechRecognitionEngineEndOfUtterance() override; 159 void OnSpeechRecognitionEngineEndOfUtterance() override;
152 void OnSpeechRecognitionEngineError( 160 void OnSpeechRecognitionEngineError(
153 const SpeechRecognitionError& error) override; 161 const SpeechRecognitionError& error) override;
154 162
155 static media::AudioManager* audio_manager_for_tests_; 163 media::AudioSystem* GetAudioSystem();
156 164
165 static media::AudioSystem* audio_system_for_tests_;
166 media::AudioSystem* audio_system_;
157 std::unique_ptr<SpeechRecognitionEngine> recognition_engine_; 167 std::unique_ptr<SpeechRecognitionEngine> recognition_engine_;
158 Endpointer endpointer_; 168 Endpointer endpointer_;
159 scoped_refptr<media::AudioInputController> audio_controller_; 169 scoped_refptr<media::AudioInputController> audio_controller_;
160 std::unique_ptr<media::AudioLog> audio_log_; 170 std::unique_ptr<media::AudioLog> audio_log_;
161 int num_samples_recorded_; 171 int num_samples_recorded_;
162 float audio_level_; 172 float audio_level_;
163 bool is_dispatching_event_; 173 bool is_dispatching_event_;
164 bool provisional_results_; 174 bool provisional_results_;
165 bool end_of_utterance_; 175 bool end_of_utterance_;
166 FSMState state_; 176 FSMState state_;
167 std::string device_id_; 177 std::string device_id_;
178 media::AudioParameters device_params_;
168 179
169 class OnDataConverter; 180 class OnDataConverter;
170 181
171 // Converts data between native input format and a WebSpeech specific 182 // Converts data between native input format and a WebSpeech specific
172 // output format. 183 // output format.
173 std::unique_ptr<SpeechRecognizerImpl::OnDataConverter> audio_converter_; 184 std::unique_ptr<SpeechRecognizerImpl::OnDataConverter> audio_converter_;
174 185
186 base::WeakPtrFactory<SpeechRecognizerImpl> weak_ptr_factory_;
175 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl); 187 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl);
176 }; 188 };
177 189
178 } // namespace content 190 } // namespace content
179 191
180 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ 192 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698