Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(95)

Side by Side Diff: content/browser/speech/speech_recognizer_impl.h

Issue 2675713002: Switch Speech Recognition to asynchronous callback-based AudioManager interactions. (Closed)
Patch Set: review comments addressed Created 3 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD NEW
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ 5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_
6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ 6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_
7 7
8 #include <memory> 8 #include <memory>
9 #include <string> 9 #include <string>
10 10
11 #include "base/macros.h" 11 #include "base/macros.h"
12 #include "base/memory/weak_ptr.h"
12 #include "content/browser/speech/endpointer/endpointer.h" 13 #include "content/browser/speech/endpointer/endpointer.h"
13 #include "content/browser/speech/speech_recognition_engine.h" 14 #include "content/browser/speech/speech_recognition_engine.h"
14 #include "content/browser/speech/speech_recognizer.h" 15 #include "content/browser/speech/speech_recognizer.h"
15 #include "content/public/common/speech_recognition_error.h" 16 #include "content/public/common/speech_recognition_error.h"
16 #include "content/public/common/speech_recognition_result.h" 17 #include "content/public/common/speech_recognition_result.h"
17 #include "media/audio/audio_input_controller.h" 18 #include "media/audio/audio_input_controller.h"
18 #include "media/audio/audio_logging.h" 19 #include "media/audio/audio_logging.h"
19 #include "net/url_request/url_request_context_getter.h" 20 #include "net/url_request/url_request_context_getter.h"
20 21
21 namespace media { 22 namespace media {
22 class AudioBus; 23 class AudioBus;
23 class AudioManager; 24 class AudioSystem;
24 } 25 }
25 26
26 namespace content { 27 namespace content {
27 28
28 class SpeechRecognitionEventListener; 29 class SpeechRecognitionEventListener;
29 30
30 // Handles speech recognition for a session (identified by |session_id|), taking 31 // Handles speech recognition for a session (identified by |session_id|), taking
31 // care of audio capture, silence detection/endpointer and interaction with the 32 // care of audio capture, silence detection/endpointer and interaction with the
32 // SpeechRecognitionEngine. 33 // SpeechRecognitionEngine.
33 class CONTENT_EXPORT SpeechRecognizerImpl 34 class CONTENT_EXPORT SpeechRecognizerImpl
34 : public SpeechRecognizer, 35 : public SpeechRecognizer,
35 public media::AudioInputController::EventHandler, 36 public media::AudioInputController::EventHandler,
36 public media::AudioInputController::SyncWriter, 37 public media::AudioInputController::SyncWriter,
37 public NON_EXPORTED_BASE(SpeechRecognitionEngine::Delegate) { 38 public NON_EXPORTED_BASE(SpeechRecognitionEngine::Delegate) {
38 public: 39 public:
39 static const int kAudioSampleRate; 40 static const int kAudioSampleRate;
40 static const media::ChannelLayout kChannelLayout; 41 static const media::ChannelLayout kChannelLayout;
41 static const int kNumBitsPerAudioSample; 42 static const int kNumBitsPerAudioSample;
42 static const int kNoSpeechTimeoutMs; 43 static const int kNoSpeechTimeoutMs;
43 static const int kEndpointerEstimationTimeMs; 44 static const int kEndpointerEstimationTimeMs;
44 45
45 static void SetAudioManagerForTesting(media::AudioManager* audio_manager); 46 static void SetAudioSystemForTesting(media::AudioSystem* audio_system);
46 47
47 SpeechRecognizerImpl(SpeechRecognitionEventListener* listener, 48 SpeechRecognizerImpl(SpeechRecognitionEventListener* listener,
49 media::AudioSystem* audio_system,
48 int session_id, 50 int session_id,
49 bool continuous, 51 bool continuous,
50 bool provisional_results, 52 bool provisional_results,
51 SpeechRecognitionEngine* engine); 53 SpeechRecognitionEngine* engine);
52 54
53 void StartRecognition(const std::string& device_id) override; 55 void StartRecognition(const std::string& device_id) override;
54 void AbortRecognition() override; 56 void AbortRecognition() override;
55 void StopAudioCapture() override; 57 void StopAudioCapture() override;
56 bool IsActive() const override; 58 bool IsActive() const override;
57 bool IsCapturingAudio() const override; 59 bool IsCapturingAudio() const override;
58 const SpeechRecognitionEngine& recognition_engine() const; 60 const SpeechRecognitionEngine& recognition_engine() const;
59 61
60 private: 62 private:
61 friend class SpeechRecognizerTest; 63 friend class SpeechRecognizerTest;
62 64
63 enum FSMState { 65 enum FSMState {
64 STATE_IDLE = 0, 66 STATE_IDLE = 0,
67 STATE_PREPARING,
65 STATE_STARTING, 68 STATE_STARTING,
66 STATE_ESTIMATING_ENVIRONMENT, 69 STATE_ESTIMATING_ENVIRONMENT,
67 STATE_WAITING_FOR_SPEECH, 70 STATE_WAITING_FOR_SPEECH,
68 STATE_RECOGNIZING, 71 STATE_RECOGNIZING,
69 STATE_WAITING_FINAL_RESULT, 72 STATE_WAITING_FINAL_RESULT,
70 STATE_ENDED, 73 STATE_ENDED,
71 STATE_MAX_VALUE = STATE_ENDED 74 STATE_MAX_VALUE = STATE_ENDED
72 }; 75 };
73 76
74 enum FSMEvent { 77 enum FSMEvent {
75 EVENT_ABORT = 0, 78 EVENT_ABORT = 0,
79 EVENT_PREPARE,
76 EVENT_START, 80 EVENT_START,
77 EVENT_STOP_CAPTURE, 81 EVENT_STOP_CAPTURE,
78 EVENT_AUDIO_DATA, 82 EVENT_AUDIO_DATA,
79 EVENT_ENGINE_RESULT, 83 EVENT_ENGINE_RESULT,
80 EVENT_ENGINE_ERROR, 84 EVENT_ENGINE_ERROR,
81 EVENT_AUDIO_ERROR, 85 EVENT_AUDIO_ERROR,
82 EVENT_MAX_VALUE = EVENT_AUDIO_ERROR 86 EVENT_MAX_VALUE = EVENT_AUDIO_ERROR
83 }; 87 };
84 88
85 struct FSMEventArgs { 89 struct FSMEventArgs {
(...skipping 12 matching lines...) Expand all
98 // Entry point for pushing any new external event into the recognizer FSM. 102 // Entry point for pushing any new external event into the recognizer FSM.
99 void DispatchEvent(const FSMEventArgs& event_args); 103 void DispatchEvent(const FSMEventArgs& event_args);
100 104
101 // Defines the behavior of the recognizer FSM, selecting the appropriate 105 // Defines the behavior of the recognizer FSM, selecting the appropriate
102 // transition according to the current state and event. 106 // transition according to the current state and event.
103 FSMState ExecuteTransitionAndGetNextState(const FSMEventArgs& args); 107 FSMState ExecuteTransitionAndGetNextState(const FSMEventArgs& args);
104 108
105 // Process a new audio chunk in the audio pipeline (endpointer, vumeter, etc). 109 // Process a new audio chunk in the audio pipeline (endpointer, vumeter, etc).
106 void ProcessAudioPipeline(const AudioChunk& raw_audio); 110 void ProcessAudioPipeline(const AudioChunk& raw_audio);
107 111
112 // Callback from AudioSystem.
113 void OnDeviceInfo(const media::AudioParameters& params);
114
108 // The methods below handle transitions of the recognizer FSM. 115 // The methods below handle transitions of the recognizer FSM.
116 FSMState PrepareRecognition(const FSMEventArgs&);
109 FSMState StartRecording(const FSMEventArgs& event_args); 117 FSMState StartRecording(const FSMEventArgs& event_args);
110 FSMState StartRecognitionEngine(const FSMEventArgs& event_args); 118 FSMState StartRecognitionEngine(const FSMEventArgs& event_args);
111 FSMState WaitEnvironmentEstimationCompletion(const FSMEventArgs& event_args); 119 FSMState WaitEnvironmentEstimationCompletion(const FSMEventArgs& event_args);
112 FSMState DetectUserSpeechOrTimeout(const FSMEventArgs& event_args); 120 FSMState DetectUserSpeechOrTimeout(const FSMEventArgs& event_args);
113 FSMState StopCaptureAndWaitForResult(const FSMEventArgs& event_args); 121 FSMState StopCaptureAndWaitForResult(const FSMEventArgs& event_args);
114 FSMState ProcessIntermediateResult(const FSMEventArgs& event_args); 122 FSMState ProcessIntermediateResult(const FSMEventArgs& event_args);
115 FSMState ProcessFinalResult(const FSMEventArgs& event_args); 123 FSMState ProcessFinalResult(const FSMEventArgs& event_args);
116 FSMState AbortSilently(const FSMEventArgs& event_args); 124 FSMState AbortSilently(const FSMEventArgs& event_args);
117 FSMState AbortWithError(const FSMEventArgs& event_args); 125 FSMState AbortWithError(const FSMEventArgs& event_args);
118 FSMState Abort(const SpeechRecognitionError& error); 126 FSMState Abort(const SpeechRecognitionError& error);
(...skipping 27 matching lines...) Expand all
146 uint32_t hardware_delay_bytes) override; 154 uint32_t hardware_delay_bytes) override;
147 void Close() override; 155 void Close() override;
148 156
149 // SpeechRecognitionEngineDelegate methods. 157 // SpeechRecognitionEngineDelegate methods.
150 void OnSpeechRecognitionEngineResults( 158 void OnSpeechRecognitionEngineResults(
151 const SpeechRecognitionResults& results) override; 159 const SpeechRecognitionResults& results) override;
152 void OnSpeechRecognitionEngineEndOfUtterance() override; 160 void OnSpeechRecognitionEngineEndOfUtterance() override;
153 void OnSpeechRecognitionEngineError( 161 void OnSpeechRecognitionEngineError(
154 const SpeechRecognitionError& error) override; 162 const SpeechRecognitionError& error) override;
155 163
156 static media::AudioManager* audio_manager_for_tests_; 164 media::AudioSystem* GetAudioSystem();
157 165
166 // Substitutes the real audio system in browser tests.
167 static media::AudioSystem* audio_system_for_tests_;
168 media::AudioSystem* audio_system_;
158 std::unique_ptr<SpeechRecognitionEngine> recognition_engine_; 169 std::unique_ptr<SpeechRecognitionEngine> recognition_engine_;
159 Endpointer endpointer_; 170 Endpointer endpointer_;
160 scoped_refptr<media::AudioInputController> audio_controller_; 171 scoped_refptr<media::AudioInputController> audio_controller_;
161 std::unique_ptr<media::AudioLog> audio_log_; 172 std::unique_ptr<media::AudioLog> audio_log_;
162 int num_samples_recorded_; 173 int num_samples_recorded_;
163 float audio_level_; 174 float audio_level_;
164 bool is_dispatching_event_; 175 bool is_dispatching_event_;
165 bool provisional_results_; 176 bool provisional_results_;
166 bool end_of_utterance_; 177 bool end_of_utterance_;
167 FSMState state_; 178 FSMState state_;
168 std::string device_id_; 179 std::string device_id_;
180 media::AudioParameters device_params_;
169 181
170 class OnDataConverter; 182 class OnDataConverter;
171 183
172 // Converts data between native input format and a WebSpeech specific 184 // Converts data between native input format and a WebSpeech specific
173 // output format. 185 // output format.
174 std::unique_ptr<SpeechRecognizerImpl::OnDataConverter> audio_converter_; 186 std::unique_ptr<SpeechRecognizerImpl::OnDataConverter> audio_converter_;
175 187
188 base::WeakPtrFactory<SpeechRecognizerImpl> weak_ptr_factory_;
176 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl); 189 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl);
177 }; 190 };
178 191
179 } // namespace content 192 } // namespace content
180 193
181 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ 194 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_
OLD NEW

Powered by Google App Engine
This is Rietveld 408576698