Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(225)

Side by Side Diff: content/browser/speech/speech_recognizer_impl.h

Issue 9835049: Speech refactoring: Reimplemented speech_recognizer as a FSM. (CL1.5) (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Added CONTENT_EXPORT on GoogleOneShotRemoteEngineConfig to address compilation issues on win. Created 8 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ 5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_
6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ 6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_
7 7
8 #include "base/basictypes.h" 8 #include "base/basictypes.h"
9 #include "base/memory/ref_counted.h" 9 #include "base/memory/ref_counted.h"
10 #include "base/memory/scoped_ptr.h" 10 #include "base/memory/scoped_ptr.h"
11 #include "content/browser/speech/endpointer/endpointer.h" 11 #include "content/browser/speech/endpointer/endpointer.h"
12 #include "content/browser/speech/speech_recognition_engine.h" 12 #include "content/browser/speech/speech_recognition_engine.h"
13 #include "content/public/browser/speech_recognizer.h" 13 #include "content/public/browser/speech_recognizer.h"
14 #include "content/public/common/speech_recognition_error.h" 14 #include "content/public/common/speech_recognition_error.h"
15 #include "content/public/common/speech_recognition_result.h"
15 #include "media/audio/audio_input_controller.h" 16 #include "media/audio/audio_input_controller.h"
16 #include "net/url_request/url_request_context_getter.h" 17 #include "net/url_request/url_request_context_getter.h"
17 18
18 namespace content { 19 namespace content {
19 class SpeechRecognitionEventListener; 20 class SpeechRecognitionEventListener;
20 struct SpeechRecognitionResult; 21 struct SpeechRecognitionResult;
21 } 22 }
22 23
23 namespace media { 24 namespace media {
24 class AudioInputController; 25 class AudioInputController;
25 class AudioManager; 26 class AudioManager;
26 } 27 }
27 28
28 namespace speech { 29 namespace speech {
29 30
30 // Records audio, sends recorded audio to server and translates server response 31 // TODO(primiano) Next CL: Remove the Impl suffix and the exported
31 // to recognition result. 32 // /content/public/browser/speech_recognizer.h interface since this class should
33 // not be visible outside (currently we need it for speech input extension API).
34
35 // Handles speech recognition for a session (identified by |caller_id|), taking
36 // care of audio capture, silence detection/endpointer and interaction with the
37 // SpeechRecognitionEngine.
32 class CONTENT_EXPORT SpeechRecognizerImpl 38 class CONTENT_EXPORT SpeechRecognizerImpl
33 : public NON_EXPORTED_BASE(content::SpeechRecognizer), 39 : public NON_EXPORTED_BASE(content::SpeechRecognizer),
34 public media::AudioInputController::EventHandler, 40 public media::AudioInputController::EventHandler,
35 public NON_EXPORTED_BASE(SpeechRecognitionEngineDelegate) { 41 public NON_EXPORTED_BASE(SpeechRecognitionEngineDelegate) {
36 public: 42 public:
37 static const int kAudioSampleRate; 43 static const int kAudioSampleRate;
38 static const ChannelLayout kChannelLayout; 44 static const ChannelLayout kChannelLayout;
39 static const int kNumBitsPerAudioSample; 45 static const int kNumBitsPerAudioSample;
40 static const int kNoSpeechTimeoutMs; 46 static const int kNoSpeechTimeoutMs;
41 static const int kEndpointerEstimationTimeMs; 47 static const int kEndpointerEstimationTimeMs;
42 48
43 SpeechRecognizerImpl( 49 SpeechRecognizerImpl(
44 content::SpeechRecognitionEventListener* listener, 50 content::SpeechRecognitionEventListener* listener,
45 int caller_id, 51 int caller_id,
46 const std::string& language, 52 SpeechRecognitionEngine* engine);
47 const std::string& grammar,
48 net::URLRequestContextGetter* context_getter,
49 bool filter_profanities,
50 const std::string& hardware_info,
51 const std::string& origin_url);
52 virtual ~SpeechRecognizerImpl(); 53 virtual ~SpeechRecognizerImpl();
53 54
54 // content::SpeechRecognizer methods. 55 // content::SpeechRecognizer methods.
55 virtual void StartRecognition() OVERRIDE; 56 virtual void StartRecognition() OVERRIDE;
56 virtual void AbortRecognition() OVERRIDE; 57 virtual void AbortRecognition() OVERRIDE;
57 virtual void StopAudioCapture() OVERRIDE; 58 virtual void StopAudioCapture() OVERRIDE;
58 virtual bool IsActive() const OVERRIDE; 59 virtual bool IsActive() const OVERRIDE;
59 virtual bool IsCapturingAudio() const OVERRIDE; 60 virtual bool IsCapturingAudio() const OVERRIDE;
60 const SpeechRecognitionEngine& recognition_engine() const; 61 const SpeechRecognitionEngine& recognition_engine() const;
61 62
63 private:
64 friend class SpeechRecognizerImplTest;
65
66 enum FSMState {
67 STATE_IDLE = 0,
68 STATE_STARTING,
69 STATE_ESTIMATING_ENVIRONMENT,
70 STATE_WAITING_FOR_SPEECH,
71 STATE_RECOGNIZING,
72 STATE_WAITING_FINAL_RESULT,
73 STATE_MAX_VALUE = STATE_WAITING_FINAL_RESULT
74 };
75
76 enum FSMEvent {
77 EVENT_ABORT = 0,
78 EVENT_START,
79 EVENT_STOP_CAPTURE,
80 EVENT_AUDIO_DATA,
81 EVENT_ENGINE_RESULT,
82 EVENT_ENGINE_ERROR,
83 EVENT_AUDIO_ERROR,
84 EVENT_MAX_VALUE = EVENT_AUDIO_ERROR
85 };
86
87 struct FSMEventArgs {
88 explicit FSMEventArgs(FSMEvent event_value);
89 ~FSMEventArgs();
90
91 FSMEvent event;
92 int audio_error_code;
93 scoped_refptr<AudioChunk> audio_data;
94 content::SpeechRecognitionResult engine_result;
95 content::SpeechRecognitionError engine_error;
96 };
97
98 // Entry point for pushing any new external event into the recognizer FSM.
99 void DispatchEvent(const FSMEventArgs& event_args);
100
101 // Defines the behavior of the recognizer FSM, selecting the appropriate
102 // transition according to the current state and event.
103 FSMState ExecuteTransitionAndGetNextState(const FSMEventArgs& args);
104
105 // Processes a new audio chunk in the audio pipeline (endpointer, VU meter, etc.).
106 void ProcessAudioPipeline(const AudioChunk& raw_audio);
107
108 // The methods below handle transitions of the recognizer FSM.
109 FSMState StartRecording(const FSMEventArgs& event_args);
110 FSMState StartRecognitionEngine(const FSMEventArgs& event_args);
111 FSMState WaitEnvironmentEstimationCompletion(const FSMEventArgs& event_args);
112 FSMState DetectUserSpeechOrTimeout(const FSMEventArgs& event_args);
113 FSMState StopCaptureAndWaitForResult(const FSMEventArgs& event_args);
114 FSMState ProcessIntermediateResult(const FSMEventArgs& event_args);
115 FSMState ProcessFinalResult(const FSMEventArgs& event_args);
116 FSMState Abort(const FSMEventArgs& event_args);
117 FSMState AbortWithError(const content::SpeechRecognitionError* error);
118 FSMState AbortWithError(const content::SpeechRecognitionError& error);
119 FSMState DetectEndOfSpeech(const FSMEventArgs& event_args);
120 FSMState DoNothing(const FSMEventArgs& event_args) const;
121 FSMState NotFeasible(const FSMEventArgs& event_args);
122
123 // Returns the time span of captured audio samples since the start of capture.
124 int GetElapsedTimeMs() const;
125
126 // Calculates the input volume to be displayed in the UI, triggering the
127 // OnAudioLevelsChange event accordingly.
128 void UpdateSignalAndNoiseLevels(const float& rms, bool clip_detected);
129
130 void CloseAudioControllerAsynchronously();
131 void SetAudioManagerForTesting(media::AudioManager* audio_manager);
132
133 // Callback called on the IO thread by audio_controller->Close().
134 void OnAudioClosed(media::AudioInputController*);
135
62 // AudioInputController::EventHandler methods. 136 // AudioInputController::EventHandler methods.
63 virtual void OnCreated(media::AudioInputController* controller) OVERRIDE {} 137 virtual void OnCreated(media::AudioInputController* controller) OVERRIDE {}
64 virtual void OnRecording(media::AudioInputController* controller) OVERRIDE {} 138 virtual void OnRecording(media::AudioInputController* controller) OVERRIDE {}
65 virtual void OnError(media::AudioInputController* controller, 139 virtual void OnError(media::AudioInputController* controller,
66 int error_code) OVERRIDE; 140 int error_code) OVERRIDE;
67 virtual void OnData(media::AudioInputController* controller, 141 virtual void OnData(media::AudioInputController* controller,
68 const uint8* data, 142 const uint8* data, uint32 size) OVERRIDE;
69 uint32 size) OVERRIDE;
70 143
71 // SpeechRecognitionEngineDelegate methods. 144 // SpeechRecognitionEngineDelegate methods.
72 virtual void OnSpeechRecognitionEngineResult( 145 virtual void OnSpeechRecognitionEngineResult(
73 const content::SpeechRecognitionResult& result) OVERRIDE; 146 const content::SpeechRecognitionResult& result) OVERRIDE;
74 virtual void OnSpeechRecognitionEngineError( 147 virtual void OnSpeechRecognitionEngineError(
75 const content::SpeechRecognitionError& error) OVERRIDE; 148 const content::SpeechRecognitionError& error) OVERRIDE;
76 149
77 private:
78 friend class SpeechRecognizerImplTest;
79
80 void InformErrorAndAbortRecognition(
81 content::SpeechRecognitionErrorCode error);
82 void SendRecordedAudioToServer();
83
84 void HandleOnError(int error_code); // Handles OnError in the IO thread.
85
86 // Handles OnData in the IO thread.
87 void HandleOnData(scoped_refptr<AudioChunk> raw_audio);
88
89 void OnAudioClosed(media::AudioInputController*);
90
91 // Helper method which closes the audio controller and frees it asynchronously
92 // without blocking the IO thread.
93 void CloseAudioControllerAsynchronously();
94
95 void SetAudioManagerForTesting(media::AudioManager* audio_manager);
96
97 content::SpeechRecognitionEventListener* listener_; 150 content::SpeechRecognitionEventListener* listener_;
98 media::AudioManager* testing_audio_manager_; 151 media::AudioManager* testing_audio_manager_;
99 scoped_ptr<SpeechRecognitionEngine> recognition_engine_; 152 scoped_ptr<SpeechRecognitionEngine> recognition_engine_;
100 Endpointer endpointer_; 153 Endpointer endpointer_;
101 scoped_refptr<media::AudioInputController> audio_controller_; 154 scoped_refptr<media::AudioInputController> audio_controller_;
102 scoped_refptr<net::URLRequestContextGetter> context_getter_;
103 int caller_id_; 155 int caller_id_;
104 std::string language_;
105 std::string grammar_;
106 bool filter_profanities_;
107 std::string hardware_info_;
108 std::string origin_url_;
109 int num_samples_recorded_; 156 int num_samples_recorded_;
110 float audio_level_; 157 float audio_level_;
158 bool is_dispatching_event_;
159 FSMState state_;
111 160
112 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl); 161 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl);
113 }; 162 };
114 163
115 } // namespace speech 164 } // namespace speech
116 165
117 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ 166 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_
OLDNEW
« no previous file with comments | « content/browser/speech/google_one_shot_remote_engine.h ('k') | content/browser/speech/speech_recognizer_impl.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698