OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ | 5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ |
6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ | 6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ |
7 | 7 |
8 #include "base/basictypes.h" | 8 #include "base/basictypes.h" |
9 #include "base/memory/ref_counted.h" | 9 #include "base/memory/ref_counted.h" |
10 #include "base/memory/scoped_ptr.h" | 10 #include "base/memory/scoped_ptr.h" |
11 #include "content/browser/speech/endpointer/endpointer.h" | 11 #include "content/browser/speech/endpointer/endpointer.h" |
12 #include "content/browser/speech/speech_recognition_engine.h" | 12 #include "content/browser/speech/speech_recognition_engine.h" |
13 #include "content/public/browser/speech_recognizer.h" | 13 #include "content/public/browser/speech_recognizer.h" |
14 #include "content/public/common/speech_recognition_error.h" | 14 #include "content/public/common/speech_recognition_error.h" |
15 #include "content/public/common/speech_recognition_result.h" | |
15 #include "media/audio/audio_input_controller.h" | 16 #include "media/audio/audio_input_controller.h" |
16 #include "net/url_request/url_request_context_getter.h" | 17 #include "net/url_request/url_request_context_getter.h" |
17 | 18 |
18 namespace content { | 19 namespace content { |
19 class SpeechRecognitionEventListener; | 20 class SpeechRecognitionEventListener; |
20 struct SpeechRecognitionResult; | 21 struct SpeechRecognitionResult; |
21 } | 22 } |
22 | 23 |
23 namespace media { | 24 namespace media { |
24 class AudioInputController; | 25 class AudioInputController; |
25 class AudioManager; | 26 class AudioManager; |
26 } | 27 } |
27 | 28 |
28 namespace speech { | 29 namespace speech { |
30 // TODO(primiano) Next CL: Remove the Impl suffix and the exported | |
Satish
2012/04/12 08:58:33
add newline above full length comments such as this
Primiano Tucci (use gerrit)
2012/04/12 12:56:48
Done.
| |
31 // /content/public/browser/speech_recognizer.h interface since this class should | |
32 // not be visible outside (currently we need it for speech input extension API). | |
29 | 33 |
30 // Records audio, sends recorded audio to server and translates server response | 34 // Handles speech recognition for a session (identified by |caller_id|), taking |
31 // to recognition result. | 35 // care of audio capture, silence detection/endpointer and interaction with the |
36 // SpeechRecognitionEngine. | |
32 class CONTENT_EXPORT SpeechRecognizerImpl | 37 class CONTENT_EXPORT SpeechRecognizerImpl |
33 : public NON_EXPORTED_BASE(content::SpeechRecognizer), | 38 : public NON_EXPORTED_BASE(content::SpeechRecognizer), |
34 public media::AudioInputController::EventHandler, | 39 public media::AudioInputController::EventHandler, |
35 public NON_EXPORTED_BASE(SpeechRecognitionEngineDelegate) { | 40 public NON_EXPORTED_BASE(SpeechRecognitionEngineDelegate) { |
36 public: | 41 public: |
37 static const int kAudioSampleRate; | 42 static const int kAudioSampleRate; |
38 static const ChannelLayout kChannelLayout; | 43 static const ChannelLayout kChannelLayout; |
39 static const int kNumBitsPerAudioSample; | 44 static const int kNumBitsPerAudioSample; |
40 static const int kNoSpeechTimeoutMs; | 45 static const int kNoSpeechTimeoutMs; |
41 static const int kEndpointerEstimationTimeMs; | 46 static const int kEndpointerEstimationTimeMs; |
42 | 47 |
43 SpeechRecognizerImpl( | 48 SpeechRecognizerImpl( |
44 content::SpeechRecognitionEventListener* listener, | 49 content::SpeechRecognitionEventListener* listener, |
45 int caller_id, | 50 int caller_id, |
46 const std::string& language, | 51 SpeechRecognitionEngine* engine); |
47 const std::string& grammar, | |
48 net::URLRequestContextGetter* context_getter, | |
49 bool filter_profanities, | |
50 const std::string& hardware_info, | |
51 const std::string& origin_url); | |
52 virtual ~SpeechRecognizerImpl(); | 52 virtual ~SpeechRecognizerImpl(); |
53 | 53 |
54 // content::SpeechRecognizer methods. | 54 // content::SpeechRecognizer methods. |
55 virtual void StartRecognition() OVERRIDE; | 55 virtual void StartRecognition() OVERRIDE; |
56 virtual void AbortRecognition() OVERRIDE; | 56 virtual void AbortRecognition() OVERRIDE; |
57 virtual void StopAudioCapture() OVERRIDE; | 57 virtual void StopAudioCapture() OVERRIDE; |
58 virtual bool IsActive() const OVERRIDE; | 58 virtual bool IsActive() const OVERRIDE; |
59 virtual bool IsCapturingAudio() const OVERRIDE; | 59 virtual bool IsCapturingAudio() const OVERRIDE; |
60 const SpeechRecognitionEngine& recognition_engine() const; | 60 const SpeechRecognitionEngine& recognition_engine() const; |
61 | 61 |
62 private: | |
63 friend class SpeechRecognizerImplTest; | |
64 | |
65 enum FSMState { | |
66 STATE_IDLE = 0, | |
67 STATE_STARTING, | |
68 STATE_ESTIMATING_ENVIRONMENT, | |
69 STATE_WAITING_FOR_SPEECH, | |
70 STATE_RECOGNIZING, | |
71 STATE_WAITING_FINAL_RESULT, | |
72 STATE_MAX = STATE_WAITING_FINAL_RESULT | |
73 }; | |
74 | |
75 enum FSMEvent { | |
76 EVENT_ABORT = 0, | |
77 EVENT_START, | |
78 EVENT_STOP_CAPTURE, | |
79 EVENT_AUDIO_DATA, | |
80 EVENT_ENGINE_RESULT, | |
81 EVENT_ENGINE_ERROR, | |
82 EVENT_AUDIO_ERROR, | |
83 EVENT_MAX = EVENT_AUDIO_ERROR | |
84 }; | |
85 | |
86 struct FSMEventArgs { | |
87 explicit FSMEventArgs(FSMEvent event_value); | |
88 ~FSMEventArgs(); | |
89 | |
90 FSMEvent event; | |
91 int audio_error_code; | |
92 scoped_refptr<AudioChunk> audio_data; | |
93 content::SpeechRecognitionResult engine_result; | |
94 content::SpeechRecognitionError engine_error; | |
95 }; | |
96 | |
97 void DispatchEvent(const FSMEventArgs& event_args); | |
98 FSMState ExecuteTransitionAndGetNextState(const FSMEventArgs& args); | |
99 void ProcessAudioPipeline(const AudioChunk& raw_audio); | |
100 FSMState StartRecording(const FSMEventArgs& event_args); | |
101 FSMState StartRecognitionEngine(const FSMEventArgs& event_args); | |
102 FSMState WaitEnvironmentEstimationCompletion(const FSMEventArgs& event_args); | |
103 FSMState DetectUserSpeechOrTimeout(const FSMEventArgs& event_args); | |
104 FSMState StopCaptureAndWaitForResult(const FSMEventArgs& event_args); | |
105 FSMState ProcessIntermediateResult(const FSMEventArgs& event_args); | |
106 FSMState ProcessFinalResult(const FSMEventArgs& event_args); | |
107 FSMState Abort(const FSMEventArgs& event_args); | |
108 FSMState AbortWithError(const content::SpeechRecognitionError* error); | |
109 FSMState AbortWithError(const content::SpeechRecognitionError& error); | |
110 FSMState DetectEndOfSpeech(const FSMEventArgs& event_args); | |
111 FSMState DoNothing(const FSMEventArgs& event_args) const; | |
112 FSMState NotFeasible(const FSMEventArgs& event_args); | |
113 int GetElapsedTimeMs() const; | |
Satish
2012/04/12 08:58:33
can we separate logically the above methods from b…
Primiano Tucci (use gerrit)
2012/04/12 12:56:48
Done.
| |
114 void UpdateSignalAndNoiseLevels(const float& rms, bool clip_detected); | |
115 void CloseAudioControllerAsynchronously(); | |
116 void SetAudioManagerForTesting(media::AudioManager* audio_manager); | |
117 | |
62 // AudioInputController::EventHandler methods. | 118 // AudioInputController::EventHandler methods. |
63 virtual void OnCreated(media::AudioInputController* controller) OVERRIDE {} | 119 virtual void OnCreated(media::AudioInputController* controller) OVERRIDE {} |
64 virtual void OnRecording(media::AudioInputController* controller) OVERRIDE {} | 120 virtual void OnRecording(media::AudioInputController* controller) OVERRIDE {} |
65 virtual void OnError(media::AudioInputController* controller, | 121 virtual void OnError(media::AudioInputController* controller, |
66 int error_code) OVERRIDE; | 122 int error_code) OVERRIDE; |
67 virtual void OnData(media::AudioInputController* controller, | 123 virtual void OnData(media::AudioInputController* controller, |
68 const uint8* data, | 124 const uint8* data, uint32 size) OVERRIDE; |
69 uint32 size) OVERRIDE; | 125 |
126 // Callback called on IO thread by audio_controller->Close(). | |
127 void OnAudioClosed(media::AudioInputController*); | |
70 | 128 |
71 // SpeechRecognitionEngineDelegate methods. | 129 // SpeechRecognitionEngineDelegate methods. |
72 virtual void OnSpeechRecognitionEngineResult( | 130 virtual void OnSpeechRecognitionEngineResult( |
73 const content::SpeechRecognitionResult& result) OVERRIDE; | 131 const content::SpeechRecognitionResult& result) OVERRIDE; |
74 virtual void OnSpeechRecognitionEngineError( | 132 virtual void OnSpeechRecognitionEngineError( |
75 const content::SpeechRecognitionError& error) OVERRIDE; | 133 const content::SpeechRecognitionError& error) OVERRIDE; |
76 | 134 |
77 private: | |
78 friend class SpeechRecognizerImplTest; | |
79 | |
80 void InformErrorAndAbortRecognition( | |
81 content::SpeechRecognitionErrorCode error); | |
82 void SendRecordedAudioToServer(); | |
83 | |
84 void HandleOnError(int error_code); // Handles OnError in the IO thread. | |
85 | |
86 // Handles OnData in the IO thread. | |
87 void HandleOnData(scoped_refptr<AudioChunk> raw_audio); | |
88 | |
89 void OnAudioClosed(media::AudioInputController*); | |
90 | |
91 // Helper method which closes the audio controller and frees it asynchronously | |
92 // without blocking the IO thread. | |
93 void CloseAudioControllerAsynchronously(); | |
94 | |
95 void SetAudioManagerForTesting(media::AudioManager* audio_manager); | |
96 | |
97 content::SpeechRecognitionEventListener* listener_; | 135 content::SpeechRecognitionEventListener* listener_; |
98 media::AudioManager* testing_audio_manager_; | 136 media::AudioManager* testing_audio_manager_; |
99 scoped_ptr<SpeechRecognitionEngine> recognition_engine_; | 137 scoped_ptr<SpeechRecognitionEngine> recognition_engine_; |
100 Endpointer endpointer_; | 138 Endpointer endpointer_; |
101 scoped_refptr<media::AudioInputController> audio_controller_; | 139 scoped_refptr<media::AudioInputController> audio_controller_; |
102 scoped_refptr<net::URLRequestContextGetter> context_getter_; | |
103 int caller_id_; | 140 int caller_id_; |
104 std::string language_; | |
105 std::string grammar_; | |
106 bool filter_profanities_; | |
107 std::string hardware_info_; | |
108 std::string origin_url_; | |
109 int num_samples_recorded_; | 141 int num_samples_recorded_; |
110 float audio_level_; | 142 float audio_level_; |
143 bool in_event_dispatching_; | |
Satish
2012/04/12 08:58:33
probably reword as 'is_dispatching_event_'
Primiano Tucci (use gerrit)
2012/04/12 12:56:48
Done.
| |
144 FSMState state_; | |
111 | 145 |
112 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl); | 146 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl); |
113 }; | 147 }; |
114 | 148 |
115 } // namespace speech | 149 } // namespace speech |
116 | 150 |
117 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ | 151 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ |
OLD | NEW |