OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ | 5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ |
6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ | 6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ |
7 | 7 |
8 #include "base/basictypes.h" | 8 #include "base/basictypes.h" |
9 #include "base/memory/ref_counted.h" | |
9 #include "base/memory/scoped_ptr.h" | 10 #include "base/memory/scoped_ptr.h" |
10 #include "content/browser/speech/endpointer/endpointer.h" | 11 #include "content/browser/speech/endpointer/endpointer.h" |
11 #include "content/browser/speech/speech_recognition_engine.h" | 12 #include "content/browser/speech/speech_recognition_engine.h" |
12 #include "content/public/browser/speech_recognizer.h" | 13 #include "content/public/browser/speech_recognizer.h" |
13 #include "content/public/common/speech_recognition_error.h" | 14 #include "content/public/common/speech_recognition_error.h" |
15 #include "content/public/common/speech_recognition_result.h" | |
14 #include "media/audio/audio_input_controller.h" | 16 #include "media/audio/audio_input_controller.h" |
15 #include "net/url_request/url_request_context_getter.h" | 17 #include "net/url_request/url_request_context_getter.h" |
16 | 18 |
17 namespace content { | 19 namespace content { |
18 class SpeechRecognitionEventListener; | 20 class SpeechRecognitionEventListener; |
19 struct SpeechRecognitionResult; | |
20 } | |
21 | |
22 namespace media { | |
23 class AudioInputController; | |
24 } | 21 } |
25 | 22 |
26 namespace speech { | 23 namespace speech { |
24 // TODO(primiano) Next CL: Remove the Impl suffix and the exported | |
25 // /content/public/browser/speech_recognizer.h interface since this class should | |
26 // not be visible outside (currently we need it for speech input extension API). | |
27 | 27 |
28 // Records audio, sends recorded audio to server and translates server response | 28 // Handles speech recognition for a session (identified by |caller_id|), taking |
29 // to recognition result. | 29 // care of audio capture, silence detection/endpointer and interaction with the |
30 // SpeechRecognitionEngine. | |
30 class CONTENT_EXPORT SpeechRecognizerImpl | 31 class CONTENT_EXPORT SpeechRecognizerImpl |
31 : public NON_EXPORTED_BASE(content::SpeechRecognizer), | 32 : public NON_EXPORTED_BASE(content::SpeechRecognizer), |
32 public media::AudioInputController::EventHandler, | 33 public media::AudioInputController::EventHandler, |
33 public NON_EXPORTED_BASE(SpeechRecognitionEngineDelegate) { | 34 public NON_EXPORTED_BASE(SpeechRecognitionEngineDelegate) { |
34 public: | 35 public: |
35 static const int kAudioSampleRate; | 36 static const int kAudioSampleRate; |
36 static const ChannelLayout kChannelLayout; | 37 static const ChannelLayout kChannelLayout; |
37 static const int kNumBitsPerAudioSample; | 38 static const int kNumBitsPerAudioSample; |
38 static const int kNoSpeechTimeoutMs; | 39 static const int kNoSpeechTimeoutMs; |
39 static const int kEndpointerEstimationTimeMs; | 40 static const int kEndpointerEstimationTimeMs; |
40 | 41 |
41 SpeechRecognizerImpl( | 42 SpeechRecognizerImpl( |
42 content::SpeechRecognitionEventListener* listener, | 43 content::SpeechRecognitionEventListener* listener, |
43 int caller_id, | 44 int caller_id, |
44 const std::string& language, | 45 SpeechRecognitionEngine* engine); |
45 const std::string& grammar, | |
46 net::URLRequestContextGetter* context_getter, | |
47 bool filter_profanities, | |
48 const std::string& hardware_info, | |
49 const std::string& origin_url); | |
50 virtual ~SpeechRecognizerImpl(); | 46 virtual ~SpeechRecognizerImpl(); |
51 | 47 |
52 // content::SpeechRecognizer methods. | 48 // content::SpeechRecognizer methods. |
53 virtual void StartRecognition() OVERRIDE; | 49 virtual void StartRecognition() OVERRIDE; |
54 virtual void AbortRecognition() OVERRIDE; | 50 virtual void AbortRecognition() OVERRIDE; |
55 virtual void StopAudioCapture() OVERRIDE; | 51 virtual void StopAudioCapture() OVERRIDE; |
56 virtual bool IsActive() const OVERRIDE; | 52 virtual bool IsActive() const OVERRIDE; |
57 virtual bool IsCapturingAudio() const OVERRIDE; | 53 virtual bool IsCapturingAudio() const OVERRIDE; |
58 const SpeechRecognitionEngine& recognition_engine() const; | 54 const SpeechRecognitionEngine& recognition_engine() const; |
59 | 55 |
56 private: | |
57 friend class SpeechRecognizerImplTest; | |
58 | |
59 enum FSMState { | |
60 kIdle = 0, | |
Satish
2012/03/27 09:47:42
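enum values should be MACRO_STYLE
http://dev.chromium.org/developers/coding-style (link cut off in this excerpt; presumably the Chromium coding-style guide)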
Primiano Tucci (use gerrit)
2012/03/28 13:24:44
Done.
| |
61 kStartingRecognition, | |
62 kEstimatingEnvironment, | |
63 kWaitingForSpeech, | |
64 kRecognizingSpeech, | |
65 kWaitingFinalResult, | |
66 kMaxState = kWaitingFinalResult | |
67 }; | |
68 | |
69 enum FSMEvent { | |
70 kAbortRequest = 0, | |
Satish
2012/03/27 09:47:42
seems like we can drop the 'Request' suffix in the … [comment truncated in this excerpt]
Primiano Tucci (use gerrit)
2012/03/28 13:24:44
I added the suffix because their name should represent … [reply truncated in this excerpt]
| |
71 kStartRequest, | |
72 kStopCaptureRequest, | |
73 kAudioData, | |
74 kRecognitionResult, | |
75 kRecognitionError, | |
76 kAudioError, | |
77 kMaxEvent = kAudioError | |
78 }; | |
79 | |
80 struct FSMEventArgs { | |
81 int audio_error_code; | |
82 AudioChunk* audio_data; | |
83 content::SpeechRecognitionResult speech_result; | |
84 content::SpeechRecognitionError error; | |
Satish
2012/03/27 09:47:42
change to speech_error
Primiano Tucci (use gerrit)
2012/03/28 13:24:44
Done.
| |
85 FSMEventArgs(); | |
86 }; | |
87 | |
60 // AudioInputController::EventHandler methods. | 88 // AudioInputController::EventHandler methods. |
61 virtual void OnCreated(media::AudioInputController* controller) OVERRIDE {} | 89 virtual void OnCreated(media::AudioInputController* controller) OVERRIDE {} |
62 virtual void OnRecording(media::AudioInputController* controller) OVERRIDE {} | 90 virtual void OnRecording(media::AudioInputController* controller) OVERRIDE {} |
63 virtual void OnError(media::AudioInputController* controller, | 91 virtual void OnError(media::AudioInputController* controller, |
64 int error_code) OVERRIDE; | 92 int error_code) OVERRIDE; |
65 virtual void OnData(media::AudioInputController* controller, | 93 virtual void OnData(media::AudioInputController* controller, |
66 const uint8* data, | 94 const uint8* data, uint32 size) OVERRIDE; |
67 uint32 size) OVERRIDE; | |
68 | 95 |
69 // SpeechRecognitionEngineDelegate methods. | 96 // SpeechRecognitionEngineDelegate methods. |
70 virtual void OnSpeechRecognitionEngineResult( | 97 virtual void OnSpeechRecognitionEngineResult( |
71 const content::SpeechRecognitionResult& result) OVERRIDE; | 98 const content::SpeechRecognitionResult& result) OVERRIDE; |
72 virtual void OnSpeechRecognitionEngineError( | 99 virtual void OnSpeechRecognitionEngineError( |
73 const content::SpeechRecognitionError& error) OVERRIDE; | 100 const content::SpeechRecognitionError& error) OVERRIDE; |
74 | 101 |
75 private: | 102 void DispatchEvent(FSMEvent event, FSMEventArgs); |
76 friend class SpeechRecognizerImplTest; | 103 void ProcessAudioPipeline(); |
77 | 104 FSMState ProcessEvent(FSMEvent event); |
78 void InformErrorAndAbortRecognition( | 105 FSMState InitializeAndStartRecording(); |
Satish
2012/03/27 09:47:42
rename to StartRecording
Primiano Tucci (use gerrit)
2012/03/28 13:24:44
Done.
| |
79 content::SpeechRecognitionErrorCode error); | 106 FSMState StartSpeechRecognition(); |
Satish
2012/03/27 09:47:42
rename to StartRecognitionEngine
Primiano Tucci (use gerrit)
2012/03/28 13:24:44
Done.
| |
80 void SendRecordedAudioToServer(); | 107 FSMState EnvironmentEstimation(); |
81 | 108 FSMState DetectUserSpeechOrTimeout(); |
82 void HandleOnError(int error_code); // Handles OnError in the IO thread. | 109 FSMState StopCaptureAndWaitForResult(); |
83 | 110 FSMState ProcessIntermediateRecognitionResult(); |
84 // Handles OnData in the IO thread. Takes ownership of |raw_audio|. | 111 FSMState ProcessFinalRecognitionResult(); |
85 void HandleOnData(AudioChunk* raw_audio); | 112 FSMState Abort(); |
86 | 113 FSMState Abort(const content::SpeechRecognitionError& error); |
87 // Helper method which closes the audio controller and blocks until done. | 114 FSMState Abort(bool has_error, const content::SpeechRecognitionError& error); |
115 FSMState DetectEndOfSpeech(); | |
116 FSMState DoNothing() const; | |
117 int GetElapsedTimeMs() const; | |
118 void UpdateSignalAndNoiseLevels(const float& rms); | |
88 void CloseAudioControllerSynchronously(); | 119 void CloseAudioControllerSynchronously(); |
89 | |
90 void SetAudioManagerForTesting(AudioManager* audio_manager); | 120 void SetAudioManagerForTesting(AudioManager* audio_manager); |
91 | 121 |
92 content::SpeechRecognitionEventListener* listener_; | 122 content::SpeechRecognitionEventListener* listener_; |
93 AudioManager* testing_audio_manager_; | 123 AudioManager* testing_audio_manager_; |
94 scoped_ptr<SpeechRecognitionEngine> recognition_engine_; | 124 scoped_ptr<SpeechRecognitionEngine> recognition_engine_; |
95 Endpointer endpointer_; | 125 Endpointer endpointer_; |
96 scoped_refptr<media::AudioInputController> audio_controller_; | 126 scoped_refptr<media::AudioInputController> audio_controller_; |
97 scoped_refptr<net::URLRequestContextGetter> context_getter_; | |
98 int caller_id_; | 127 int caller_id_; |
99 std::string language_; | |
100 std::string grammar_; | |
101 bool filter_profanities_; | |
102 std::string hardware_info_; | |
103 std::string origin_url_; | |
104 int num_samples_recorded_; | 128 int num_samples_recorded_; |
129 bool clipper_detected_clip_; | |
105 float audio_level_; | 130 float audio_level_; |
131 float rms_; | |
132 int event_dispatch_nesting_level_; | |
133 FSMState state_; | |
134 FSMEvent event_; | |
135 FSMEventArgs* event_args_; | |
106 | 136 |
107 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl); | 137 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl); |
108 }; | 138 }; |
109 | 139 |
110 } // namespace speech | 140 } // namespace speech |
111 | 141 |
112 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ | 142 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ |
OLD | NEW |