OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ | 5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ |
6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ | 6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ |
7 | 7 |
8 #include <list> | 8 #include "base/basictypes.h" |
9 #include <utility> | 9 #include "base/memory/ref_counted.h" |
10 | |
11 #include "base/compiler_specific.h" | |
12 #include "base/memory/scoped_ptr.h" | 10 #include "base/memory/scoped_ptr.h" |
13 #include "content/browser/speech/audio_encoder.h" | |
14 #include "content/browser/speech/endpointer/endpointer.h" | 11 #include "content/browser/speech/endpointer/endpointer.h" |
15 #include "content/browser/speech/speech_recognition_request.h" | 12 #include "content/browser/speech/speech_recognition_engine.h" |
16 #include "content/public/browser/speech_recognizer.h" | 13 #include "content/public/browser/speech_recognizer.h" |
17 #include "content/public/common/speech_recognition_result.h" | 14 #include "content/public/common/speech_recognition_result.h" |
18 #include "media/audio/audio_input_controller.h" | 15 #include "media/audio/audio_input_controller.h" |
16 #include "net/url_request/url_request_context_getter.h" | |
19 | 17 |
20 class AudioManager; | 18 namespace media { |
19 class AudioInputController; | |
Satish
2012/03/21 13:29:48
2 spaces to 1 space
| |
20 } | |
21 | 21 |
22 namespace content { | 22 namespace content { |
23 struct SpeechRecognitionError; | |
23 class SpeechRecognitionEventListener; | 24 class SpeechRecognitionEventListener; |
24 } | 25 } |
25 | 26 |
26 namespace speech { | 27 namespace speech { |
28 // TODO(primiano) Next CL: Remove the Impl suffix and the exported | |
Satish
2012/03/21 13:29:48
Add a blank line above this TODO comment.
| |
29 // /content/public/browser/speech_recognizer.h interface since this class should | |
30 // not be visible outside (currently we need it for speech input extension API). | |
27 | 31 |
28 // Records audio, sends recorded audio to server and translates server response | 32 // Handles speech recognition for a session (identified by |caller_id|), taking |
29 // to recognition result. | 33 // care of audio capture, silence detection/endpointer and interaction with the |
34 // SpeechRecognitionEngine. | |
30 class CONTENT_EXPORT SpeechRecognizerImpl | 35 class CONTENT_EXPORT SpeechRecognizerImpl |
31 : NON_EXPORTED_BASE(public content::SpeechRecognizer), | 36 : public NON_EXPORTED_BASE(content::SpeechRecognizer), |
32 public media::AudioInputController::EventHandler, | 37 public media::AudioInputController::EventHandler, |
33 public SpeechRecognitionRequestDelegate { | 38 public SpeechRecognitionEngineDelegate { |
34 public: | 39 public: |
35 static const int kAudioSampleRate; | 40 static const int kAudioSampleRate; |
36 static const int kAudioPacketIntervalMs; // Duration of each audio packet. | |
37 static const ChannelLayout kChannelLayout; | 41 static const ChannelLayout kChannelLayout; |
38 static const int kNumBitsPerAudioSample; | 42 static const int kNumBitsPerAudioSample; |
39 static const int kNoSpeechTimeoutSec; | 43 static const int kNoSpeechTimeoutMs; |
40 static const int kEndpointerEstimationTimeMs; | 44 static const int kEndpointerEstimationTimeMs; |
41 | 45 |
42 SpeechRecognizerImpl(content::SpeechRecognitionEventListener* listener, | 46 SpeechRecognizerImpl( |
43 int caller_id, | 47 content::SpeechRecognitionEventListener* listener, |
44 const std::string& language, | 48 int caller_id, |
45 const std::string& grammar, | 49 SpeechRecognitionEngine* engine); |
46 net::URLRequestContextGetter* context_getter, | |
47 bool filter_profanities, | |
48 const std::string& hardware_info, | |
49 const std::string& origin_url); | |
50 | |
51 virtual ~SpeechRecognizerImpl(); | 50 virtual ~SpeechRecognizerImpl(); |
52 | 51 |
53 // content::SpeechRecognizer methods. | 52 // content::SpeechRecognizer methods. |
54 virtual bool StartRecognition() OVERRIDE; | 53 virtual void StartRecognition() OVERRIDE; |
55 virtual void AbortRecognition() OVERRIDE; | 54 virtual void AbortRecognition() OVERRIDE; |
56 virtual void StopAudioCapture() OVERRIDE; | 55 virtual void StopAudioCapture() OVERRIDE; |
57 virtual bool IsActive() const OVERRIDE; | 56 virtual bool IsActive() const OVERRIDE; |
58 virtual bool IsCapturingAudio() const OVERRIDE; | 57 virtual bool IsCapturingAudio() const OVERRIDE; |
58 const SpeechRecognitionEngine& recognition_engine() const; | |
59 | |
60 private: | |
61 friend class SpeechRecognizerImplTest; | |
62 | |
63 enum FSMState { | |
Satish
2012/03/21 13:29:48
could the FSM changes be moved to a separate CL as [… remainder of comment truncated in this export]
Primiano Tucci (use gerrit)
2012/03/22 11:20:41
Done.
| |
64 kIdle = 0, | |
65 kStartingRecognition, | |
66 kEstimatingEnvironment, | |
67 kWaitingForSpeech, | |
68 kRecognizingSpeech, | |
69 kWaitingFinalResult, | |
70 kMaxState = kWaitingFinalResult | |
71 }; | |
72 | |
73 enum FSMEvent { | |
74 kAbortRequest = 0, | |
75 kStartRequest, | |
76 kStopCaptureRequest, | |
77 kAudioData, | |
78 kRecognitionResult, | |
79 kRecognitionError, | |
80 kAudioError, | |
81 kMaxEvent = kAudioError | |
82 }; | |
83 | |
84 struct FSMEventArgs { | |
85 int audio_error_code; | |
86 AudioChunk* audio_data; | |
87 content::SpeechRecognitionResult speech_result; | |
88 content::SpeechRecognitionError error; | |
89 FSMEventArgs(); | |
90 }; | |
59 | 91 |
60 // AudioInputController::EventHandler methods. | 92 // AudioInputController::EventHandler methods. |
61 virtual void OnCreated(media::AudioInputController* controller) OVERRIDE {} | 93 virtual void OnCreated(media::AudioInputController* controller) OVERRIDE {} |
62 virtual void OnRecording(media::AudioInputController* controller) OVERRIDE {} | 94 virtual void OnRecording(media::AudioInputController* controller) OVERRIDE {} |
63 virtual void OnError(media::AudioInputController* controller, | 95 virtual void OnError(media::AudioInputController* controller, |
64 int error_code) OVERRIDE; | 96 int error_code) OVERRIDE; |
65 virtual void OnData(media::AudioInputController* controller, | 97 virtual void OnData(media::AudioInputController* controller, |
66 const uint8* data, | 98 const uint8* data, uint32 size) OVERRIDE; |
67 uint32 size) OVERRIDE; | |
68 | 99 |
69 // SpeechRecognitionRequest::Delegate methods. | 100 // SpeechRecognitionEngineDelegate methods. |
70 virtual void SetRecognitionResult( | 101 virtual void OnSpeechEngineResult( |
71 const content::SpeechRecognitionResult& result) OVERRIDE; | 102 const content::SpeechRecognitionResult& result) OVERRIDE; |
103 virtual void OnSpeechEngineError( | |
104 const content::SpeechRecognitionError& error) OVERRIDE; | |
72 | 105 |
73 private: | 106 void DispatchEvent(FSMEvent event, FSMEventArgs); |
74 friend class SpeechRecognizerImplTest; | 107 void ProcessAudioPipeline(); |
75 | 108 FSMState ProcessEvent(FSMEvent event); |
76 void InformErrorAndAbortRecognition( | 109 FSMState InitializeAndStartRecording(); |
77 content::SpeechRecognitionErrorCode error); | 110 FSMState StartSpeechRecognition(); |
78 void SendRecordedAudioToServer(); | 111 FSMState EnvironmentEstimation(); |
79 | 112 FSMState DetectUserSpeechOrTimeout(); |
80 void HandleOnError(int error_code); // Handles OnError in the IO thread. | 113 FSMState StopCaptureAndWaitForResult(); |
81 | 114 FSMState ProcessIntermediateRecognitionResult(); |
82 // Handles OnData in the IO thread. Takes ownership of |raw_audio|. | 115 FSMState ProcessFinalRecognitionResult(); |
83 void HandleOnData(AudioChunk* raw_audio); | 116 FSMState Abort(); |
84 | 117 FSMState Abort(const content::SpeechRecognitionError& error); |
85 // Helper method which closes the audio controller and blocks until done. | 118 FSMState Abort(bool has_error, const content::SpeechRecognitionError& error); |
119 FSMState DetectEndOfSpeech(); | |
120 FSMState DoNothing() const; | |
121 int GetElapsedTimeMs() const; | |
122 void UpdateSignalAndNoiseLevels(const float& rms); | |
86 void CloseAudioControllerSynchronously(); | 123 void CloseAudioControllerSynchronously(); |
87 | |
88 void SetAudioManagerForTesting(AudioManager* audio_manager); | 124 void SetAudioManagerForTesting(AudioManager* audio_manager); |
89 | 125 |
90 content::SpeechRecognitionEventListener* listener_; | 126 content::SpeechRecognitionEventListener* listener_; |
127 AudioManager* testing_audio_manager_; | |
128 scoped_ptr<SpeechRecognitionEngine> recognition_engine_; | |
129 Endpointer endpointer_; | |
130 scoped_refptr<media::AudioInputController> audio_controller_; | |
91 int caller_id_; | 131 int caller_id_; |
92 std::string language_; | |
93 std::string grammar_; | |
94 bool filter_profanities_; | |
95 std::string hardware_info_; | |
96 std::string origin_url_; | |
97 | |
98 scoped_ptr<SpeechRecognitionRequest> request_; | |
99 scoped_refptr<media::AudioInputController> audio_controller_; | |
100 scoped_refptr<net::URLRequestContextGetter> context_getter_; | |
101 AudioEncoder::Codec codec_; | |
102 scoped_ptr<AudioEncoder> encoder_; | |
103 Endpointer endpointer_; | |
104 int num_samples_recorded_; | 132 int num_samples_recorded_; |
133 bool clipper_detected_clip_; | |
105 float audio_level_; | 134 float audio_level_; |
106 AudioManager* audio_manager_; | 135 float rms_; |
136 int event_dispatch_nesting_level_; | |
137 FSMState state_; | |
138 FSMEvent event_; | |
139 FSMEventArgs* event_args_; | |
107 | 140 |
108 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl); | 141 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl); |
109 }; | 142 }; |
110 | 143 |
111 } // namespace speech | 144 } // namespace speech |
112 | 145 |
113 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ | 146 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ |
OLD | NEW |