Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ | 5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ |
| 6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ | 6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ |
| 7 | 7 |
| 8 #include <list> | 8 #include "base/basictypes.h" |
| 9 #include <utility> | 9 #include "base/memory/ref_counted.h" |
| 10 | |
| 11 #include "base/compiler_specific.h" | |
| 12 #include "base/memory/scoped_ptr.h" | 10 #include "base/memory/scoped_ptr.h" |
| 13 #include "content/browser/speech/audio_encoder.h" | |
| 14 #include "content/browser/speech/endpointer/endpointer.h" | 11 #include "content/browser/speech/endpointer/endpointer.h" |
| 15 #include "content/browser/speech/speech_recognition_request.h" | 12 #include "content/browser/speech/speech_recognition_engine.h" |
| 16 #include "content/public/browser/speech_recognizer.h" | 13 #include "content/public/browser/speech_recognizer.h" |
| 17 #include "content/public/common/speech_recognition_result.h" | 14 #include "content/public/common/speech_recognition_result.h" |
| 18 #include "media/audio/audio_input_controller.h" | 15 #include "media/audio/audio_input_controller.h" |
| 16 #include "net/url_request/url_request_context_getter.h" | |
| 19 | 17 |
| 20 class AudioManager; | 18 namespace media { |
| 19 class AudioInputController; | |
| 20 } | |
| 21 | 21 |
| 22 namespace content { | 22 namespace content { |
| 23 struct SpeechRecognitionError; | |
| 23 class SpeechRecognitionEventListener; | 24 class SpeechRecognitionEventListener; |
| 24 } | 25 } |
| 25 | 26 |
| 26 namespace speech { | 27 namespace speech { |
| 28 // TODO(primiano) Current CL: check CONTENT_EXPORT and NON_EXPORTED_BASE | |
|
hans
2012/03/16 11:12:56
Should this be addressed before this CL is landed?
Primiano Tucci (use gerrit)
2012/03/16 15:03:42
Done.
| |
| 29 // TODO(primiano) Next CL: Remove the Impl suffix and the exported | |
| 30 // /content/public/browser/speech_recognizer.h interface since this class should | |
| 31 // not be visible outside (currently we need it for speech input extension API). | |
| 27 | 32 |
| 28 // Records audio, sends recorded audio to server and translates server response | 33 // Handles speech recognition for a session (identified by |caller_id|), taking |
| 29 // to recognition result. | 34 // care of audio capture, silence detection/endpointer and interaction with the |
| 35 // SpeechRecognitionEngine (that must be injected through set_delegate(...)). | |
|
hans
2012/03/16 11:12:56
I'm confused by the last bit here... "SpeechRecogni… [comment truncated]
Primiano Tucci (use gerrit)
2012/03/16 15:03:42
Uh, definitely right. Probably it was just a wrong… [comment truncated]
| |
| 30 class CONTENT_EXPORT SpeechRecognizerImpl | 36 class CONTENT_EXPORT SpeechRecognizerImpl |
| 31 : NON_EXPORTED_BASE(public content::SpeechRecognizer), | 37 : NON_EXPORTED_BASE(public content::SpeechRecognizer), |
| 32 public media::AudioInputController::EventHandler, | 38 public media::AudioInputController::EventHandler, |
| 33 public SpeechRecognitionRequestDelegate { | 39 public SpeechRecognitionEngineDelegate { |
| 34 public: | 40 public: |
| 35 static const int kAudioSampleRate; | 41 static const int kAudioSampleRate; |
| 36 static const int kAudioPacketIntervalMs; // Duration of each audio packet. | |
| 37 static const ChannelLayout kChannelLayout; | 42 static const ChannelLayout kChannelLayout; |
| 38 static const int kNumBitsPerAudioSample; | 43 static const int kNumBitsPerAudioSample; |
| 39 static const int kNoSpeechTimeoutSec; | 44 static const int kNoSpeechTimeoutMs; |
| 40 static const int kEndpointerEstimationTimeMs; | 45 static const int kEndpointerEstimationTimeMs; |
| 41 | 46 |
| 42 SpeechRecognizerImpl(content::SpeechRecognitionEventListener* listener, | 47 SpeechRecognizerImpl( |
| 43 int caller_id, | 48 content::SpeechRecognitionEventListener* listener, |
| 44 const std::string& language, | 49 int caller_id, |
| 45 const std::string& grammar, | 50 SpeechRecognitionEngine* engine); |
| 46 net::URLRequestContextGetter* context_getter, | |
| 47 bool filter_profanities, | |
| 48 const std::string& hardware_info, | |
| 49 const std::string& origin_url); | |
| 50 | |
| 51 virtual ~SpeechRecognizerImpl(); | 51 virtual ~SpeechRecognizerImpl(); |
| 52 | 52 |
| 53 // content::SpeechRecognizer methods. | 53 // content::SpeechRecognizer methods. |
| 54 virtual bool StartRecognition() OVERRIDE; | 54 virtual void StartRecognition() OVERRIDE; |
| 55 virtual void AbortRecognition() OVERRIDE; | 55 virtual void AbortRecognition() OVERRIDE; |
| 56 virtual void StopAudioCapture() OVERRIDE; | 56 virtual void StopAudioCapture() OVERRIDE; |
| 57 virtual bool IsActive() const OVERRIDE; | 57 virtual bool IsActive() const OVERRIDE; |
| 58 virtual bool IsCapturingAudio() const OVERRIDE; | 58 virtual bool IsCapturingAudio() const OVERRIDE; |
| 59 const SpeechRecognitionEngine& recognition_engine() const; | |
| 60 | |
| 61 private: | |
| 62 friend class SpeechRecognizerImplTest; | |
| 63 | |
| 64 enum FSMState { | |
| 65 kIdle = 0, | |
| 66 kStartingRecognition, | |
| 67 kEstimatingEnvironment, | |
| 68 kWaitingForSpeech, | |
| 69 kRecognizingSpeech, | |
| 70 kWaitingFinalResult, | |
| 71 kMaxState = kWaitingFinalResult | |
| 72 }; | |
| 73 | |
| 74 enum FSMEvent { | |
| 75 kAbortRequest = 0, | |
| 76 kStartRequest, | |
| 77 kStopCaptureRequest, | |
| 78 kAudioData, | |
| 79 kRecognitionResult, | |
| 80 kRecognitionError, | |
| 81 kAudioError, | |
| 82 kMaxEvent = kAudioError | |
| 83 }; | |
| 84 | |
| 85 struct FSMEventArgs { | |
| 86 int audio_error_code; | |
| 87 AudioChunk* audio_data; | |
| 88 content::SpeechRecognitionResult speech_result; | |
| 89 content::SpeechRecognitionError error; | |
| 90 FSMEventArgs(); | |
| 91 }; | |
| 59 | 92 |
| 60 // AudioInputController::EventHandler methods. | 93 // AudioInputController::EventHandler methods. |
| 61 virtual void OnCreated(media::AudioInputController* controller) OVERRIDE {} | 94 virtual void OnCreated(media::AudioInputController* controller) OVERRIDE {} |
| 62 virtual void OnRecording(media::AudioInputController* controller) OVERRIDE {} | 95 virtual void OnRecording(media::AudioInputController* controller) OVERRIDE {} |
| 63 virtual void OnError(media::AudioInputController* controller, | 96 virtual void OnError(media::AudioInputController* controller, |
| 64 int error_code) OVERRIDE; | 97 int error_code) OVERRIDE; |
| 65 virtual void OnData(media::AudioInputController* controller, | 98 virtual void OnData(media::AudioInputController* controller, |
| 66 const uint8* data, | 99 const uint8* data, uint32 size) OVERRIDE; |
| 67 uint32 size) OVERRIDE; | |
| 68 | 100 |
| 69 // SpeechRecognitionRequest::Delegate methods. | 101 // SpeechRecognitionEngineDelegate methods. |
| 70 virtual void SetRecognitionResult( | 102 virtual void OnSpeechEngineResult( |
| 71 const content::SpeechRecognitionResult& result) OVERRIDE; | 103 const content::SpeechRecognitionResult& result) OVERRIDE; |
| 104 virtual void OnSpeechEngineError( | |
| 105 const content::SpeechRecognitionError& error) OVERRIDE; | |
| 72 | 106 |
| 73 private: | 107 void DispatchEvent(FSMEvent event, FSMEventArgs); |
| 74 friend class SpeechRecognizerImplTest; | 108 void ProcessAudioPipeline(); |
| 75 | 109 FSMState ProcessEvent(FSMEvent event); |
| 76 void InformErrorAndAbortRecognition( | 110 FSMState InitializeAndStartRecording(); |
| 77 content::SpeechRecognitionErrorCode error); | 111 FSMState StartSpeechRecognition(); |
| 78 void SendRecordedAudioToServer(); | 112 FSMState EnvironmentEstimation(); |
| 79 | 113 FSMState DetectUserSpeechOrTimeout(); |
| 80 void HandleOnError(int error_code); // Handles OnError in the IO thread. | 114 FSMState StopCaptureAndWaitForResult(); |
| 81 | 115 FSMState ProcessIntermediateRecognitionResult(); |
| 82 // Handles OnData in the IO thread. Takes ownership of |raw_audio|. | 116 FSMState ProcessFinalRecognitionResult(); |
| 83 void HandleOnData(AudioChunk* raw_audio); | 117 FSMState Abort(); |
| 84 | 118 FSMState Abort(const content::SpeechRecognitionError& error); |
| 85 // Helper method which closes the audio controller and blocks until done. | 119 FSMState Abort(bool has_error, const content::SpeechRecognitionError& error); |
| 120 FSMState DetectEndOfSpeech(); | |
| 121 FSMState DoNothing() const; | |
| 122 int GetElapsedTimeMs() const; | |
| 123 void UpdateSignalAndNoiseLevels(const float& rms); | |
| 86 void CloseAudioControllerSynchronously(); | 124 void CloseAudioControllerSynchronously(); |
| 87 | |
| 88 void SetAudioManagerForTesting(AudioManager* audio_manager); | 125 void SetAudioManagerForTesting(AudioManager* audio_manager); |
| 89 | 126 |
| 90 content::SpeechRecognitionEventListener* listener_; | 127 content::SpeechRecognitionEventListener* listener_; |
| 128 AudioManager* testing_audio_manager_; | |
| 129 scoped_ptr<SpeechRecognitionEngine> recognition_engine_; | |
| 130 Endpointer endpointer_; | |
| 131 scoped_refptr<media::AudioInputController> audio_controller_; | |
| 91 int caller_id_; | 132 int caller_id_; |
| 92 std::string language_; | |
| 93 std::string grammar_; | |
| 94 bool filter_profanities_; | |
| 95 std::string hardware_info_; | |
| 96 std::string origin_url_; | |
| 97 | |
| 98 scoped_ptr<SpeechRecognitionRequest> request_; | |
| 99 scoped_refptr<media::AudioInputController> audio_controller_; | |
| 100 scoped_refptr<net::URLRequestContextGetter> context_getter_; | |
| 101 AudioEncoder::Codec codec_; | |
| 102 scoped_ptr<AudioEncoder> encoder_; | |
| 103 Endpointer endpointer_; | |
| 104 int num_samples_recorded_; | 133 int num_samples_recorded_; |
| 134 bool clipper_detected_clip_; | |
| 105 float audio_level_; | 135 float audio_level_; |
| 106 AudioManager* audio_manager_; | 136 float rms_; |
| 137 int event_dispatch_nesting_level_; | |
| 138 FSMState state_; | |
| 139 FSMEvent event_; | |
| 140 FSMEventArgs* event_args_; | |
| 107 | 141 |
| 108 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl); | 142 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl); |
| 109 }; | 143 }; |
| 110 | 144 |
| 111 } // namespace speech | 145 } // namespace speech |
| 112 | 146 |
| 113 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ | 147 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ |
| OLD | NEW |