Chromium Code Reviews| Index: content/browser/speech/speech_recognizer_impl.h |
| diff --git a/content/browser/speech/speech_recognizer_impl.h b/content/browser/speech/speech_recognizer_impl.h |
| index 1b93fcd01b337763c184e5e118806ab9b42c22cd..584650469c11cdffb7a5857ab1d23d21c603309d 100644 |
| --- a/content/browser/speech/speech_recognizer_impl.h |
| +++ b/content/browser/speech/speech_recognizer_impl.h |
| @@ -5,57 +5,90 @@ |
| #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ |
| #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ |
| -#include <list> |
| -#include <utility> |
| - |
| -#include "base/compiler_specific.h" |
| +#include "base/basictypes.h" |
| +#include "base/memory/ref_counted.h" |
| #include "base/memory/scoped_ptr.h" |
| -#include "content/browser/speech/audio_encoder.h" |
| #include "content/browser/speech/endpointer/endpointer.h" |
| -#include "content/browser/speech/speech_recognition_request.h" |
| +#include "content/browser/speech/speech_recognition_engine.h" |
| #include "content/public/browser/speech_recognizer.h" |
| #include "content/public/common/speech_recognition_result.h" |
| #include "media/audio/audio_input_controller.h" |
| +#include "net/url_request/url_request_context_getter.h" |
| -class AudioManager; |
| +namespace media { |
| +class AudioInputController; |
| +} |
| namespace content { |
| +struct SpeechRecognitionError; |
| class SpeechRecognitionEventListener; |
| } |
| namespace speech { |
| - |
| -// Records audio, sends recorded audio to server and translates server response |
| -// to recognition result. |
| +// TODO(primiano) Current CL: check CONTENT_EXPORT and NON_EXPORTED_BASE |
|
hans
2012/03/16 11:12:56
Should this be addressed before this CL is landed?
Primiano Tucci (use gerrit)
2012/03/16 15:03:42
Done.
|
| +// TODO(primiano) Next CL: Remove the Impl suffix and the exported |
| +// /content/public/browser/speech_recognizer.h interface since this class should |
| +// not be visible outside (currently we need it for speech input extension API). |
| + |
| +// Handles speech recognition for a session (identified by |caller_id|), taking |
| +// care of audio capture, silence detection/endpointer and interaction with the |
| +// SpeechRecognitionEngine (that must be injected through set_delegate(...)). |
|
hans
2012/03/16 11:12:56
I'm confused by the last bit here... "SpeechRecogni…
Primiano Tucci (use gerrit)
2012/03/16 15:03:42
Uh, definitely right. Probably it was just a wrong…
|
| class CONTENT_EXPORT SpeechRecognizerImpl |
| : NON_EXPORTED_BASE(public content::SpeechRecognizer), |
| public media::AudioInputController::EventHandler, |
| - public SpeechRecognitionRequestDelegate { |
| + public SpeechRecognitionEngineDelegate { |
| public: |
| static const int kAudioSampleRate; |
| - static const int kAudioPacketIntervalMs; // Duration of each audio packet. |
| static const ChannelLayout kChannelLayout; |
| static const int kNumBitsPerAudioSample; |
| - static const int kNoSpeechTimeoutSec; |
| + static const int kNoSpeechTimeoutMs; |
| static const int kEndpointerEstimationTimeMs; |
| - SpeechRecognizerImpl(content::SpeechRecognitionEventListener* listener, |
| - int caller_id, |
| - const std::string& language, |
| - const std::string& grammar, |
| - net::URLRequestContextGetter* context_getter, |
| - bool filter_profanities, |
| - const std::string& hardware_info, |
| - const std::string& origin_url); |
| - |
| + SpeechRecognizerImpl( |
| + content::SpeechRecognitionEventListener* listener, |
| + int caller_id, |
| + SpeechRecognitionEngine* engine); |
| virtual ~SpeechRecognizerImpl(); |
| // content::SpeechRecognizer methods. |
| - virtual bool StartRecognition() OVERRIDE; |
| + virtual void StartRecognition() OVERRIDE; |
| virtual void AbortRecognition() OVERRIDE; |
| virtual void StopAudioCapture() OVERRIDE; |
| virtual bool IsActive() const OVERRIDE; |
| virtual bool IsCapturingAudio() const OVERRIDE; |
| + const SpeechRecognitionEngine& recognition_engine() const; |
| + |
| + private: |
| + friend class SpeechRecognizerImplTest; |
| + |
| + enum FSMState { |
| + kIdle = 0, |
| + kStartingRecognition, |
| + kEstimatingEnvironment, |
| + kWaitingForSpeech, |
| + kRecognizingSpeech, |
| + kWaitingFinalResult, |
| + kMaxState = kWaitingFinalResult |
| + }; |
| + |
| + enum FSMEvent { |
| + kAbortRequest = 0, |
| + kStartRequest, |
| + kStopCaptureRequest, |
| + kAudioData, |
| + kRecognitionResult, |
| + kRecognitionError, |
| + kAudioError, |
| + kMaxEvent = kAudioError |
| + }; |
| + |
| + struct FSMEventArgs { |
| + int audio_error_code; |
| + AudioChunk* audio_data; |
| + content::SpeechRecognitionResult speech_result; |
| + content::SpeechRecognitionError error; |
| + FSMEventArgs(); |
| + }; |
| // AudioInputController::EventHandler methods. |
| virtual void OnCreated(media::AudioInputController* controller) OVERRIDE {} |
| @@ -63,47 +96,48 @@ class CONTENT_EXPORT SpeechRecognizerImpl |
| virtual void OnError(media::AudioInputController* controller, |
| int error_code) OVERRIDE; |
| virtual void OnData(media::AudioInputController* controller, |
| - const uint8* data, |
| - uint32 size) OVERRIDE; |
| + const uint8* data, uint32 size) OVERRIDE; |
| - // SpeechRecognitionRequest::Delegate methods. |
| - virtual void SetRecognitionResult( |
| + // SpeechRecognitionEngineDelegate methods. |
| + virtual void OnSpeechEngineResult( |
| const content::SpeechRecognitionResult& result) OVERRIDE; |
| - |
| - private: |
| - friend class SpeechRecognizerImplTest; |
| - |
| - void InformErrorAndAbortRecognition( |
| - content::SpeechRecognitionErrorCode error); |
| - void SendRecordedAudioToServer(); |
| - |
| - void HandleOnError(int error_code); // Handles OnError in the IO thread. |
| - |
| - // Handles OnData in the IO thread. Takes ownership of |raw_audio|. |
| - void HandleOnData(AudioChunk* raw_audio); |
| - |
| - // Helper method which closes the audio controller and blocks until done. |
| + virtual void OnSpeechEngineError( |
| + const content::SpeechRecognitionError& error) OVERRIDE; |
| + |
| + void DispatchEvent(FSMEvent event, FSMEventArgs); |
| + void ProcessAudioPipeline(); |
| + FSMState ProcessEvent(FSMEvent event); |
| + FSMState InitializeAndStartRecording(); |
| + FSMState StartSpeechRecognition(); |
| + FSMState EnvironmentEstimation(); |
| + FSMState DetectUserSpeechOrTimeout(); |
| + FSMState StopCaptureAndWaitForResult(); |
| + FSMState ProcessIntermediateRecognitionResult(); |
| + FSMState ProcessFinalRecognitionResult(); |
| + FSMState Abort(); |
| + FSMState Abort(const content::SpeechRecognitionError& error); |
| + FSMState Abort(bool has_error, const content::SpeechRecognitionError& error); |
| + FSMState DetectEndOfSpeech(); |
| + FSMState DoNothing() const; |
| + int GetElapsedTimeMs() const; |
| + void UpdateSignalAndNoiseLevels(const float& rms); |
| void CloseAudioControllerSynchronously(); |
| - |
| void SetAudioManagerForTesting(AudioManager* audio_manager); |
| content::SpeechRecognitionEventListener* listener_; |
| - int caller_id_; |
| - std::string language_; |
| - std::string grammar_; |
| - bool filter_profanities_; |
| - std::string hardware_info_; |
| - std::string origin_url_; |
| - |
| - scoped_ptr<SpeechRecognitionRequest> request_; |
| - scoped_refptr<media::AudioInputController> audio_controller_; |
| - scoped_refptr<net::URLRequestContextGetter> context_getter_; |
| - AudioEncoder::Codec codec_; |
| - scoped_ptr<AudioEncoder> encoder_; |
| + AudioManager* testing_audio_manager_; |
| + scoped_ptr<SpeechRecognitionEngine> recognition_engine_; |
| Endpointer endpointer_; |
| + scoped_refptr<media::AudioInputController> audio_controller_; |
| + int caller_id_; |
| int num_samples_recorded_; |
| + bool clipper_detected_clip_; |
| float audio_level_; |
| - AudioManager* audio_manager_; |
| + float rms_; |
| + int event_dispatch_nesting_level_; |
| + FSMState state_; |
| + FSMEvent event_; |
| + FSMEventArgs* event_args_; |
| DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl); |
| }; |