Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(130)

Unified Diff: content/browser/speech/speech_recognizer_impl.h

Issue 9663066: Refactoring of chrome speech recognition architecture (CL1.3) (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Fixed according to (partial) Satish review. Created 8 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: content/browser/speech/speech_recognizer_impl.h
diff --git a/content/browser/speech/speech_recognizer_impl.h b/content/browser/speech/speech_recognizer_impl.h
index 1b93fcd01b337763c184e5e118806ab9b42c22cd..55895a83877e1c5fab4aa557985a16dc32db9278 100644
--- a/content/browser/speech/speech_recognizer_impl.h
+++ b/content/browser/speech/speech_recognizer_impl.h
@@ -5,57 +5,89 @@
#ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_
#define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_
-#include <list>
-#include <utility>
-
-#include "base/compiler_specific.h"
+#include "base/basictypes.h"
+#include "base/memory/ref_counted.h"
#include "base/memory/scoped_ptr.h"
-#include "content/browser/speech/audio_encoder.h"
#include "content/browser/speech/endpointer/endpointer.h"
-#include "content/browser/speech/speech_recognition_request.h"
+#include "content/browser/speech/speech_recognition_engine.h"
#include "content/public/browser/speech_recognizer.h"
#include "content/public/common/speech_recognition_result.h"
#include "media/audio/audio_input_controller.h"
+#include "net/url_request/url_request_context_getter.h"
-class AudioManager;
+namespace media {
+class AudioInputController;
Satish 2012/03/21 13:29:48 2 spaces to 1 space
+}
namespace content {
+struct SpeechRecognitionError;
class SpeechRecognitionEventListener;
}
namespace speech {
+// TODO(primiano) Next CL: Remove the Impl suffix and the exported
Satish 2012/03/21 13:29:48 add newline above
+// /content/public/browser/speech_recognizer.h interface since this class should
+// not be visible outside (currently we need it for the speech input extension
+// API).
-// Records audio, sends recorded audio to server and translates server response
-// to recognition result.
+// Handles speech recognition for a session (identified by |caller_id|), taking
+// care of audio capture, silence detection (endpointing) and interaction with
+// the SpeechRecognitionEngine.
class CONTENT_EXPORT SpeechRecognizerImpl
- : NON_EXPORTED_BASE(public content::SpeechRecognizer),
+ : public NON_EXPORTED_BASE(content::SpeechRecognizer),
public media::AudioInputController::EventHandler,
- public SpeechRecognitionRequestDelegate {
+ public SpeechRecognitionEngineDelegate {
public:
static const int kAudioSampleRate;
- static const int kAudioPacketIntervalMs; // Duration of each audio packet.
static const ChannelLayout kChannelLayout;
static const int kNumBitsPerAudioSample;
- static const int kNoSpeechTimeoutSec;
+ static const int kNoSpeechTimeoutMs;
static const int kEndpointerEstimationTimeMs;
- SpeechRecognizerImpl(content::SpeechRecognitionEventListener* listener,
- int caller_id,
- const std::string& language,
- const std::string& grammar,
- net::URLRequestContextGetter* context_getter,
- bool filter_profanities,
- const std::string& hardware_info,
- const std::string& origin_url);
-
+ SpeechRecognizerImpl(
+ content::SpeechRecognitionEventListener* listener,
+ int caller_id,
+ SpeechRecognitionEngine* engine);
virtual ~SpeechRecognizerImpl();
// content::SpeechRecognizer methods.
- virtual bool StartRecognition() OVERRIDE;
+ virtual void StartRecognition() OVERRIDE;
virtual void AbortRecognition() OVERRIDE;
virtual void StopAudioCapture() OVERRIDE;
virtual bool IsActive() const OVERRIDE;
virtual bool IsCapturingAudio() const OVERRIDE;
+ const SpeechRecognitionEngine& recognition_engine() const;
+
+ private:
+ friend class SpeechRecognizerImplTest;
+
+ enum FSMState {
Satish 2012/03/21 13:29:48 could the FSM changes be moved to a separate CL as
Primiano Tucci (use gerrit) 2012/03/22 11:20:41 Done.
+ kIdle = 0,
+ kStartingRecognition,
+ kEstimatingEnvironment,
+ kWaitingForSpeech,
+ kRecognizingSpeech,
+ kWaitingFinalResult,
+ kMaxState = kWaitingFinalResult
+ };
+
+ enum FSMEvent {
+ kAbortRequest = 0,
+ kStartRequest,
+ kStopCaptureRequest,
+ kAudioData,
+ kRecognitionResult,
+ kRecognitionError,
+ kAudioError,
+ kMaxEvent = kAudioError
+ };
+
+ struct FSMEventArgs {
+ int audio_error_code;
+ AudioChunk* audio_data;
+ content::SpeechRecognitionResult speech_result;
+ content::SpeechRecognitionError error;
+ FSMEventArgs();
+ };
// AudioInputController::EventHandler methods.
virtual void OnCreated(media::AudioInputController* controller) OVERRIDE {}
@@ -63,47 +95,48 @@ class CONTENT_EXPORT SpeechRecognizerImpl
virtual void OnError(media::AudioInputController* controller,
int error_code) OVERRIDE;
virtual void OnData(media::AudioInputController* controller,
- const uint8* data,
- uint32 size) OVERRIDE;
+ const uint8* data, uint32 size) OVERRIDE;
- // SpeechRecognitionRequest::Delegate methods.
- virtual void SetRecognitionResult(
+ // SpeechRecognitionEngineDelegate methods.
+ virtual void OnSpeechEngineResult(
const content::SpeechRecognitionResult& result) OVERRIDE;
-
- private:
- friend class SpeechRecognizerImplTest;
-
- void InformErrorAndAbortRecognition(
- content::SpeechRecognitionErrorCode error);
- void SendRecordedAudioToServer();
-
- void HandleOnError(int error_code); // Handles OnError in the IO thread.
-
- // Handles OnData in the IO thread. Takes ownership of |raw_audio|.
- void HandleOnData(AudioChunk* raw_audio);
-
- // Helper method which closes the audio controller and blocks until done.
+ virtual void OnSpeechEngineError(
+ const content::SpeechRecognitionError& error) OVERRIDE;
+
+ void DispatchEvent(FSMEvent event, FSMEventArgs);
+ void ProcessAudioPipeline();
+ FSMState ProcessEvent(FSMEvent event);
+ FSMState InitializeAndStartRecording();
+ FSMState StartSpeechRecognition();
+ FSMState EnvironmentEstimation();
+ FSMState DetectUserSpeechOrTimeout();
+ FSMState StopCaptureAndWaitForResult();
+ FSMState ProcessIntermediateRecognitionResult();
+ FSMState ProcessFinalRecognitionResult();
+ FSMState Abort();
+ FSMState Abort(const content::SpeechRecognitionError& error);
+ FSMState Abort(bool has_error, const content::SpeechRecognitionError& error);
+ FSMState DetectEndOfSpeech();
+ FSMState DoNothing() const;
+ int GetElapsedTimeMs() const;
+ void UpdateSignalAndNoiseLevels(const float& rms);
void CloseAudioControllerSynchronously();
-
void SetAudioManagerForTesting(AudioManager* audio_manager);
content::SpeechRecognitionEventListener* listener_;
- int caller_id_;
- std::string language_;
- std::string grammar_;
- bool filter_profanities_;
- std::string hardware_info_;
- std::string origin_url_;
-
- scoped_ptr<SpeechRecognitionRequest> request_;
- scoped_refptr<media::AudioInputController> audio_controller_;
- scoped_refptr<net::URLRequestContextGetter> context_getter_;
- AudioEncoder::Codec codec_;
- scoped_ptr<AudioEncoder> encoder_;
+ AudioManager* testing_audio_manager_;
+ scoped_ptr<SpeechRecognitionEngine> recognition_engine_;
Endpointer endpointer_;
+ scoped_refptr<media::AudioInputController> audio_controller_;
+ int caller_id_;
int num_samples_recorded_;
+ bool clipper_detected_clip_;
float audio_level_;
- AudioManager* audio_manager_;
+ float rms_;
+ int event_dispatch_nesting_level_;
+ FSMState state_;
+ FSMEvent event_;
+ FSMEventArgs* event_args_;
DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl);
};

Powered by Google App Engine
This is Rietveld 408576698