content/browser/speech/speech_recognizer_impl.h - Issue 9663066: Refactoring of chrome speech recognition architecture (CL1.3)

Unified Diff: content/browser/speech/speech_recognizer_impl.h

Issue 9663066: Refactoring of chrome speech recognition architecture (CL1.3) (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Fixed according to Hans' review. Created 8 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« content/browser/speech/speech_recognition_engine.h ('K') | « content/browser/speech/speech_recognition_request_unittest.cc ('k') | content/browser/speech/speech_recognizer_impl.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: content/browser/speech/speech_recognizer_impl.h

diff --git a/content/browser/speech/speech_recognizer_impl.h b/content/browser/speech/speech_recognizer_impl.h

index 1b93fcd01b337763c184e5e118806ab9b42c22cd..e63e316108d9f594da5f816cea2c0901cfa25103 100644

--- a/content/browser/speech/speech_recognizer_impl.h

+++ b/content/browser/speech/speech_recognizer_impl.h

@@ -5,57 +5,89 @@

#ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_

#define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_

-#include <list>

-#include <utility>

-#include "base/compiler_specific.h"

+#include "base/basictypes.h"

+#include "base/memory/ref_counted.h"

#include "base/memory/scoped_ptr.h"

-#include "content/browser/speech/audio_encoder.h"

#include "content/browser/speech/endpointer/endpointer.h"

-#include "content/browser/speech/speech_recognition_request.h"

+#include "content/browser/speech/speech_recognition_engine.h"

#include "content/public/browser/speech_recognizer.h"

#include "content/public/common/speech_recognition_result.h"

#include "media/audio/audio_input_controller.h"

+#include "net/url_request/url_request_context_getter.h"

-class AudioManager;

+namespace media {

+class AudioInputController;

namespace content {

+struct SpeechRecognitionError;

class SpeechRecognitionEventListener;

}

namespace speech {

-// Records audio, sends recorded audio to server and translates server response

-// to recognition result.

-class CONTENT_EXPORT SpeechRecognizerImpl

- : NON_EXPORTED_BASE(public content::SpeechRecognizer),

+// TODO(primiano) Next CL: Remove the Impl suffix and the exported

+// /content/public/browser/speech_recognizer.h interface since this class should

+// not be visible outside (currently we need it for the speech input extension

+// API).

+// Handles speech recognition for a session (identified by |caller_id|), taking

+// care of audio capture, silence detection/endpointer and interaction with the

+// SpeechRecognitionEngine.

+class SpeechRecognizerImpl

+ : public content::SpeechRecognizer,

public media::AudioInputController::EventHandler,

- public SpeechRecognitionRequestDelegate {

+ public SpeechRecognitionEngineDelegate {

public:

static const int kAudioSampleRate;

- static const int kAudioPacketIntervalMs; // Duration of each audio packet.

static const ChannelLayout kChannelLayout;

static const int kNumBitsPerAudioSample;

- static const int kNoSpeechTimeoutSec;

+ static const int kNoSpeechTimeoutMs;

static const int kEndpointerEstimationTimeMs;

- SpeechRecognizerImpl(content::SpeechRecognitionEventListener* listener,

- int caller_id,

- const std::string& language,

- const std::string& grammar,

- net::URLRequestContextGetter* context_getter,

- bool filter_profanities,

- const std::string& hardware_info,

- const std::string& origin_url);

+ SpeechRecognizerImpl(

+ content::SpeechRecognitionEventListener* listener,

+ int caller_id,

+ SpeechRecognitionEngine* engine);

virtual ~SpeechRecognizerImpl();

// content::SpeechRecognizer methods.

- virtual bool StartRecognition() OVERRIDE;

+ virtual void StartRecognition() OVERRIDE;

virtual void AbortRecognition() OVERRIDE;

virtual void StopAudioCapture() OVERRIDE;

virtual bool IsActive() const OVERRIDE;

virtual bool IsCapturingAudio() const OVERRIDE;

+ const SpeechRecognitionEngine& recognition_engine() const;

+ private:

+ friend class SpeechRecognizerImplTest;

+ enum FSMState {

+ kIdle = 0,

+ kStartingRecognition,

+ kEstimatingEnvironment,

+ kWaitingForSpeech,

+ kRecognizingSpeech,

+ kWaitingFinalResult,

+ kMaxState = kWaitingFinalResult

+ };

+ enum FSMEvent {

+ kAbortRequest = 0,

+ kStartRequest,

+ kStopCaptureRequest,

+ kAudioData,

+ kRecognitionResult,

+ kRecognitionError,

+ kAudioError,

+ kMaxEvent = kAudioError

+ };

+ struct FSMEventArgs {

+ int audio_error_code;

+ AudioChunk* audio_data;

+ content::SpeechRecognitionResult speech_result;

+ content::SpeechRecognitionError error;

+ FSMEventArgs();

+ };

// AudioInputController::EventHandler methods.

virtual void OnCreated(media::AudioInputController* controller) OVERRIDE {}

@@ -63,47 +95,48 @@ class CONTENT_EXPORT SpeechRecognizerImpl

virtual void OnError(media::AudioInputController* controller,

int error_code) OVERRIDE;

virtual void OnData(media::AudioInputController* controller,

- const uint8* data,

- uint32 size) OVERRIDE;

+ const uint8* data, uint32 size) OVERRIDE;

- // SpeechRecognitionRequest::Delegate methods.

- virtual void SetRecognitionResult(

+ // SpeechRecognitionEngineDelegate methods.

+ virtual void OnSpeechEngineResult(

const content::SpeechRecognitionResult& result) OVERRIDE;

- private:

- friend class SpeechRecognizerImplTest;

- void InformErrorAndAbortRecognition(

- content::SpeechRecognitionErrorCode error);

- void SendRecordedAudioToServer();

- void HandleOnError(int error_code); // Handles OnError in the IO thread.

- // Handles OnData in the IO thread. Takes ownership of |raw_audio|.

- void HandleOnData(AudioChunk* raw_audio);

- // Helper method which closes the audio controller and blocks until done.

+ virtual void OnSpeechEngineError(

+ const content::SpeechRecognitionError& error) OVERRIDE;

+ void DispatchEvent(FSMEvent event, FSMEventArgs);

+ void ProcessAudioPipeline();

+ FSMState ProcessEvent(FSMEvent event);

+ FSMState InitializeAndStartRecording();

+ FSMState StartSpeechRecognition();

+ FSMState EnvironmentEstimation();

+ FSMState DetectUserSpeechOrTimeout();

+ FSMState StopCaptureAndWaitForResult();

+ FSMState ProcessIntermediateRecognitionResult();

+ FSMState ProcessFinalRecognitionResult();

+ FSMState Abort();

+ FSMState Abort(const content::SpeechRecognitionError& error);

+ FSMState Abort(bool has_error, const content::SpeechRecognitionError& error);

+ FSMState DetectEndOfSpeech();

+ FSMState DoNothing() const;

+ int GetElapsedTimeMs() const;

+ void UpdateSignalAndNoiseLevels(const float& rms);

void CloseAudioControllerSynchronously();

void SetAudioManagerForTesting(AudioManager* audio_manager);

content::SpeechRecognitionEventListener* listener_;

- int caller_id_;

- std::string language_;

- std::string grammar_;

- bool filter_profanities_;

- std::string hardware_info_;

- std::string origin_url_;

- scoped_ptr<SpeechRecognitionRequest> request_;

- scoped_refptr<media::AudioInputController> audio_controller_;

- scoped_refptr<net::URLRequestContextGetter> context_getter_;

- AudioEncoder::Codec codec_;

- scoped_ptr<AudioEncoder> encoder_;

+ AudioManager* testing_audio_manager_;

+ scoped_ptr<SpeechRecognitionEngine> recognition_engine_;

Endpointer endpointer_;

+ scoped_refptr<media::AudioInputController> audio_controller_;

+ int caller_id_;

int num_samples_recorded_;

+ bool clipper_detected_clip_;

float audio_level_;

- AudioManager* audio_manager_;

+ float rms_;

+ int event_dispatch_nesting_level_;

+ FSMState state_;

+ FSMEvent event_;

+ FSMEventArgs* event_args_;

DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl);

};