Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(555)

Side by Side Diff: content/browser/speech/speech_recognizer_impl.h

Issue 9835049: Speech refactoring: Reimplemented speech_recognizer as a FSM. (CL1.5) (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Minor style fixes. Created 8 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLD | NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ 5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_
6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ 6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_
7 7
8 #include "base/basictypes.h" 8 #include "base/basictypes.h"
9 #include "base/memory/ref_counted.h" 9 #include "base/memory/ref_counted.h"
10 #include "base/memory/scoped_ptr.h" 10 #include "base/memory/scoped_ptr.h"
11 #include "content/browser/speech/endpointer/endpointer.h" 11 #include "content/browser/speech/endpointer/endpointer.h"
12 #include "content/browser/speech/speech_recognition_engine.h" 12 #include "content/browser/speech/speech_recognition_engine.h"
13 #include "content/public/browser/speech_recognizer.h" 13 #include "content/public/browser/speech_recognizer.h"
14 #include "content/public/common/speech_recognition_error.h" 14 #include "content/public/common/speech_recognition_error.h"
15 #include "content/public/common/speech_recognition_result.h"
15 #include "media/audio/audio_input_controller.h" 16 #include "media/audio/audio_input_controller.h"
16 #include "net/url_request/url_request_context_getter.h" 17 #include "net/url_request/url_request_context_getter.h"
17 18
18 namespace content { 19 namespace content {
19 class SpeechRecognitionEventListener; 20 class SpeechRecognitionEventListener;
20 struct SpeechRecognitionResult;
21 }
22
23 namespace media {
24 class AudioInputController;
25 } 21 }
26 22
27 namespace speech { 23 namespace speech {
24 // TODO(primiano) Next CL: Remove the Impl suffix and the exported
25 // /content/public/browser/speech_recognizer.h interface since this class should
26 // not be visible outside (currently we need it for speech input extension API).
28 27
29 // Records audio, sends recorded audio to server and translates server response 28 // Handles speech recognition for a session (identified by |caller_id|), taking
30 // to recognition result. 29 // care of audio capture, silence detection/endpointer and interaction with the
30 // SpeechRecognitionEngine.
31 class CONTENT_EXPORT SpeechRecognizerImpl 31 class CONTENT_EXPORT SpeechRecognizerImpl
32 : public NON_EXPORTED_BASE(content::SpeechRecognizer), 32 : public NON_EXPORTED_BASE(content::SpeechRecognizer),
33 public media::AudioInputController::EventHandler, 33 public media::AudioInputController::EventHandler,
34 public NON_EXPORTED_BASE(SpeechRecognitionEngineDelegate) { 34 public NON_EXPORTED_BASE(SpeechRecognitionEngineDelegate) {
35 public: 35 public:
36 static const int kAudioSampleRate; 36 static const int kAudioSampleRate;
37 static const ChannelLayout kChannelLayout; 37 static const ChannelLayout kChannelLayout;
38 static const int kNumBitsPerAudioSample; 38 static const int kNumBitsPerAudioSample;
39 static const int kNoSpeechTimeoutMs; 39 static const int kNoSpeechTimeoutMs;
40 static const int kEndpointerEstimationTimeMs; 40 static const int kEndpointerEstimationTimeMs;
41 41
42 SpeechRecognizerImpl( 42 SpeechRecognizerImpl(
43 content::SpeechRecognitionEventListener* listener, 43 content::SpeechRecognitionEventListener* listener,
44 int caller_id, 44 int caller_id,
45 const std::string& language, 45 SpeechRecognitionEngine* engine);
46 const std::string& grammar,
47 net::URLRequestContextGetter* context_getter,
48 bool filter_profanities,
49 const std::string& hardware_info,
50 const std::string& origin_url);
51 virtual ~SpeechRecognizerImpl(); 46 virtual ~SpeechRecognizerImpl();
52 47
53 // content::SpeechRecognizer methods. 48 // content::SpeechRecognizer methods.
54 virtual void StartRecognition() OVERRIDE; 49 virtual void StartRecognition() OVERRIDE;
55 virtual void AbortRecognition() OVERRIDE; 50 virtual void AbortRecognition() OVERRIDE;
56 virtual void StopAudioCapture() OVERRIDE; 51 virtual void StopAudioCapture() OVERRIDE;
57 virtual bool IsActive() const OVERRIDE; 52 virtual bool IsActive() const OVERRIDE;
58 virtual bool IsCapturingAudio() const OVERRIDE; 53 virtual bool IsCapturingAudio() const OVERRIDE;
59 const SpeechRecognitionEngine& recognition_engine() const; 54 const SpeechRecognitionEngine& recognition_engine() const;
60 55
56 private:
57 friend class SpeechRecognizerImplTest;
58
59 enum FSMState {
60 STATE_IDLE = 0,
61 STATE_STARTING,
62 STATE_ESTIMATING_ENVIRONMENT,
63 STATE_WAITING_FOR_SPEECH,
64 STATE_RECOGNIZING,
65 STATE_WAITING_FINAL_RESULT,
66 STATE_MAX = STATE_WAITING_FINAL_RESULT
67 };
68
69 enum FSMEvent {
70 EVENT_ABORT = 0,
71 EVENT_START,
72 EVENT_STOP_CAPTURE,
73 EVENT_AUDIO_DATA,
74 EVENT_ENGINE_RESULT,
75 EVENT_ENGINE_ERROR,
76 EVENT_AUDIO_ERROR,
77 EVENT_MAX = EVENT_AUDIO_ERROR
78 };
79
80 struct FSMEventArgs {
81 FSMEvent event;
82 int audio_error_code;
83 scoped_refptr<AudioChunk> audio_data;
84 content::SpeechRecognitionResult engine_result;
85 content::SpeechRecognitionError engine_error;
86 FSMEventArgs();
87 ~FSMEventArgs();
88 };
89
90 typedef base::Callback<FSMState(const FSMEventArgs&)> TransitionFunction;
91
61 // AudioInputController::EventHandler methods. 92 // AudioInputController::EventHandler methods.
62 virtual void OnCreated(media::AudioInputController* controller) OVERRIDE {} 93 virtual void OnCreated(media::AudioInputController* controller) OVERRIDE {}
63 virtual void OnRecording(media::AudioInputController* controller) OVERRIDE {} 94 virtual void OnRecording(media::AudioInputController* controller) OVERRIDE {}
64 virtual void OnError(media::AudioInputController* controller, 95 virtual void OnError(media::AudioInputController* controller,
65 int error_code) OVERRIDE; 96 int error_code) OVERRIDE;
66 virtual void OnData(media::AudioInputController* controller, 97 virtual void OnData(media::AudioInputController* controller,
67 const uint8* data, 98 const uint8* data, uint32 size) OVERRIDE;
68 uint32 size) OVERRIDE; 99 // Callback called on IO thread by audio_controller->Close().
hans 2012/04/02 16:05:59 nit: maybe put a blank line before this, since it'… [remainder of comment truncated by the review UI]
Primiano Tucci (use gerrit) 2012/04/03 10:16:39 Done.
100 void OnAudioClosed(media::AudioInputController*);
69 101
70 // SpeechRecognitionEngineDelegate methods. 102 // SpeechRecognitionEngineDelegate methods.
71 virtual void OnSpeechRecognitionEngineResult( 103 virtual void OnSpeechRecognitionEngineResult(
72 const content::SpeechRecognitionResult& result) OVERRIDE; 104 const content::SpeechRecognitionResult& result) OVERRIDE;
73 virtual void OnSpeechRecognitionEngineError( 105 virtual void OnSpeechRecognitionEngineError(
74 const content::SpeechRecognitionError& error) OVERRIDE; 106 const content::SpeechRecognitionError& error) OVERRIDE;
75 107
76 private: 108 void InitializeFSM();
77 friend class SpeechRecognizerImplTest; 109 void DispatchEvent(FSMEvent, FSMEventArgs);
78 110 void ProcessAudioPipeline(const AudioChunk& raw_audio);
79 void InformErrorAndAbortRecognition( 111 FSMState StartRecording(const FSMEventArgs& event_args);
80 content::SpeechRecognitionErrorCode error); 112 FSMState StartRecognitionEngine(const FSMEventArgs& event_args);
81 void SendRecordedAudioToServer(); 113 FSMState WaitEnvironmentEstimationCompletion(const FSMEventArgs& event_args);
82 114 FSMState DetectUserSpeechOrTimeout(const FSMEventArgs& event_args);
83 void HandleOnError(int error_code); // Handles OnError in the IO thread. 115 FSMState StopCaptureAndWaitResult(const FSMEventArgs& event_args);
84 116 FSMState ProcessIntermediateResult(const FSMEventArgs& event_args);
85 // Handles OnData in the IO thread. 117 FSMState ProcessFinalResult(const FSMEventArgs& event_args);
86 void HandleOnData(scoped_refptr<AudioChunk> raw_audio); 118 FSMState Abort(const FSMEventArgs& event_args);
87 119 FSMState AbortWithError(const content::SpeechRecognitionError* error);
88 void OnAudioClosed(media::AudioInputController*); 120 FSMState AbortWithError(const content::SpeechRecognitionError& error);
89 121 FSMState DetectEndOfSpeech(const FSMEventArgs& event_args);
90 // Helper method which closes the audio controller and frees it asynchronously 122 FSMState DoNothing(const FSMEventArgs& event_args) const;
91 // without blocking the IO thread. 123 int GetElapsedTimeMs() const;
124 void UpdateSignalAndNoiseLevels(const float& rms, bool clip_detected);
92 void CloseAudioControllerAsynchronously(); 125 void CloseAudioControllerAsynchronously();
93
94 void SetAudioManagerForTesting(AudioManager* audio_manager); 126 void SetAudioManagerForTesting(AudioManager* audio_manager);
95 127
96 content::SpeechRecognitionEventListener* listener_; 128 content::SpeechRecognitionEventListener* listener_;
97 AudioManager* testing_audio_manager_; 129 AudioManager* testing_audio_manager_;
98 scoped_ptr<SpeechRecognitionEngine> recognition_engine_; 130 scoped_ptr<SpeechRecognitionEngine> recognition_engine_;
99 Endpointer endpointer_; 131 Endpointer endpointer_;
100 scoped_refptr<media::AudioInputController> audio_controller_; 132 scoped_refptr<media::AudioInputController> audio_controller_;
101 scoped_refptr<net::URLRequestContextGetter> context_getter_;
102 int caller_id_; 133 int caller_id_;
103 std::string language_;
104 std::string grammar_;
105 bool filter_profanities_;
106 std::string hardware_info_;
107 std::string origin_url_;
108 int num_samples_recorded_; 134 int num_samples_recorded_;
109 float audio_level_; 135 float audio_level_;
136 bool in_event_dispatching_;
137 FSMState state_;
138 TransitionFunction fsm[STATE_MAX + 1][EVENT_MAX + 1];
139 const TransitionFunction kUnfeasibleTransition;
110 140
111 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl); 141 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl);
112 }; 142 };
113 143
114 } // namespace speech 144 } // namespace speech
115 145
116 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ 146 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_
OLD | NEW
« no previous file with comments | « no previous file | content/browser/speech/speech_recognizer_impl.cc » ('j') | content/browser/speech/speech_recognizer_impl.cc » ('J')

Powered by Google App Engine
This is Rietveld 408576698