Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(37)

Side by Side Diff: content/browser/speech/speech_recognizer_impl.h

Issue 9835049: Speech refactoring: Reimplemented speech_recognizer as an FSM. (CL1.5) (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Rebased from master. Created 8 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ 5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_
6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ 6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_
7 7
8 #include "base/basictypes.h" 8 #include "base/basictypes.h"
9 #include "base/memory/ref_counted.h"
9 #include "base/memory/scoped_ptr.h" 10 #include "base/memory/scoped_ptr.h"
10 #include "content/browser/speech/endpointer/endpointer.h" 11 #include "content/browser/speech/endpointer/endpointer.h"
11 #include "content/browser/speech/speech_recognition_engine.h" 12 #include "content/browser/speech/speech_recognition_engine.h"
12 #include "content/public/browser/speech_recognizer.h" 13 #include "content/public/browser/speech_recognizer.h"
13 #include "content/public/common/speech_recognition_error.h" 14 #include "content/public/common/speech_recognition_error.h"
15 #include "content/public/common/speech_recognition_result.h"
14 #include "media/audio/audio_input_controller.h" 16 #include "media/audio/audio_input_controller.h"
15 #include "net/url_request/url_request_context_getter.h" 17 #include "net/url_request/url_request_context_getter.h"
16 18
17 namespace content { 19 namespace content {
18 class SpeechRecognitionEventListener; 20 class SpeechRecognitionEventListener;
19 struct SpeechRecognitionResult;
20 }
21
22 namespace media {
23 class AudioInputController;
24 } 21 }
25 22
26 namespace speech { 23 namespace speech {
24 // TODO(primiano) Next CL: Remove the Impl suffix and the exported
25 // /content/public/browser/speech_recognizer.h interface since this class should
26 // not be visible outside (currently we need it for speech input extension API).
27 27
28 // Records audio, sends recorded audio to server and translates server response 28 // Handles speech recognition for a session (identified by |caller_id|), taking
29 // to recognition result. 29 // care of audio capture, silence detection/endpointer and interaction with the
30 // SpeechRecognitionEngine.
30 class CONTENT_EXPORT SpeechRecognizerImpl 31 class CONTENT_EXPORT SpeechRecognizerImpl
31 : public NON_EXPORTED_BASE(content::SpeechRecognizer), 32 : public NON_EXPORTED_BASE(content::SpeechRecognizer),
32 public media::AudioInputController::EventHandler, 33 public media::AudioInputController::EventHandler,
33 public NON_EXPORTED_BASE(SpeechRecognitionEngineDelegate) { 34 public NON_EXPORTED_BASE(SpeechRecognitionEngineDelegate) {
34 public: 35 public:
35 static const int kAudioSampleRate; 36 static const int kAudioSampleRate;
36 static const ChannelLayout kChannelLayout; 37 static const ChannelLayout kChannelLayout;
37 static const int kNumBitsPerAudioSample; 38 static const int kNumBitsPerAudioSample;
38 static const int kNoSpeechTimeoutMs; 39 static const int kNoSpeechTimeoutMs;
39 static const int kEndpointerEstimationTimeMs; 40 static const int kEndpointerEstimationTimeMs;
40 41
41 SpeechRecognizerImpl( 42 SpeechRecognizerImpl(
42 content::SpeechRecognitionEventListener* listener, 43 content::SpeechRecognitionEventListener* listener,
43 int caller_id, 44 int caller_id,
44 const std::string& language, 45 SpeechRecognitionEngine* engine);
45 const std::string& grammar,
46 net::URLRequestContextGetter* context_getter,
47 bool filter_profanities,
48 const std::string& hardware_info,
49 const std::string& origin_url);
50 virtual ~SpeechRecognizerImpl(); 46 virtual ~SpeechRecognizerImpl();
51 47
52 // content::SpeechRecognizer methods. 48 // content::SpeechRecognizer methods.
53 virtual void StartRecognition() OVERRIDE; 49 virtual void StartRecognition() OVERRIDE;
54 virtual void AbortRecognition() OVERRIDE; 50 virtual void AbortRecognition() OVERRIDE;
55 virtual void StopAudioCapture() OVERRIDE; 51 virtual void StopAudioCapture() OVERRIDE;
56 virtual bool IsActive() const OVERRIDE; 52 virtual bool IsActive() const OVERRIDE;
57 virtual bool IsCapturingAudio() const OVERRIDE; 53 virtual bool IsCapturingAudio() const OVERRIDE;
58 const SpeechRecognitionEngine& recognition_engine() const; 54 const SpeechRecognitionEngine& recognition_engine() const;
59 55
56 private:
57 friend class SpeechRecognizerImplTest;
58
59 enum FSMState {
60 kIdle = 0,
Satish 2012/03/27 09:47:42 enum values should be MACRO_STYLE http://dev.chrom
Primiano Tucci (use gerrit) 2012/03/28 13:24:44 Done.
61 kStartingRecognition,
62 kEstimatingEnvironment,
63 kWaitingForSpeech,
64 kRecognizingSpeech,
65 kWaitingFinalResult,
66 kMaxState = kWaitingFinalResult
67 };
68
69 enum FSMEvent {
70 kAbortRequest = 0,
Satish 2012/03/27 09:47:42 seems like we can drop the 'Request' suffix in the
Primiano Tucci (use gerrit) 2012/03/28 13:24:44 I added the suffix because their name should repre
71 kStartRequest,
72 kStopCaptureRequest,
73 kAudioData,
74 kRecognitionResult,
75 kRecognitionError,
76 kAudioError,
77 kMaxEvent = kAudioError
78 };
79
80 struct FSMEventArgs {
81 int audio_error_code;
82 AudioChunk* audio_data;
83 content::SpeechRecognitionResult speech_result;
84 content::SpeechRecognitionError error;
Satish 2012/03/27 09:47:42 change to speech_error
Primiano Tucci (use gerrit) 2012/03/28 13:24:44 Done.
85 FSMEventArgs();
86 };
87
60 // AudioInputController::EventHandler methods. 88 // AudioInputController::EventHandler methods.
61 virtual void OnCreated(media::AudioInputController* controller) OVERRIDE {} 89 virtual void OnCreated(media::AudioInputController* controller) OVERRIDE {}
62 virtual void OnRecording(media::AudioInputController* controller) OVERRIDE {} 90 virtual void OnRecording(media::AudioInputController* controller) OVERRIDE {}
63 virtual void OnError(media::AudioInputController* controller, 91 virtual void OnError(media::AudioInputController* controller,
64 int error_code) OVERRIDE; 92 int error_code) OVERRIDE;
65 virtual void OnData(media::AudioInputController* controller, 93 virtual void OnData(media::AudioInputController* controller,
66 const uint8* data, 94 const uint8* data, uint32 size) OVERRIDE;
67 uint32 size) OVERRIDE;
68 95
69 // SpeechRecognitionEngineDelegate methods. 96 // SpeechRecognitionEngineDelegate methods.
70 virtual void OnSpeechRecognitionEngineResult( 97 virtual void OnSpeechRecognitionEngineResult(
71 const content::SpeechRecognitionResult& result) OVERRIDE; 98 const content::SpeechRecognitionResult& result) OVERRIDE;
72 virtual void OnSpeechRecognitionEngineError( 99 virtual void OnSpeechRecognitionEngineError(
73 const content::SpeechRecognitionError& error) OVERRIDE; 100 const content::SpeechRecognitionError& error) OVERRIDE;
74 101
75 private: 102 void DispatchEvent(FSMEvent event, FSMEventArgs);
76 friend class SpeechRecognizerImplTest; 103 void ProcessAudioPipeline();
77 104 FSMState ProcessEvent(FSMEvent event);
78 void InformErrorAndAbortRecognition( 105 FSMState InitializeAndStartRecording();
Satish 2012/03/27 09:47:42 rename to StartRecording
Primiano Tucci (use gerrit) 2012/03/28 13:24:44 Done.
79 content::SpeechRecognitionErrorCode error); 106 FSMState StartSpeechRecognition();
Satish 2012/03/27 09:47:42 rename to StartRecognitionEngine
Primiano Tucci (use gerrit) 2012/03/28 13:24:44 Done.
80 void SendRecordedAudioToServer(); 107 FSMState EnvironmentEstimation();
81 108 FSMState DetectUserSpeechOrTimeout();
82 void HandleOnError(int error_code); // Handles OnError in the IO thread. 109 FSMState StopCaptureAndWaitForResult();
83 110 FSMState ProcessIntermediateRecognitionResult();
84 // Handles OnData in the IO thread. Takes ownership of |raw_audio|. 111 FSMState ProcessFinalRecognitionResult();
85 void HandleOnData(AudioChunk* raw_audio); 112 FSMState Abort();
86 113 FSMState Abort(const content::SpeechRecognitionError& error);
87 // Helper method which closes the audio controller and blocks until done. 114 FSMState Abort(bool has_error, const content::SpeechRecognitionError& error);
115 FSMState DetectEndOfSpeech();
116 FSMState DoNothing() const;
117 int GetElapsedTimeMs() const;
118 void UpdateSignalAndNoiseLevels(const float& rms);
88 void CloseAudioControllerSynchronously(); 119 void CloseAudioControllerSynchronously();
89
90 void SetAudioManagerForTesting(AudioManager* audio_manager); 120 void SetAudioManagerForTesting(AudioManager* audio_manager);
91 121
92 content::SpeechRecognitionEventListener* listener_; 122 content::SpeechRecognitionEventListener* listener_;
93 AudioManager* testing_audio_manager_; 123 AudioManager* testing_audio_manager_;
94 scoped_ptr<SpeechRecognitionEngine> recognition_engine_; 124 scoped_ptr<SpeechRecognitionEngine> recognition_engine_;
95 Endpointer endpointer_; 125 Endpointer endpointer_;
96 scoped_refptr<media::AudioInputController> audio_controller_; 126 scoped_refptr<media::AudioInputController> audio_controller_;
97 scoped_refptr<net::URLRequestContextGetter> context_getter_;
98 int caller_id_; 127 int caller_id_;
99 std::string language_;
100 std::string grammar_;
101 bool filter_profanities_;
102 std::string hardware_info_;
103 std::string origin_url_;
104 int num_samples_recorded_; 128 int num_samples_recorded_;
129 bool clipper_detected_clip_;
105 float audio_level_; 130 float audio_level_;
131 float rms_;
132 int event_dispatch_nesting_level_;
133 FSMState state_;
134 FSMEvent event_;
135 FSMEventArgs* event_args_;
106 136
107 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl); 137 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl);
108 }; 138 };
109 139
110 } // namespace speech 140 } // namespace speech
111 141
112 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ 142 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_
OLDNEW
« no previous file with comments | « no previous file | content/browser/speech/speech_recognizer_impl.cc » ('j') | content/browser/speech/speech_recognizer_impl.cc » ('J')

Powered by Google App Engine
This is Rietveld 408576698