Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(307)

Side by Side Diff: content/browser/speech/speech_recognizer_impl.h

Issue 9835049: Speech refactoring: Reimplemented speech_recognizer as a FSM. (CL1.5) (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Fixed according to Bulach review. Created 8 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | content/browser/speech/speech_recognizer_impl.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ 5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_
6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ 6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_
7 7
8 #include "base/basictypes.h" 8 #include "base/basictypes.h"
9 #include "base/memory/ref_counted.h" 9 #include "base/memory/ref_counted.h"
10 #include "base/memory/scoped_ptr.h" 10 #include "base/memory/scoped_ptr.h"
11 #include "content/browser/speech/endpointer/endpointer.h" 11 #include "content/browser/speech/endpointer/endpointer.h"
12 #include "content/browser/speech/speech_recognition_engine.h" 12 #include "content/browser/speech/speech_recognition_engine.h"
13 #include "content/public/browser/speech_recognizer.h" 13 #include "content/public/browser/speech_recognizer.h"
14 #include "content/public/common/speech_recognition_error.h" 14 #include "content/public/common/speech_recognition_error.h"
15 #include "content/public/common/speech_recognition_result.h"
15 #include "media/audio/audio_input_controller.h" 16 #include "media/audio/audio_input_controller.h"
16 #include "net/url_request/url_request_context_getter.h" 17 #include "net/url_request/url_request_context_getter.h"
17 18
18 namespace content { 19 namespace content {
19 class SpeechRecognitionEventListener; 20 class SpeechRecognitionEventListener;
20 struct SpeechRecognitionResult; 21 struct SpeechRecognitionResult;
21 } 22 }
22 23
23 namespace media { 24 namespace media {
24 class AudioInputController; 25 class AudioInputController;
25 class AudioManager; 26 class AudioManager;
26 } 27 }
27 28
28 namespace speech { 29 namespace speech {
30 // TODO(primiano) Next CL: Remove the Impl suffix and the exported
Satish 2012/04/12 08:58:33 add newline above full length comments such as this
Primiano Tucci (use gerrit) 2012/04/12 12:56:48 Done.
31 // /content/public/browser/speech_recognizer.h interface since this class should
32 // not be visible outside (currently we need it for speech input extension API).
29 33
30 // Records audio, sends recorded audio to server and translates server response 34 // Handles speech recognition for a session (identified by |caller_id|), taking
31 // to recognition result. 35 // care of audio capture, silence detection/endpointer and interaction with the
36 // SpeechRecognitionEngine.
32 class CONTENT_EXPORT SpeechRecognizerImpl 37 class CONTENT_EXPORT SpeechRecognizerImpl
33 : public NON_EXPORTED_BASE(content::SpeechRecognizer), 38 : public NON_EXPORTED_BASE(content::SpeechRecognizer),
34 public media::AudioInputController::EventHandler, 39 public media::AudioInputController::EventHandler,
35 public NON_EXPORTED_BASE(SpeechRecognitionEngineDelegate) { 40 public NON_EXPORTED_BASE(SpeechRecognitionEngineDelegate) {
36 public: 41 public:
37 static const int kAudioSampleRate; 42 static const int kAudioSampleRate;
38 static const ChannelLayout kChannelLayout; 43 static const ChannelLayout kChannelLayout;
39 static const int kNumBitsPerAudioSample; 44 static const int kNumBitsPerAudioSample;
40 static const int kNoSpeechTimeoutMs; 45 static const int kNoSpeechTimeoutMs;
41 static const int kEndpointerEstimationTimeMs; 46 static const int kEndpointerEstimationTimeMs;
42 47
43 SpeechRecognizerImpl( 48 SpeechRecognizerImpl(
44 content::SpeechRecognitionEventListener* listener, 49 content::SpeechRecognitionEventListener* listener,
45 int caller_id, 50 int caller_id,
46 const std::string& language, 51 SpeechRecognitionEngine* engine);
47 const std::string& grammar,
48 net::URLRequestContextGetter* context_getter,
49 bool filter_profanities,
50 const std::string& hardware_info,
51 const std::string& origin_url);
52 virtual ~SpeechRecognizerImpl(); 52 virtual ~SpeechRecognizerImpl();
53 53
54 // content::SpeechRecognizer methods. 54 // content::SpeechRecognizer methods.
55 virtual void StartRecognition() OVERRIDE; 55 virtual void StartRecognition() OVERRIDE;
56 virtual void AbortRecognition() OVERRIDE; 56 virtual void AbortRecognition() OVERRIDE;
57 virtual void StopAudioCapture() OVERRIDE; 57 virtual void StopAudioCapture() OVERRIDE;
58 virtual bool IsActive() const OVERRIDE; 58 virtual bool IsActive() const OVERRIDE;
59 virtual bool IsCapturingAudio() const OVERRIDE; 59 virtual bool IsCapturingAudio() const OVERRIDE;
60 const SpeechRecognitionEngine& recognition_engine() const; 60 const SpeechRecognitionEngine& recognition_engine() const;
61 61
62 private:
63 friend class SpeechRecognizerImplTest;
64
65 enum FSMState {
66 STATE_IDLE = 0,
67 STATE_STARTING,
68 STATE_ESTIMATING_ENVIRONMENT,
69 STATE_WAITING_FOR_SPEECH,
70 STATE_RECOGNIZING,
71 STATE_WAITING_FINAL_RESULT,
72 STATE_MAX = STATE_WAITING_FINAL_RESULT
73 };
74
75 enum FSMEvent {
76 EVENT_ABORT = 0,
77 EVENT_START,
78 EVENT_STOP_CAPTURE,
79 EVENT_AUDIO_DATA,
80 EVENT_ENGINE_RESULT,
81 EVENT_ENGINE_ERROR,
82 EVENT_AUDIO_ERROR,
83 EVENT_MAX = EVENT_AUDIO_ERROR
84 };
85
86 struct FSMEventArgs {
87 explicit FSMEventArgs(FSMEvent event_value);
88 ~FSMEventArgs();
89
90 FSMEvent event;
91 int audio_error_code;
92 scoped_refptr<AudioChunk> audio_data;
93 content::SpeechRecognitionResult engine_result;
94 content::SpeechRecognitionError engine_error;
95 };
96
97 void DispatchEvent(const FSMEventArgs& event_args);
98 FSMState ExecuteTransitionAndGetNextState(const FSMEventArgs& args);
99 void ProcessAudioPipeline(const AudioChunk& raw_audio);
100 FSMState StartRecording(const FSMEventArgs& event_args);
101 FSMState StartRecognitionEngine(const FSMEventArgs& event_args);
102 FSMState WaitEnvironmentEstimationCompletion(const FSMEventArgs& event_args);
103 FSMState DetectUserSpeechOrTimeout(const FSMEventArgs& event_args);
104 FSMState StopCaptureAndWaitForResult(const FSMEventArgs& event_args);
105 FSMState ProcessIntermediateResult(const FSMEventArgs& event_args);
106 FSMState ProcessFinalResult(const FSMEventArgs& event_args);
107 FSMState Abort(const FSMEventArgs& event_args);
108 FSMState AbortWithError(const content::SpeechRecognitionError* error);
109 FSMState AbortWithError(const content::SpeechRecognitionError& error);
110 FSMState DetectEndOfSpeech(const FSMEventArgs& event_args);
111 FSMState DoNothing(const FSMEventArgs& event_args) const;
112 FSMState NotFeasible(const FSMEventArgs& event_args);
113 int GetElapsedTimeMs() const;
Satish 2012/04/12 08:58:33 can we separate logically the above methods from below…
Primiano Tucci (use gerrit) 2012/04/12 12:56:48 Done.
114 void UpdateSignalAndNoiseLevels(const float& rms, bool clip_detected);
115 void CloseAudioControllerAsynchronously();
116 void SetAudioManagerForTesting(media::AudioManager* audio_manager);
117
62 // AudioInputController::EventHandler methods. 118 // AudioInputController::EventHandler methods.
63 virtual void OnCreated(media::AudioInputController* controller) OVERRIDE {} 119 virtual void OnCreated(media::AudioInputController* controller) OVERRIDE {}
64 virtual void OnRecording(media::AudioInputController* controller) OVERRIDE {} 120 virtual void OnRecording(media::AudioInputController* controller) OVERRIDE {}
65 virtual void OnError(media::AudioInputController* controller, 121 virtual void OnError(media::AudioInputController* controller,
66 int error_code) OVERRIDE; 122 int error_code) OVERRIDE;
67 virtual void OnData(media::AudioInputController* controller, 123 virtual void OnData(media::AudioInputController* controller,
68 const uint8* data, 124 const uint8* data, uint32 size) OVERRIDE;
69 uint32 size) OVERRIDE; 125
126 // Callback called on IO thread by audio_controller->Close().
127 void OnAudioClosed(media::AudioInputController*);
70 128
71 // SpeechRecognitionEngineDelegate methods. 129 // SpeechRecognitionEngineDelegate methods.
72 virtual void OnSpeechRecognitionEngineResult( 130 virtual void OnSpeechRecognitionEngineResult(
73 const content::SpeechRecognitionResult& result) OVERRIDE; 131 const content::SpeechRecognitionResult& result) OVERRIDE;
74 virtual void OnSpeechRecognitionEngineError( 132 virtual void OnSpeechRecognitionEngineError(
75 const content::SpeechRecognitionError& error) OVERRIDE; 133 const content::SpeechRecognitionError& error) OVERRIDE;
76 134
77 private:
78 friend class SpeechRecognizerImplTest;
79
80 void InformErrorAndAbortRecognition(
81 content::SpeechRecognitionErrorCode error);
82 void SendRecordedAudioToServer();
83
84 void HandleOnError(int error_code); // Handles OnError in the IO thread.
85
86 // Handles OnData in the IO thread.
87 void HandleOnData(scoped_refptr<AudioChunk> raw_audio);
88
89 void OnAudioClosed(media::AudioInputController*);
90
91 // Helper method which closes the audio controller and frees it asynchronously
92 // without blocking the IO thread.
93 void CloseAudioControllerAsynchronously();
94
95 void SetAudioManagerForTesting(media::AudioManager* audio_manager);
96
97 content::SpeechRecognitionEventListener* listener_; 135 content::SpeechRecognitionEventListener* listener_;
98 media::AudioManager* testing_audio_manager_; 136 media::AudioManager* testing_audio_manager_;
99 scoped_ptr<SpeechRecognitionEngine> recognition_engine_; 137 scoped_ptr<SpeechRecognitionEngine> recognition_engine_;
100 Endpointer endpointer_; 138 Endpointer endpointer_;
101 scoped_refptr<media::AudioInputController> audio_controller_; 139 scoped_refptr<media::AudioInputController> audio_controller_;
102 scoped_refptr<net::URLRequestContextGetter> context_getter_;
103 int caller_id_; 140 int caller_id_;
104 std::string language_;
105 std::string grammar_;
106 bool filter_profanities_;
107 std::string hardware_info_;
108 std::string origin_url_;
109 int num_samples_recorded_; 141 int num_samples_recorded_;
110 float audio_level_; 142 float audio_level_;
143 bool in_event_dispatching_;
Satish 2012/04/12 08:58:33 probably reword as 'is_dispatching_event_'
Primiano Tucci (use gerrit) 2012/04/12 12:56:48 Done.
144 FSMState state_;
111 145
112 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl); 146 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl);
113 }; 147 };
114 148
115 } // namespace speech 149 } // namespace speech
116 150
117 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ 151 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_
OLDNEW
« no previous file with comments | « no previous file | content/browser/speech/speech_recognizer_impl.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698