Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(89)

Side by Side Diff: content/browser/speech/speech_recognizer_impl.h

Issue 9663066: Refactoring of chrome speech recognition architecture (CL1.3) (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Fixed according to Satish's (partial) review. Created 8 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ 5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_
6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ 6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_
7 7
8 #include <list> 8 #include "base/basictypes.h"
9 #include <utility> 9 #include "base/memory/ref_counted.h"
10
11 #include "base/compiler_specific.h"
12 #include "base/memory/scoped_ptr.h" 10 #include "base/memory/scoped_ptr.h"
13 #include "content/browser/speech/audio_encoder.h"
14 #include "content/browser/speech/endpointer/endpointer.h" 11 #include "content/browser/speech/endpointer/endpointer.h"
15 #include "content/browser/speech/speech_recognition_request.h" 12 #include "content/browser/speech/speech_recognition_engine.h"
16 #include "content/public/browser/speech_recognizer.h" 13 #include "content/public/browser/speech_recognizer.h"
17 #include "content/public/common/speech_recognition_result.h" 14 #include "content/public/common/speech_recognition_result.h"
18 #include "media/audio/audio_input_controller.h" 15 #include "media/audio/audio_input_controller.h"
16 #include "net/url_request/url_request_context_getter.h"
19 17
20 class AudioManager; 18 namespace media {
19 class AudioInputController;
Satish 2012/03/21 13:29:48 2 spaces to 1 space
20 }
21 21
22 namespace content { 22 namespace content {
23 struct SpeechRecognitionError;
23 class SpeechRecognitionEventListener; 24 class SpeechRecognitionEventListener;
24 } 25 }
25 26
26 namespace speech { 27 namespace speech {
28 // TODO(primiano) Next CL: Remove the Impl suffix and the exported
Satish 2012/03/21 13:29:48 add newline above
29 // /content/public/browser/speech_recognizer.h interface since this class should
30 // not be visible outside (currently we need it for speech input extension API).
27 31
28 // Records audio, sends recorded audio to server and translates server response 32 // Handles speech recognition for a session (identified by |caller_id|), taking
29 // to recognition result. 33 // care of audio capture, silence detection/endpointer and interaction with the
34 // SpeechRecognitionEngine.
30 class CONTENT_EXPORT SpeechRecognizerImpl 35 class CONTENT_EXPORT SpeechRecognizerImpl
31 : NON_EXPORTED_BASE(public content::SpeechRecognizer), 36 : public NON_EXPORTED_BASE(content::SpeechRecognizer),
32 public media::AudioInputController::EventHandler, 37 public media::AudioInputController::EventHandler,
33 public SpeechRecognitionRequestDelegate { 38 public SpeechRecognitionEngineDelegate {
34 public: 39 public:
35 static const int kAudioSampleRate; 40 static const int kAudioSampleRate;
36 static const int kAudioPacketIntervalMs; // Duration of each audio packet.
37 static const ChannelLayout kChannelLayout; 41 static const ChannelLayout kChannelLayout;
38 static const int kNumBitsPerAudioSample; 42 static const int kNumBitsPerAudioSample;
39 static const int kNoSpeechTimeoutSec; 43 static const int kNoSpeechTimeoutMs;
40 static const int kEndpointerEstimationTimeMs; 44 static const int kEndpointerEstimationTimeMs;
41 45
42 SpeechRecognizerImpl(content::SpeechRecognitionEventListener* listener, 46 SpeechRecognizerImpl(
43 int caller_id, 47 content::SpeechRecognitionEventListener* listener,
44 const std::string& language, 48 int caller_id,
45 const std::string& grammar, 49 SpeechRecognitionEngine* engine);
46 net::URLRequestContextGetter* context_getter,
47 bool filter_profanities,
48 const std::string& hardware_info,
49 const std::string& origin_url);
50
51 virtual ~SpeechRecognizerImpl(); 50 virtual ~SpeechRecognizerImpl();
52 51
53 // content::SpeechRecognizer methods. 52 // content::SpeechRecognizer methods.
54 virtual bool StartRecognition() OVERRIDE; 53 virtual void StartRecognition() OVERRIDE;
55 virtual void AbortRecognition() OVERRIDE; 54 virtual void AbortRecognition() OVERRIDE;
56 virtual void StopAudioCapture() OVERRIDE; 55 virtual void StopAudioCapture() OVERRIDE;
57 virtual bool IsActive() const OVERRIDE; 56 virtual bool IsActive() const OVERRIDE;
58 virtual bool IsCapturingAudio() const OVERRIDE; 57 virtual bool IsCapturingAudio() const OVERRIDE;
58 const SpeechRecognitionEngine& recognition_engine() const;
59
60 private:
61 friend class SpeechRecognizerImplTest;
62
63 enum FSMState {
Satish 2012/03/21 13:29:48 could the FSM changes be moved to a separate CL as [...]
Primiano Tucci (use gerrit) 2012/03/22 11:20:41 Done.
64 kIdle = 0,
65 kStartingRecognition,
66 kEstimatingEnvironment,
67 kWaitingForSpeech,
68 kRecognizingSpeech,
69 kWaitingFinalResult,
70 kMaxState = kWaitingFinalResult
71 };
72
73 enum FSMEvent {
74 kAbortRequest = 0,
75 kStartRequest,
76 kStopCaptureRequest,
77 kAudioData,
78 kRecognitionResult,
79 kRecognitionError,
80 kAudioError,
81 kMaxEvent = kAudioError
82 };
83
84 struct FSMEventArgs {
85 int audio_error_code;
86 AudioChunk* audio_data;
87 content::SpeechRecognitionResult speech_result;
88 content::SpeechRecognitionError error;
89 FSMEventArgs();
90 };
59 91
60 // AudioInputController::EventHandler methods. 92 // AudioInputController::EventHandler methods.
61 virtual void OnCreated(media::AudioInputController* controller) OVERRIDE {} 93 virtual void OnCreated(media::AudioInputController* controller) OVERRIDE {}
62 virtual void OnRecording(media::AudioInputController* controller) OVERRIDE {} 94 virtual void OnRecording(media::AudioInputController* controller) OVERRIDE {}
63 virtual void OnError(media::AudioInputController* controller, 95 virtual void OnError(media::AudioInputController* controller,
64 int error_code) OVERRIDE; 96 int error_code) OVERRIDE;
65 virtual void OnData(media::AudioInputController* controller, 97 virtual void OnData(media::AudioInputController* controller,
66 const uint8* data, 98 const uint8* data, uint32 size) OVERRIDE;
67 uint32 size) OVERRIDE;
68 99
69 // SpeechRecognitionRequest::Delegate methods. 100 // SpeechRecognitionEngineDelegate methods.
70 virtual void SetRecognitionResult( 101 virtual void OnSpeechEngineResult(
71 const content::SpeechRecognitionResult& result) OVERRIDE; 102 const content::SpeechRecognitionResult& result) OVERRIDE;
103 virtual void OnSpeechEngineError(
104 const content::SpeechRecognitionError& error) OVERRIDE;
72 105
73 private: 106 void DispatchEvent(FSMEvent event, FSMEventArgs);
74 friend class SpeechRecognizerImplTest; 107 void ProcessAudioPipeline();
75 108 FSMState ProcessEvent(FSMEvent event);
76 void InformErrorAndAbortRecognition( 109 FSMState InitializeAndStartRecording();
77 content::SpeechRecognitionErrorCode error); 110 FSMState StartSpeechRecognition();
78 void SendRecordedAudioToServer(); 111 FSMState EnvironmentEstimation();
79 112 FSMState DetectUserSpeechOrTimeout();
80 void HandleOnError(int error_code); // Handles OnError in the IO thread. 113 FSMState StopCaptureAndWaitForResult();
81 114 FSMState ProcessIntermediateRecognitionResult();
82 // Handles OnData in the IO thread. Takes ownership of |raw_audio|. 115 FSMState ProcessFinalRecognitionResult();
83 void HandleOnData(AudioChunk* raw_audio); 116 FSMState Abort();
84 117 FSMState Abort(const content::SpeechRecognitionError& error);
85 // Helper method which closes the audio controller and blocks until done. 118 FSMState Abort(bool has_error, const content::SpeechRecognitionError& error);
119 FSMState DetectEndOfSpeech();
120 FSMState DoNothing() const;
121 int GetElapsedTimeMs() const;
122 void UpdateSignalAndNoiseLevels(const float& rms);
86 void CloseAudioControllerSynchronously(); 123 void CloseAudioControllerSynchronously();
87
88 void SetAudioManagerForTesting(AudioManager* audio_manager); 124 void SetAudioManagerForTesting(AudioManager* audio_manager);
89 125
90 content::SpeechRecognitionEventListener* listener_; 126 content::SpeechRecognitionEventListener* listener_;
127 AudioManager* testing_audio_manager_;
128 scoped_ptr<SpeechRecognitionEngine> recognition_engine_;
129 Endpointer endpointer_;
130 scoped_refptr<media::AudioInputController> audio_controller_;
91 int caller_id_; 131 int caller_id_;
92 std::string language_;
93 std::string grammar_;
94 bool filter_profanities_;
95 std::string hardware_info_;
96 std::string origin_url_;
97
98 scoped_ptr<SpeechRecognitionRequest> request_;
99 scoped_refptr<media::AudioInputController> audio_controller_;
100 scoped_refptr<net::URLRequestContextGetter> context_getter_;
101 AudioEncoder::Codec codec_;
102 scoped_ptr<AudioEncoder> encoder_;
103 Endpointer endpointer_;
104 int num_samples_recorded_; 132 int num_samples_recorded_;
133 bool clipper_detected_clip_;
105 float audio_level_; 134 float audio_level_;
106 AudioManager* audio_manager_; 135 float rms_;
136 int event_dispatch_nesting_level_;
137 FSMState state_;
138 FSMEvent event_;
139 FSMEventArgs* event_args_;
107 140
108 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl); 141 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl);
109 }; 142 };
110 143
111 } // namespace speech 144 } // namespace speech
112 145
113 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ 146 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698