Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(284)

Side by Side Diff: content/browser/speech/speech_recognizer_impl.h

Issue 9663066: Refactoring of chrome speech recognition architecture (CL1.3) (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Rebased from master. Created 8 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ 5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_
6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ 6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_
7 7
8 #include <list> 8 #include "base/basictypes.h"
9 #include <utility> 9 #include "base/memory/ref_counted.h"
10
11 #include "base/compiler_specific.h"
12 #include "base/memory/scoped_ptr.h" 10 #include "base/memory/scoped_ptr.h"
13 #include "content/browser/speech/audio_encoder.h"
14 #include "content/browser/speech/endpointer/endpointer.h" 11 #include "content/browser/speech/endpointer/endpointer.h"
15 #include "content/browser/speech/speech_recognition_request.h" 12 #include "content/browser/speech/speech_recognition_engine.h"
16 #include "content/public/browser/speech_recognizer.h" 13 #include "content/public/browser/speech_recognizer.h"
17 #include "content/public/common/speech_recognition_result.h" 14 #include "content/public/common/speech_recognition_result.h"
18 #include "media/audio/audio_input_controller.h" 15 #include "media/audio/audio_input_controller.h"
16 #include "net/url_request/url_request_context_getter.h"
19 17
20 class AudioManager; 18 namespace media {
19 class AudioInputController;
20 }
21 21
22 namespace content { 22 namespace content {
23 struct SpeechRecognitionError;
23 class SpeechRecognitionEventListener; 24 class SpeechRecognitionEventListener;
24 } 25 }
25 26
26 namespace speech { 27 namespace speech {
28 // TODO(primiano) Current CL: check CONTENT_EXPORT and NON_EXPORTED_BASE
hans 2012/03/16 11:12:56 should this be addressed before this cl is landed?
Primiano Tucci (use gerrit) 2012/03/16 15:03:42 Done.
29 // TODO(primiano) Next CL: Remove the Impl suffix and the exported
30 // /content/public/browser/speech_recognizer.h interface since this class should
31 // not be visible outside (currently we need it for speech input extension API).
27 32
28 // Records audio, sends recorded audio to server and translates server response 33 // Handles speech recognition for a session (identified by |caller_id|), taking
29 // to recognition result. 34 // care of audio capture, silence detection/endpointer and interaction with the
35 // SpeechRecognitionEngine (which is injected through the constructor).
hans 2012/03/16 11:12:56 i'm confused by the last bit here.. "SpeechRecogni
Primiano Tucci (use gerrit) 2012/03/16 15:03:42 Uh, definitely right. Probably it was just a wrong
30 class CONTENT_EXPORT SpeechRecognizerImpl 36 class CONTENT_EXPORT SpeechRecognizerImpl
31 : NON_EXPORTED_BASE(public content::SpeechRecognizer), 37 : NON_EXPORTED_BASE(public content::SpeechRecognizer),
32 public media::AudioInputController::EventHandler, 38 public media::AudioInputController::EventHandler,
33 public SpeechRecognitionRequestDelegate { 39 public SpeechRecognitionEngineDelegate {
34 public: 40 public:
35 static const int kAudioSampleRate; 41 static const int kAudioSampleRate;
36 static const int kAudioPacketIntervalMs; // Duration of each audio packet.
37 static const ChannelLayout kChannelLayout; 42 static const ChannelLayout kChannelLayout;
38 static const int kNumBitsPerAudioSample; 43 static const int kNumBitsPerAudioSample;
39 static const int kNoSpeechTimeoutSec; 44 static const int kNoSpeechTimeoutMs;
40 static const int kEndpointerEstimationTimeMs; 45 static const int kEndpointerEstimationTimeMs;
41 46
42 SpeechRecognizerImpl(content::SpeechRecognitionEventListener* listener, 47 SpeechRecognizerImpl(
43 int caller_id, 48 content::SpeechRecognitionEventListener* listener,
44 const std::string& language, 49 int caller_id,
45 const std::string& grammar, 50 SpeechRecognitionEngine* engine);
46 net::URLRequestContextGetter* context_getter,
47 bool filter_profanities,
48 const std::string& hardware_info,
49 const std::string& origin_url);
50
51 virtual ~SpeechRecognizerImpl(); 51 virtual ~SpeechRecognizerImpl();
52 52
53 // content::SpeechRecognizer methods. 53 // content::SpeechRecognizer methods.
54 virtual bool StartRecognition() OVERRIDE; 54 virtual void StartRecognition() OVERRIDE;
55 virtual void AbortRecognition() OVERRIDE; 55 virtual void AbortRecognition() OVERRIDE;
56 virtual void StopAudioCapture() OVERRIDE; 56 virtual void StopAudioCapture() OVERRIDE;
57 virtual bool IsActive() const OVERRIDE; 57 virtual bool IsActive() const OVERRIDE;
58 virtual bool IsCapturingAudio() const OVERRIDE; 58 virtual bool IsCapturingAudio() const OVERRIDE;
59 const SpeechRecognitionEngine& recognition_engine() const;
60
61 private:
62 friend class SpeechRecognizerImplTest;
63
64 enum FSMState {
65 kIdle = 0,
66 kStartingRecognition,
67 kEstimatingEnvironment,
68 kWaitingForSpeech,
69 kRecognizingSpeech,
70 kWaitingFinalResult,
71 kMaxState = kWaitingFinalResult
72 };
73
74 enum FSMEvent {
75 kAbortRequest = 0,
76 kStartRequest,
77 kStopCaptureRequest,
78 kAudioData,
79 kRecognitionResult,
80 kRecognitionError,
81 kAudioError,
82 kMaxEvent = kAudioError
83 };
84
85 struct FSMEventArgs {
86 int audio_error_code;
87 AudioChunk* audio_data;
88 content::SpeechRecognitionResult speech_result;
89 content::SpeechRecognitionError error;
90 FSMEventArgs();
91 };
59 92
60 // AudioInputController::EventHandler methods. 93 // AudioInputController::EventHandler methods.
61 virtual void OnCreated(media::AudioInputController* controller) OVERRIDE {} 94 virtual void OnCreated(media::AudioInputController* controller) OVERRIDE {}
62 virtual void OnRecording(media::AudioInputController* controller) OVERRIDE {} 95 virtual void OnRecording(media::AudioInputController* controller) OVERRIDE {}
63 virtual void OnError(media::AudioInputController* controller, 96 virtual void OnError(media::AudioInputController* controller,
64 int error_code) OVERRIDE; 97 int error_code) OVERRIDE;
65 virtual void OnData(media::AudioInputController* controller, 98 virtual void OnData(media::AudioInputController* controller,
66 const uint8* data, 99 const uint8* data, uint32 size) OVERRIDE;
67 uint32 size) OVERRIDE;
68 100
69 // SpeechRecognitionRequest::Delegate methods. 101 // SpeechRecognitionEngineDelegate methods.
70 virtual void SetRecognitionResult( 102 virtual void OnSpeechEngineResult(
71 const content::SpeechRecognitionResult& result) OVERRIDE; 103 const content::SpeechRecognitionResult& result) OVERRIDE;
104 virtual void OnSpeechEngineError(
105 const content::SpeechRecognitionError& error) OVERRIDE;
72 106
73 private: 107 void DispatchEvent(FSMEvent event, FSMEventArgs);
74 friend class SpeechRecognizerImplTest; 108 void ProcessAudioPipeline();
75 109 FSMState ProcessEvent(FSMEvent event);
76 void InformErrorAndAbortRecognition( 110 FSMState InitializeAndStartRecording();
77 content::SpeechRecognitionErrorCode error); 111 FSMState StartSpeechRecognition();
78 void SendRecordedAudioToServer(); 112 FSMState EnvironmentEstimation();
79 113 FSMState DetectUserSpeechOrTimeout();
80 void HandleOnError(int error_code); // Handles OnError in the IO thread. 114 FSMState StopCaptureAndWaitForResult();
81 115 FSMState ProcessIntermediateRecognitionResult();
82 // Handles OnData in the IO thread. Takes ownership of |raw_audio|. 116 FSMState ProcessFinalRecognitionResult();
83 void HandleOnData(AudioChunk* raw_audio); 117 FSMState Abort();
84 118 FSMState Abort(const content::SpeechRecognitionError& error);
85 // Helper method which closes the audio controller and blocks until done. 119 FSMState Abort(bool has_error, const content::SpeechRecognitionError& error);
120 FSMState DetectEndOfSpeech();
121 FSMState DoNothing() const;
122 int GetElapsedTimeMs() const;
123 void UpdateSignalAndNoiseLevels(const float& rms);
86 void CloseAudioControllerSynchronously(); 124 void CloseAudioControllerSynchronously();
87
88 void SetAudioManagerForTesting(AudioManager* audio_manager); 125 void SetAudioManagerForTesting(AudioManager* audio_manager);
89 126
90 content::SpeechRecognitionEventListener* listener_; 127 content::SpeechRecognitionEventListener* listener_;
128 AudioManager* testing_audio_manager_;
129 scoped_ptr<SpeechRecognitionEngine> recognition_engine_;
130 Endpointer endpointer_;
131 scoped_refptr<media::AudioInputController> audio_controller_;
91 int caller_id_; 132 int caller_id_;
92 std::string language_;
93 std::string grammar_;
94 bool filter_profanities_;
95 std::string hardware_info_;
96 std::string origin_url_;
97
98 scoped_ptr<SpeechRecognitionRequest> request_;
99 scoped_refptr<media::AudioInputController> audio_controller_;
100 scoped_refptr<net::URLRequestContextGetter> context_getter_;
101 AudioEncoder::Codec codec_;
102 scoped_ptr<AudioEncoder> encoder_;
103 Endpointer endpointer_;
104 int num_samples_recorded_; 133 int num_samples_recorded_;
134 bool clipper_detected_clip_;
105 float audio_level_; 135 float audio_level_;
106 AudioManager* audio_manager_; 136 float rms_;
137 int event_dispatch_nesting_level_;
138 FSMState state_;
139 FSMEvent event_;
140 FSMEventArgs* event_args_;
107 141
108 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl); 142 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl);
109 }; 143 };
110 144
111 } // namespace speech 145 } // namespace speech
112 146
113 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_ 147 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698