OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "content/browser/speech/speech_recognizer_impl.h" | 5 #include "content/browser/speech/speech_recognizer_impl.h" |
6 | 6 |
7 #include "base/basictypes.h" | |
7 #include "base/bind.h" | 8 #include "base/bind.h" |
8 #include "base/time.h" | 9 #include "base/time.h" |
9 #include "content/browser/browser_main_loop.h" | 10 #include "content/browser/browser_main_loop.h" |
10 #include "content/browser/speech/audio_buffer.h" | 11 #include "content/browser/speech/audio_buffer.h" |
11 #include "content/browser/speech/google_one_shot_remote_engine.h" | 12 #include "content/browser/speech/google_one_shot_remote_engine.h" |
12 #include "content/public/browser/browser_thread.h" | 13 #include "content/public/browser/browser_thread.h" |
13 #include "content/public/browser/speech_recognition_event_listener.h" | 14 #include "content/public/browser/speech_recognition_event_listener.h" |
14 #include "content/public/browser/speech_recognizer.h" | 15 #include "content/public/browser/speech_recognizer.h" |
15 #include "content/public/common/speech_recognition_error.h" | 16 #include "content/public/common/speech_recognition_error.h" |
16 #include "content/public/common/speech_recognition_result.h" | 17 #include "content/public/common/speech_recognition_result.h" |
17 #include "net/url_request/url_request_context_getter.h" | 18 #include "net/url_request/url_request_context_getter.h" |
18 | 19 |
20 #define NOT_FEASIBLE() do { NOTREACHED(); return state_; } while(0) | |
bulach
2012/04/04 15:38:17
nit: we avoid using macros as much as possible.. t
Primiano Tucci (use gerrit)
2012/04/11 10:05:41
Ok. Turned into a regular function like the others
| |
21 | |
19 using content::BrowserMainLoop; | 22 using content::BrowserMainLoop; |
20 using content::BrowserThread; | 23 using content::BrowserThread; |
21 using content::SpeechRecognitionError; | 24 using content::SpeechRecognitionError; |
22 using content::SpeechRecognitionEventListener; | 25 using content::SpeechRecognitionEventListener; |
23 using content::SpeechRecognitionResult; | 26 using content::SpeechRecognitionResult; |
24 using content::SpeechRecognizer; | 27 using content::SpeechRecognizer; |
25 using media::AudioInputController; | 28 using media::AudioInputController; |
26 using media::AudioManager; | 29 using media::AudioManager; |
30 using media::AudioParameters; | |
27 | 31 |
28 namespace { | 32 namespace { |
29 | 33 |
30 // The following constants are related to the volume level indicator shown in | 34 // The following constants are related to the volume level indicator shown in |
31 // the UI for recorded audio. | 35 // the UI for recorded audio. |
32 // Multiplier used when new volume is greater than previous level. | 36 // Multiplier used when new volume is greater than previous level. |
33 const float kUpSmoothingFactor = 1.0f; | 37 const float kUpSmoothingFactor = 1.0f; |
34 // Multiplier used when new volume is less than the previous level. | 38 // Multiplier used when new volume is less than the previous level. |
35 const float kDownSmoothingFactor = 0.7f; | 39 const float kDownSmoothingFactor = 0.7f; |
36 // RMS dB value of a maximum (unclipped) sine wave for int16 samples. | 40 // RMS dB value of a maximum (unclipped) sine wave for int16 samples. |
37 const float kAudioMeterMaxDb = 90.31f; | 41 const float kAudioMeterMaxDb = 90.31f; |
38 // This value corresponds to RMS dB for int16 with 6 most-significant-bits = 0. | 42 // This value corresponds to RMS dB for int16 with 6 most-significant-bits = 0. |
39 // Values lower than this will display as empty level-meter. | 43 // Values lower than this will display as empty level-meter. |
40 const float kAudioMeterMinDb = 30.0f; | 44 const float kAudioMeterMinDb = 30.0f; |
41 const float kAudioMeterDbRange = kAudioMeterMaxDb - kAudioMeterMinDb; | 45 const float kAudioMeterDbRange = kAudioMeterMaxDb - kAudioMeterMinDb; |
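For reference, kAudioMeterMaxDb can be derived from the int16 range (a derivation added here for clarity, not part of the CL): full scale for 16-bit samples is 2^15 = 32768, and

    20 * log10(32768) = 20 * 15 * log10(2) ≈ 90.31 dB

which matches the constant above.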
42 | 46 |
43 // Maximum level to draw to display unclipped meter. (1.0f displays clipping.) | 47 // Maximum level to draw to display unclipped meter. (1.0f displays clipping.) |
44 const float kAudioMeterRangeMaxUnclipped = 47.0f / 48.0f; | 48 const float kAudioMeterRangeMaxUnclipped = 47.0f / 48.0f; |
45 | 49 |
46 // Returns true if more than 5% of the samples are at min or max value. | 50 // Returns true if more than 5% of the samples are at min or max value. |
47 bool DetectClipping(const speech::AudioChunk& chunk) { | 51 bool DetectClipping(const speech::AudioChunk& chunk) { |
48 const int num_samples = chunk.NumSamples(); | 52 const int num_samples = chunk.NumSamples(); |
49 const int16* samples = chunk.SamplesData16(); | 53 const int16* samples = chunk.SamplesData16(); |
50 const int kThreshold = num_samples / 20; | 54 const int kThreshold = num_samples / 20; |
51 int clipping_samples = 0; | 55 int clipping_samples = 0; |
56 | |
52 for (int i = 0; i < num_samples; ++i) { | 57 for (int i = 0; i < num_samples; ++i) { |
53 if (samples[i] <= -32767 || samples[i] >= 32767) { | 58 if (samples[i] <= -32767 || samples[i] >= 32767) { |
54 if (++clipping_samples > kThreshold) | 59 if (++clipping_samples > kThreshold) |
55 return true; | 60 return true; |
56 } | 61 } |
57 } | 62 } |
58 return false; | 63 return false; |
59 } | 64 } |
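As a worked example of the 5% threshold (the packet duration is illustrative, not taken from this CL): a 100 ms packet at the 16 kHz sample rate used below gives

    num_samples = 16000 * 0.1 = 1600
    kThreshold  = 1600 / 20   = 80

so DetectClipping() returns true as soon as an 81st sample sits at the ±32767 rails.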
60 | 65 |
61 } // namespace | 66 } // namespace |
62 | 67 |
63 SpeechRecognizer* SpeechRecognizer::Create( | 68 SpeechRecognizer* SpeechRecognizer::Create( |
64 SpeechRecognitionEventListener* listener, | 69 SpeechRecognitionEventListener* listener, |
65 int caller_id, | 70 int caller_id, |
66 const std::string& language, | 71 const std::string& language, |
67 const std::string& grammar, | 72 const std::string& grammar, |
68 net::URLRequestContextGetter* context_getter, | 73 net::URLRequestContextGetter* context_getter, |
69 bool filter_profanities, | 74 bool filter_profanities, |
70 const std::string& hardware_info, | 75 const std::string& hardware_info, |
71 const std::string& origin_url) { | 76 const std::string& origin_url) { |
77 speech::GoogleOneShotRemoteEngineConfig google_sr_config; | |
bulach
2012/04/04 15:38:17
nit: prefer to call "remote_engine_config"
Primiano Tucci (use gerrit)
2012/04/11 10:05:41
Done.
| |
78 google_sr_config.language = language; | |
79 google_sr_config.grammar = grammar; | |
80 google_sr_config.audio_sample_rate = | |
81 speech::SpeechRecognizerImpl::kAudioSampleRate; | |
82 google_sr_config.audio_num_bits_per_sample = | |
83 speech::SpeechRecognizerImpl::kNumBitsPerAudioSample; | |
84 google_sr_config.filter_profanities = filter_profanities; | |
85 google_sr_config.hardware_info = hardware_info; | |
86 google_sr_config.origin_url = origin_url; | |
87 | |
88 speech::GoogleOneShotRemoteEngine* google_sr_engine = | |
bulach
2012/04/04 15:38:17
nit: remote_engine.
also, just to clarify could a
Primiano Tucci (use gerrit)
2012/04/11 10:05:41
Done.
| |
89 new speech::GoogleOneShotRemoteEngine(context_getter); | |
90 google_sr_engine->SetConfig(google_sr_config); | |
91 | |
72 return new speech::SpeechRecognizerImpl(listener, | 92 return new speech::SpeechRecognizerImpl(listener, |
73 caller_id, | 93 caller_id, |
74 language, | 94 google_sr_engine); |
75 grammar, | |
76 context_getter, | |
77 filter_profanities, | |
78 hardware_info, | |
79 origin_url); | |
80 } | 95 } |
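For orientation, a hypothetical call site could look like the sketch below; every name except Create() and StartRecognition() is an assumption made for illustration:

    // Sketch only: assumes |listener|, |context_getter|, |caller_id| and
    // |origin_url| already exist in the caller.
    content::SpeechRecognizer* recognizer = content::SpeechRecognizer::Create(
        listener, caller_id, "en-US", "" /* grammar */, context_getter,
        false /* filter_profanities */, "" /* hardware_info */, origin_url);
    recognizer->StartRecognition();  // Posts EVENT_START to the IO thread.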
81 | 96 |
82 namespace speech { | 97 namespace speech { |
83 | 98 |
84 const int SpeechRecognizerImpl::kAudioSampleRate = 16000; | 99 const int SpeechRecognizerImpl::kAudioSampleRate = 16000; |
85 const ChannelLayout SpeechRecognizerImpl::kChannelLayout = CHANNEL_LAYOUT_MONO; | 100 const ChannelLayout SpeechRecognizerImpl::kChannelLayout = CHANNEL_LAYOUT_MONO; |
86 const int SpeechRecognizerImpl::kNumBitsPerAudioSample = 16; | 101 const int SpeechRecognizerImpl::kNumBitsPerAudioSample = 16; |
87 const int SpeechRecognizerImpl::kNoSpeechTimeoutMs = 8000; | 102 const int SpeechRecognizerImpl::kNoSpeechTimeoutMs = 8000; |
88 const int SpeechRecognizerImpl::kEndpointerEstimationTimeMs = 300; | 103 const int SpeechRecognizerImpl::kEndpointerEstimationTimeMs = 300; |
89 | 104 |
105 COMPILE_ASSERT(SpeechRecognizerImpl::kNumBitsPerAudioSample % 8 == 0, | |
106 kNumBitsPerAudioSample_must_be_a_multiple_of_8); | |
107 | |
90 SpeechRecognizerImpl::SpeechRecognizerImpl( | 108 SpeechRecognizerImpl::SpeechRecognizerImpl( |
91 SpeechRecognitionEventListener* listener, | 109 SpeechRecognitionEventListener* listener, |
92 int caller_id, | 110 int caller_id, |
93 const std::string& language, | 111 SpeechRecognitionEngine* engine) |
94 const std::string& grammar, | |
95 net::URLRequestContextGetter* context_getter, | |
96 bool filter_profanities, | |
97 const std::string& hardware_info, | |
98 const std::string& origin_url) | |
99 : listener_(listener), | 112 : listener_(listener), |
100 testing_audio_manager_(NULL), | 113 testing_audio_manager_(NULL), |
114 recognition_engine_(engine), | |
101 endpointer_(kAudioSampleRate), | 115 endpointer_(kAudioSampleRate), |
102 context_getter_(context_getter), | |
103 caller_id_(caller_id), | 116 caller_id_(caller_id), |
104 language_(language), | 117 in_event_dispatching_(false), |
105 grammar_(grammar), | 118 state_(STATE_IDLE) { |
106 filter_profanities_(filter_profanities), | |
107 hardware_info_(hardware_info), | |
108 origin_url_(origin_url), | |
109 num_samples_recorded_(0), | |
110 audio_level_(0.0f) { | |
111 DCHECK(listener_ != NULL); | 119 DCHECK(listener_ != NULL); |
120 DCHECK(recognition_engine_ != NULL); | |
112 endpointer_.set_speech_input_complete_silence_length( | 121 endpointer_.set_speech_input_complete_silence_length( |
113 base::Time::kMicrosecondsPerSecond / 2); | 122 base::Time::kMicrosecondsPerSecond / 2); |
114 endpointer_.set_long_speech_input_complete_silence_length( | 123 endpointer_.set_long_speech_input_complete_silence_length( |
115 base::Time::kMicrosecondsPerSecond); | 124 base::Time::kMicrosecondsPerSecond); |
116 endpointer_.set_long_speech_length(3 * base::Time::kMicrosecondsPerSecond); | 125 endpointer_.set_long_speech_length(3 * base::Time::kMicrosecondsPerSecond); |
117 endpointer_.StartSession(); | 126 endpointer_.StartSession(); |
127 recognition_engine_->set_delegate(this); | |
118 } | 128 } |
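In human units (computed from base::Time::kMicrosecondsPerSecond = 1,000,000), the endpointer tuning above reads:

    speech_input_complete_silence_length      = 0.5 s
    long_speech_input_complete_silence_length = 1.0 s
    long_speech_length                        = 3.0 s

i.e., presumably, input is declared complete after 0.5 s of silence, relaxed to 1.0 s once speech has run longer than 3.0 s.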
119 | 129 |
120 SpeechRecognizerImpl::~SpeechRecognizerImpl() { | 130 SpeechRecognizerImpl::~SpeechRecognizerImpl() { |
121 // Recording should have stopped earlier due to the endpointer or | |
122 // |StopRecording| being called. | |
123 DCHECK(!audio_controller_.get()); | |
124 DCHECK(!recognition_engine_.get() || | |
125 !recognition_engine_->IsRecognitionPending()); | |
126 endpointer_.EndSession(); | 131 endpointer_.EndSession(); |
127 } | 132 } |
128 | 133 |
134 // ------- Methods that trigger Finite State Machine (FSM) events ------------ | |
135 | |
136 // NOTE: all the external events and requests should be enqueued (PostTask), |
137 // even if they come from the same (IO) thread, in order to preserve causality |
138 // between events and avoid interleaved event processing due to synchronous |
139 // callbacks (DispatchEvent() below DCHECKs that it is never re-entered). |
140 | |
129 void SpeechRecognizerImpl::StartRecognition() { | 141 void SpeechRecognizerImpl::StartRecognition() { |
142 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, | |
143 base::Bind(&SpeechRecognizerImpl::DispatchEvent, | |
144 this, FSMEventArgs(EVENT_START))); | |
145 } | |
146 | |
147 void SpeechRecognizerImpl::AbortRecognition() { | |
148 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, | |
149 base::Bind(&SpeechRecognizerImpl::DispatchEvent, | |
150 this, FSMEventArgs(EVENT_ABORT))); | |
151 } | |
152 | |
153 void SpeechRecognizerImpl::StopAudioCapture() { | |
154 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, | |
155 base::Bind(&SpeechRecognizerImpl::DispatchEvent, | |
156 this, FSMEventArgs(EVENT_STOP_CAPTURE))); | |
157 } | |
158 | |
159 bool SpeechRecognizerImpl::IsActive() const { | |
160 // Checking the FSM state from another thread (thus, while the FSM is | |
161 // potentially concurrently evolving) is meaningless. | |
130 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | 162 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); |
131 DCHECK(!audio_controller_.get()); | 163 return state_ != STATE_IDLE; |
132 DCHECK(!recognition_engine_.get() || | 164 } |
133 !recognition_engine_->IsRecognitionPending()); | 165 |
134 | 166 bool SpeechRecognizerImpl::IsCapturingAudio() const { |
135 // The endpointer needs to estimate the environment/background noise before | 167 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); // See IsActive(). |
136 // starting to treat the audio as user input. In |HandleOnData| we wait until | 168 const bool is_capturing_audio = state_ >= STATE_STARTING && |
137 // such time has passed before switching to user input mode. | 169 state_ <= STATE_RECOGNIZING; |
138 endpointer_.SetEnvironmentEstimationMode(); | 170 DCHECK((is_capturing_audio && (audio_controller_.get() != NULL)) || |
139 | 171 (!is_capturing_audio && audio_controller_.get() == NULL)); |
140 AudioManager* audio_manager = (testing_audio_manager_ != NULL) ? | 172 return is_capturing_audio; |
141 testing_audio_manager_ : BrowserMainLoop::GetAudioManager(); | |
142 const int samples_per_packet = kAudioSampleRate * | |
143 GoogleOneShotRemoteEngine::kAudioPacketIntervalMs / 1000; | |
144 media::AudioParameters params( | |
145 media::AudioParameters::AUDIO_PCM_LINEAR, kChannelLayout, | |
146 kAudioSampleRate, kNumBitsPerAudioSample, samples_per_packet); | |
147 audio_controller_ = AudioInputController::Create(audio_manager, this, params); | |
148 DCHECK(audio_controller_.get()); | |
149 VLOG(1) << "SpeechRecognizer starting record."; | |
150 num_samples_recorded_ = 0; | |
151 audio_controller_->Record(); | |
152 } | |
153 | |
154 void SpeechRecognizerImpl::AbortRecognition() { | |
155 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | |
156 DCHECK(audio_controller_.get() || recognition_engine_.get()); | |
157 | |
158 // Stop recording if required. | |
159 if (audio_controller_.get()) { | |
160 CloseAudioControllerAsynchronously(); | |
161 } | |
162 | |
163 VLOG(1) << "SpeechRecognizer canceling recognition."; | |
164 recognition_engine_.reset(); | |
165 } | |
166 | |
167 void SpeechRecognizerImpl::StopAudioCapture() { | |
168 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | |
169 | |
170 // If audio recording has already stopped and we are in recognition phase, | |
171 // silently ignore any more calls to stop recording. | |
172 if (!audio_controller_.get()) | |
173 return; | |
174 | |
175 CloseAudioControllerAsynchronously(); | |
176 listener_->OnSoundEnd(caller_id_); | |
177 listener_->OnAudioEnd(caller_id_); | |
178 | |
179 // If we haven't got any audio yet end the recognition sequence here. | |
180 if (recognition_engine_ == NULL) { | |
181 // Guard against the listener freeing us until we finish our job. | |
182 scoped_refptr<SpeechRecognizerImpl> me(this); | |
183 listener_->OnRecognitionEnd(caller_id_); | |
184 } else { | |
185 recognition_engine_->AudioChunksEnded(); | |
186 } | |
187 } | 173 } |
188 | 174 |
189 // Invoked in the audio thread. | 175 // Invoked in the audio thread. |
190 void SpeechRecognizerImpl::OnError(AudioInputController* controller, | 176 void SpeechRecognizerImpl::OnError(AudioInputController* controller, |
191 int error_code) { | 177 int error_code) { |
192 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, | 178 FSMEventArgs event_args(EVENT_AUDIO_ERROR); |
193 base::Bind(&SpeechRecognizerImpl::HandleOnError, | 179 event_args.audio_error_code = error_code; |
194 this, error_code)); | 180 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, |
195 } | 181 base::Bind(&SpeechRecognizerImpl::DispatchEvent, |
196 | 182 this, event_args)); |
197 void SpeechRecognizerImpl::HandleOnError(int error_code) { | |
198 LOG(WARNING) << "SpeechRecognizer::HandleOnError, code=" << error_code; | |
199 | |
200 // Check if we are still recording before canceling recognition, as | |
201 // recording might have been stopped after this error was posted to the queue | |
202 // by |OnError|. | |
203 if (!audio_controller_.get()) | |
204 return; | |
205 | |
206 InformErrorAndAbortRecognition(content::SPEECH_RECOGNITION_ERROR_AUDIO); | |
207 } | 183 } |
208 | 184 |
209 void SpeechRecognizerImpl::OnData(AudioInputController* controller, | 185 void SpeechRecognizerImpl::OnData(AudioInputController* controller, |
210 const uint8* data, uint32 size) { | 186 const uint8* data, uint32 size) { |
211 if (size == 0) // This can happen when recording stops; it is normal. | 187 if (size == 0) // This can happen when audio capture stops; it is normal. |
212 return; | 188 return; |
213 scoped_refptr<AudioChunk> raw_audio( | 189 |
214 new AudioChunk(data, | 190 FSMEventArgs event_args(EVENT_AUDIO_DATA); |
215 static_cast<size_t>(size), | 191 event_args.audio_data = new AudioChunk(data, static_cast<size_t>(size), |
216 kNumBitsPerAudioSample / 8)); | 192 kNumBitsPerAudioSample / 8); |
217 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, | 193 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, |
218 base::Bind(&SpeechRecognizerImpl::HandleOnData, | 194 base::Bind(&SpeechRecognizerImpl::DispatchEvent, |
219 this, raw_audio)); | 195 this, event_args)); |
220 } | |
221 | |
222 void SpeechRecognizerImpl::HandleOnData(scoped_refptr<AudioChunk> raw_audio) { | |
223 // Check if we are still recording and if not discard this buffer, as | |
224 // recording might have been stopped after this buffer was posted to the queue | |
225 // by |OnData|. | |
226 if (!audio_controller_.get()) | |
227 return; | |
228 | |
229 bool speech_was_heard_before_packet = endpointer_.DidStartReceivingSpeech(); | |
230 | |
231 float rms; | |
232 endpointer_.ProcessAudio(*raw_audio, &rms); | |
233 bool did_clip = DetectClipping(*raw_audio); | |
234 num_samples_recorded_ += raw_audio->NumSamples(); | |
235 | |
236 if (recognition_engine_ == NULL) { | |
237 // This was the first audio packet recorded, so start a request to the | |
238 // server to send the data and inform the listener. | |
239 listener_->OnAudioStart(caller_id_); | |
240 GoogleOneShotRemoteEngineConfig google_sr_config; | |
241 google_sr_config.language = language_; | |
242 google_sr_config.grammar = grammar_; | |
243 google_sr_config.audio_sample_rate = kAudioSampleRate; | |
244 google_sr_config.audio_num_bits_per_sample = kNumBitsPerAudioSample; | |
245 google_sr_config.filter_profanities = filter_profanities_; | |
246 google_sr_config.hardware_info = hardware_info_; | |
247 google_sr_config.origin_url = origin_url_; | |
248 GoogleOneShotRemoteEngine* google_sr_engine = | |
249 new GoogleOneShotRemoteEngine(context_getter_.get()); | |
250 google_sr_engine->SetConfig(google_sr_config); | |
251 recognition_engine_.reset(google_sr_engine); | |
252 recognition_engine_->set_delegate(this); | |
253 recognition_engine_->StartRecognition(); | |
254 } | |
255 | |
256 recognition_engine_->TakeAudioChunk(*raw_audio); | |
257 | |
258 if (endpointer_.IsEstimatingEnvironment()) { | |
259 // Check if we have gathered enough audio for the endpointer to do | |
260 // environment estimation and should move on to detect speech/end of speech. | |
261 if (num_samples_recorded_ >= (kEndpointerEstimationTimeMs * | |
262 kAudioSampleRate) / 1000) { | |
263 endpointer_.SetUserInputMode(); | |
264 listener_->OnEnvironmentEstimationComplete(caller_id_); | |
265 } | |
266 return; // No more processing since we are still estimating environment. | |
267 } | |
268 | |
269 // Check if we have waited too long without hearing any speech. | |
270 bool speech_was_heard_after_packet = endpointer_.DidStartReceivingSpeech(); | |
271 if (!speech_was_heard_after_packet && | |
272 num_samples_recorded_ >= (kNoSpeechTimeoutMs / 1000) * kAudioSampleRate) { | |
273 InformErrorAndAbortRecognition( | |
274 content::SPEECH_RECOGNITION_ERROR_NO_SPEECH); | |
275 return; | |
276 } | |
277 | |
278 if (!speech_was_heard_before_packet && speech_was_heard_after_packet) | |
279 listener_->OnSoundStart(caller_id_); | |
280 | |
281 // Calculate the input volume to display in the UI, smoothing towards the | |
282 // new level. | |
283 float level = (rms - kAudioMeterMinDb) / | |
284 (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped); | |
285 level = std::min(std::max(0.0f, level), kAudioMeterRangeMaxUnclipped); | |
286 if (level > audio_level_) { | |
287 audio_level_ += (level - audio_level_) * kUpSmoothingFactor; | |
288 } else { | |
289 audio_level_ += (level - audio_level_) * kDownSmoothingFactor; | |
290 } | |
291 | |
292 float noise_level = (endpointer_.NoiseLevelDb() - kAudioMeterMinDb) / | |
293 (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped); | |
294 noise_level = std::min(std::max(0.0f, noise_level), | |
295 kAudioMeterRangeMaxUnclipped); | |
296 | |
297 listener_->OnAudioLevelsChange(caller_id_, did_clip ? 1.0f : audio_level_, | |
298 noise_level); | |
299 | |
300 if (endpointer_.speech_input_complete()) | |
301 StopAudioCapture(); | |
302 } | 196 } |
303 | 197 |
304 void SpeechRecognizerImpl::OnAudioClosed(AudioInputController*) {} | 198 void SpeechRecognizerImpl::OnAudioClosed(AudioInputController*) {} |
305 | 199 |
306 void SpeechRecognizerImpl::OnSpeechRecognitionEngineResult( | 200 void SpeechRecognizerImpl::OnSpeechRecognitionEngineResult( |
307 const content::SpeechRecognitionResult& result) { | 201 const content::SpeechRecognitionResult& result) { |
308 // Guard against the listener freeing us until we finish our job. | 202 FSMEventArgs event_args(EVENT_ENGINE_RESULT); |
203 event_args.engine_result = result; | |
204 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, | |
205 base::Bind(&SpeechRecognizerImpl::DispatchEvent, | |
206 this, event_args)); | |
207 } | |
208 | |
209 void SpeechRecognizerImpl::OnSpeechRecognitionEngineError( | |
210 const content::SpeechRecognitionError& error) { | |
211 FSMEventArgs event_args(EVENT_ENGINE_ERROR); | |
212 event_args.engine_error = error; | |
213 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, | |
214 base::Bind(&SpeechRecognizerImpl::DispatchEvent, | |
215 this, event_args)); | |
216 } | |
217 | |
218 // ----------------------- Core FSM implementation --------------------------- | |
219 // TODO(primiano) After the changes in the media package (r129173), this class | |
220 // slightly violates the SpeechRecognitionEventListener interface contract. In | |
221 // particular, it is no longer true that this class can be freed after the |
222 // OnRecognitionEnd event, since the audio_controller_.Close() asynchronous | |
223 // call can still be in progress after the end event. Currently, it does not |
224 // represent a problem for the browser itself, since refcounting protects us | |
225 // against such race conditions. However, we should fix this in the next CLs. | |
226 // For instance, tests are currently working just because the | |
227 // TestAudioInputController is not closing asynchronously as the real controller | |
228 // does, but they will become flaky once TestAudioInputController is fixed. |
229 | |
230 void SpeechRecognizerImpl::DispatchEvent(const FSMEventArgs& event_args) { | |
231 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | |
232 DCHECK_LE(event_args.event, EVENT_MAX); | |
233 DCHECK_LE(state_, STATE_MAX); | |
234 | |
235 // Event dispatching must be sequential; otherwise it would break the rules |
236 // and assumptions of the finite state automaton model. |
237 DCHECK(!in_event_dispatching_); | |
238 in_event_dispatching_ = true; | |
239 | |
240 // Guard against the delegate freeing us until we finish processing the event. | |
309 scoped_refptr<SpeechRecognizerImpl> me(this); | 241 scoped_refptr<SpeechRecognizerImpl> me(this); |
242 | |
243 if (event_args.event == EVENT_AUDIO_DATA) { | |
244 DCHECK(event_args.audio_data.get() != NULL); | |
245 ProcessAudioPipeline(*event_args.audio_data); | |
246 } | |
247 | |
248 // The audio pipeline must be processed before the event dispatch, otherwise | |
249 // it would take actions according to the future state instead of the current one. |
250 state_ = ExecuteTransitionAndGetNextState(event_args); | |
251 | |
252 in_event_dispatching_ = false; | |
253 } | |
254 | |
255 SpeechRecognizerImpl::FSMState | |
256 SpeechRecognizerImpl::ExecuteTransitionAndGetNextState( | |
257 const FSMEventArgs& event_args) { | |
258 const FSMEvent event = event_args.event; | |
259 switch (state_) { | |
260 case STATE_IDLE: | |
261 switch (event) { | |
262 // TODO(primiano) restore UNREACHABLE_CONDITION on EVENT_ABORT and | |
263 // EVENT_STOP_CAPTURE below once speech input extensions are fixed. | |
264 case EVENT_ABORT: | |
265 return DoNothing(event_args); | |
266 case EVENT_START: | |
267 return StartRecording(event_args); | |
268 case EVENT_STOP_CAPTURE: | |
269 return DoNothing(event_args); | |
270 case EVENT_AUDIO_DATA: | |
271 return DoNothing(event_args); // Corner cases related to queued | |
272 case EVENT_ENGINE_RESULT: // messages being lately dispatched. | |
273 return DoNothing(event_args); | |
274 case EVENT_ENGINE_ERROR: | |
275 return DoNothing(event_args); | |
276 case EVENT_AUDIO_ERROR: | |
277 return DoNothing(event_args); | |
bulach
2012/04/04 15:38:17
I find this is a bit hard to follow..
would it be
Primiano Tucci (use gerrit)
2012/04/11 10:05:41
Hmm, the point is that is not obvious whether the
| |
278 } | |
279 break; | |
280 case STATE_STARTING: | |
281 switch (event) { | |
282 case EVENT_ABORT: | |
283 return Abort(event_args); | |
284 case EVENT_START: | |
285 NOT_FEASIBLE(); | |
286 case EVENT_STOP_CAPTURE: | |
287 return Abort(event_args); | |
288 case EVENT_AUDIO_DATA: | |
289 return StartRecognitionEngine(event_args); | |
290 case EVENT_ENGINE_RESULT: | |
291 NOT_FEASIBLE(); | |
292 case EVENT_ENGINE_ERROR: | |
293 return Abort(event_args); | |
294 case EVENT_AUDIO_ERROR: | |
295 return Abort(event_args); | |
bulach
2012/04/04 15:38:17
ditto here...
maybe something like:
case EVENT_AUD
Primiano Tucci (use gerrit)
2012/04/11 10:05:41
Grouped adjacent cases ending with the same action
| |
296 } | |
297 break; | |
298 case STATE_ESTIMATING_ENVIRONMENT: | |
299 switch (event) { | |
300 case EVENT_ABORT: | |
301 return Abort(event_args); | |
302 case EVENT_START: | |
303 NOT_FEASIBLE(); | |
304 case EVENT_STOP_CAPTURE: | |
305 return StopCaptureAndWaitForResult(event_args); | |
306 case EVENT_AUDIO_DATA: | |
307 return WaitEnvironmentEstimationCompletion(event_args); | |
308 case EVENT_ENGINE_RESULT: | |
309 return ProcessIntermediateResult(event_args); | |
310 case EVENT_ENGINE_ERROR: | |
311 return Abort(event_args); | |
312 case EVENT_AUDIO_ERROR: | |
313 return Abort(event_args); | |
314 } | |
315 break; | |
316 case STATE_WAITING_FOR_SPEECH: | |
317 switch (event) { | |
318 case EVENT_ABORT: | |
319 return Abort(event_args); | |
320 case EVENT_START: | |
321 NOT_FEASIBLE(); | |
322 case EVENT_STOP_CAPTURE: | |
323 return StopCaptureAndWaitForResult(event_args); | |
324 case EVENT_AUDIO_DATA: | |
325 return DetectUserSpeechOrTimeout(event_args); | |
326 case EVENT_ENGINE_RESULT: | |
327 return ProcessIntermediateResult(event_args); | |
328 case EVENT_ENGINE_ERROR: | |
329 return Abort(event_args); | |
330 case EVENT_AUDIO_ERROR: | |
331 return Abort(event_args); | |
332 } | |
333 break; | |
334 case STATE_RECOGNIZING: | |
335 switch (event) { | |
336 case EVENT_ABORT: | |
337 return Abort(event_args); | |
338 case EVENT_START: | |
339 NOT_FEASIBLE(); | |
340 case EVENT_STOP_CAPTURE: | |
341 return StopCaptureAndWaitForResult(event_args); | |
342 case EVENT_AUDIO_DATA: | |
343 return DetectEndOfSpeech(event_args); | |
344 case EVENT_ENGINE_RESULT: | |
345 return ProcessIntermediateResult(event_args); | |
346 case EVENT_ENGINE_ERROR: | |
347 return Abort(event_args); | |
348 case EVENT_AUDIO_ERROR: | |
349 return Abort(event_args); | |
350 } | |
351 break; | |
352 case STATE_WAITING_FINAL_RESULT: | |
353 switch (event) { | |
354 case EVENT_ABORT: | |
355 return Abort(event_args); | |
356 case EVENT_START: | |
357 NOT_FEASIBLE(); | |
358 case EVENT_STOP_CAPTURE: | |
359 return DoNothing(event_args); | |
360 case EVENT_AUDIO_DATA: | |
361 return DoNothing(event_args); | |
362 case EVENT_ENGINE_RESULT: | |
363 return ProcessFinalResult(event_args); | |
364 case EVENT_ENGINE_ERROR: | |
365 return Abort(event_args); | |
366 case EVENT_AUDIO_ERROR: | |
367 return Abort(event_args); | |
368 } | |
369 break; | |
370 } | |
371 NOT_FEASIBLE(); | |
372 } | |
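For readability, the happy path through the table above (reconstructed from the transitions; no new behavior implied) is:

    STATE_IDLE                   --EVENT_START--------> StartRecording()
    STATE_STARTING               --EVENT_AUDIO_DATA---> StartRecognitionEngine()
    STATE_ESTIMATING_ENVIRONMENT --EVENT_AUDIO_DATA---> WaitEnvironmentEstimationCompletion()
    STATE_WAITING_FOR_SPEECH     --EVENT_AUDIO_DATA---> DetectUserSpeechOrTimeout()
    STATE_RECOGNIZING            --EVENT_AUDIO_DATA---> DetectEndOfSpeech()
                                 (or --EVENT_STOP_CAPTURE--> StopCaptureAndWaitForResult())
    STATE_WAITING_FINAL_RESULT   --EVENT_ENGINE_RESULT-> ProcessFinalResult() --> STATE_IDLE

Any EVENT_ABORT, EVENT_AUDIO_ERROR or EVENT_ENGINE_ERROR along the way funnels into Abort().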
373 | |
374 // ----------- Contract for all the FSM evolution functions below ------------- | |
375 // - Are guaranteed to be executed in the IO thread; | |
376 // - Are guaranteed not to be reentrant (with themselves and each other); |
377 // - event_args members are guaranteed to be stable during the call; | |
378 // - The class won't be freed in the meanwhile due to callbacks; | |
379 // - IsCapturingAudio() returns true if and only if audio_controller_ != NULL. | |
380 | |
381 // TODO(primiano) The audio pipeline is currently serial. However, the |
382 // clipper->endpointer->vumeter chain and the sr_engine could be parallelized. |
383 // We should profile the execution to see whether it would be worthwhile. |
384 void SpeechRecognizerImpl::ProcessAudioPipeline(const AudioChunk& raw_audio) { | |
385 const bool route_to_endpointer = state_ >= STATE_ESTIMATING_ENVIRONMENT && | |
386 state_ <= STATE_RECOGNIZING; | |
387 const bool route_to_sr_engine = route_to_endpointer; | |
388 const bool route_to_vumeter = state_ >= STATE_WAITING_FOR_SPEECH && | |
389 state_ <= STATE_RECOGNIZING; | |
390 const bool clip_detected = DetectClipping(raw_audio); | |
391 float rms = 0; | |
392 | |
393 num_samples_recorded_ += raw_audio.NumSamples(); | |
394 | |
395 if (route_to_endpointer) { | |
bulach
2012/04/04 15:38:17
nit: we normally avoid {} on single line if blocks
Primiano Tucci (use gerrit)
2012/04/11 10:05:41
Done.
| |
396 endpointer_.ProcessAudio(raw_audio, &rms); | |
397 } | |
398 if (route_to_vumeter) { | |
399 DCHECK(route_to_endpointer); // Depends on endpointer due to |rms|. | |
400 UpdateSignalAndNoiseLevels(rms, clip_detected); | |
401 } | |
402 if (route_to_sr_engine) { | |
403 DCHECK(recognition_engine_.get()); | |
404 recognition_engine_->TakeAudioChunk(raw_audio); | |
405 } | |
406 } | |
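In table form, the routing booleans above resolve per state as:

    state                         endpointer  sr_engine  vumeter
    STATE_STARTING                no          no         no
    STATE_ESTIMATING_ENVIRONMENT  yes         yes        no
    STATE_WAITING_FOR_SPEECH      yes         yes        yes
    STATE_RECOGNIZING             yes         yes        yes

The first chunk captured in STATE_STARTING reaches the engine via StartRecognitionEngine() instead (see the "little hack" comment below).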
407 | |
408 SpeechRecognizerImpl::FSMState | |
409 SpeechRecognizerImpl::StartRecording(const FSMEventArgs&) { | |
410 DCHECK(recognition_engine_.get()); | |
411 DCHECK(!IsCapturingAudio()); | |
412 AudioManager* audio_manager = (testing_audio_manager_ != NULL) ? | |
413 testing_audio_manager_ : | |
414 BrowserMainLoop::GetAudioManager(); | |
415 DCHECK(audio_manager != NULL); | |
416 | |
417 VLOG(1) << "SpeechRecognizerImpl starting audio capture."; | |
bulach
2012/04/04 15:38:17
nit: DVLOG?
Primiano Tucci (use gerrit)
2012/04/11 10:05:41
Done.
| |
418 num_samples_recorded_ = 0; | |
419 audio_level_ = 0; | |
420 listener_->OnRecognitionStart(caller_id_); | |
421 | |
422 if (!audio_manager->HasAudioInputDevices()) { | |
423 return AbortWithError(SpeechRecognitionError( | |
424 content::SPEECH_RECOGNITION_ERROR_AUDIO, | |
425 content::SPEECH_AUDIO_ERROR_DETAILS_NO_MIC)); | |
426 } | |
427 | |
428 if (audio_manager->IsRecordingInProcess()) { | |
429 return AbortWithError(SpeechRecognitionError( | |
430 content::SPEECH_RECOGNITION_ERROR_AUDIO, | |
431 content::SPEECH_AUDIO_ERROR_DETAILS_IN_USE)); | |
432 } | |
433 | |
434 const int samples_per_packet = (kAudioSampleRate * | |
435 recognition_engine_->GetDesiredAudioChunkDurationMs()) / 1000; | |
436 AudioParameters params(AudioParameters::AUDIO_PCM_LINEAR, kChannelLayout, | |
437 kAudioSampleRate, kNumBitsPerAudioSample, | |
438 samples_per_packet); | |
439 audio_controller_ = AudioInputController::Create(audio_manager, this, params); | |
440 | |
441 if (audio_controller_.get() == NULL) { | |
bulach
2012/04/04 15:38:17
nit: if (!audio_controller_.get()) {
Primiano Tucci (use gerrit)
2012/04/11 10:05:41
Hmm is it strict? I feel to violate my moral and e
| |
442 return AbortWithError( | |
443 SpeechRecognitionError(content::SPEECH_RECOGNITION_ERROR_AUDIO)); | |
444 } | |
445 | |
446 // The endpointer needs to estimate the environment/background noise before | |
447 // starting to treat the audio as user input. We wait in the state | |
448 // ESTIMATING_ENVIRONMENT until that interval has elapsed before switching |
449 // to user input mode. | |
450 endpointer_.SetEnvironmentEstimationMode(); | |
451 audio_controller_->Record(); | |
452 return STATE_STARTING; | |
453 } | |
454 | |
455 SpeechRecognizerImpl::FSMState | |
456 SpeechRecognizerImpl::StartRecognitionEngine(const FSMEventArgs& event_args) { | |
457 // This is the first audio packet captured, so the recognition engine is | |
458 // started and the delegate notified about the event. | |
459 DCHECK(recognition_engine_.get()); | |
460 recognition_engine_->StartRecognition(); | |
461 listener_->OnAudioStart(caller_id_); | |
462 | |
463 // This is a little hack, since TakeAudioChunk() is already called by | |
464 // ProcessAudioPipeline(). It is the best tradeoff, unless we allow dropping | |
465 // the first audio chunk captured after opening the audio device. | |
466 recognition_engine_->TakeAudioChunk(*(event_args.audio_data)); | |
467 return STATE_ESTIMATING_ENVIRONMENT; | |
468 } | |
469 | |
470 SpeechRecognizerImpl::FSMState | |
471 SpeechRecognizerImpl::WaitEnvironmentEstimationCompletion(const FSMEventArgs&) { | |
472 DCHECK(endpointer_.IsEstimatingEnvironment()); | |
473 if (GetElapsedTimeMs() >= kEndpointerEstimationTimeMs) { | |
474 endpointer_.SetUserInputMode(); | |
475 listener_->OnEnvironmentEstimationComplete(caller_id_); | |
476 return STATE_WAITING_FOR_SPEECH; | |
477 } else { | |
bulach
2012/04/04 15:38:17
nit: here, 491 and 500, remove the final "else" bl
Primiano Tucci (use gerrit)
2012/04/11 10:05:41
Done.
| |
478 return STATE_ESTIMATING_ENVIRONMENT; | |
479 } | |
480 } | |
481 | |
482 SpeechRecognizerImpl::FSMState | |
483 SpeechRecognizerImpl::DetectUserSpeechOrTimeout(const FSMEventArgs&) { | |
484 if (endpointer_.DidStartReceivingSpeech()) { | |
485 listener_->OnSoundStart(caller_id_); | |
486 return STATE_RECOGNIZING; | |
487 } else if (GetElapsedTimeMs() >= kNoSpeechTimeoutMs) { | |
488 return AbortWithError( | |
489 SpeechRecognitionError(content::SPEECH_RECOGNITION_ERROR_NO_SPEECH)); | |
490 } else { | |
491 return STATE_WAITING_FOR_SPEECH; | |
492 } | |
493 } | |
494 | |
495 SpeechRecognizerImpl::FSMState | |
496 SpeechRecognizerImpl::DetectEndOfSpeech(const FSMEventArgs& event_args) { | |
497 if (endpointer_.speech_input_complete()) { | |
498 return StopCaptureAndWaitForResult(event_args); | |
499 } else { | |
500 return STATE_RECOGNIZING; | |
501 } | |
502 } | |
503 | |
504 SpeechRecognizerImpl::FSMState | |
505 SpeechRecognizerImpl::StopCaptureAndWaitForResult(const FSMEventArgs&) { | |
506 DCHECK(state_ >= STATE_ESTIMATING_ENVIRONMENT && state_ <= STATE_RECOGNIZING); | |
507 | |
508 VLOG(1) << "Concluding recognition"; | |
bulach
2012/04/04 15:38:17
nit: DVLOG?
Primiano Tucci (use gerrit)
2012/04/11 10:05:41
Done.
| |
509 CloseAudioControllerAsynchronously(); | |
510 recognition_engine_->AudioChunksEnded(); | |
511 | |
512 if (state_ > STATE_WAITING_FOR_SPEECH) | |
513 listener_->OnSoundEnd(caller_id_); | |
514 | |
515 listener_->OnAudioEnd(caller_id_); | |
516 return STATE_WAITING_FINAL_RESULT; | |
517 } | |
518 | |
519 SpeechRecognizerImpl::FSMState | |
520 SpeechRecognizerImpl::Abort(const FSMEventArgs& event_args) { | |
521 // TODO(primiano) Should raise SPEECH_RECOGNITION_ERROR_ABORTED in the absence |
522 // of other specific error sources (i.e., when the abort was explicitly requested). |
523 // However, SPEECH_RECOGNITION_ERROR_ABORTED is not caught in UI layers | |
bulach
2012/04/04 15:38:17
which UI layers? I think it's about the renderers,
Primiano Tucci (use gerrit)
2012/04/11 10:05:41
Done.
| |
524 // and currently would cause an exception. JS will probably need it in the future. |
525 if (event_args.event == EVENT_AUDIO_ERROR) { | |
526 return AbortWithError( | |
527 SpeechRecognitionError(content::SPEECH_RECOGNITION_ERROR_AUDIO)); | |
528 } else if (event_args.event == EVENT_ENGINE_ERROR) { | |
529 return AbortWithError(event_args.engine_error); | |
530 } | |
531 return AbortWithError(NULL); | |
532 } | |
533 | |
534 SpeechRecognizerImpl::FSMState SpeechRecognizerImpl::AbortWithError( | |
535 const SpeechRecognitionError& error) { | |
bulach
2012/04/04 15:38:17
can we avoid this overload?
Primiano Tucci (use gerrit)
2012/04/11 10:05:41
Hmm I guess it would make more verbose statements
| |
536 return AbortWithError(&error); | |
537 } | |
538 | |
539 SpeechRecognizerImpl::FSMState SpeechRecognizerImpl::AbortWithError( | |
540 const SpeechRecognitionError* error) { | |
541 if (IsCapturingAudio()) | |
542 CloseAudioControllerAsynchronously(); | |
543 | |
544 VLOG(1) << "SpeechRecognizerImpl canceling recognition. "; | |
545 | |
546 // The recognition engine is initialized only after STATE_STARTING. | |
547 if (state_ > STATE_STARTING) { | |
548 DCHECK(recognition_engine_.get()); | |
549 recognition_engine_->EndRecognition(); | |
550 } | |
551 | |
552 if (state_ > STATE_WAITING_FOR_SPEECH && state_ < STATE_WAITING_FINAL_RESULT) | |
553 listener_->OnSoundEnd(caller_id_); | |
554 | |
555 if (state_ > STATE_STARTING && state_ < STATE_WAITING_FINAL_RESULT) | |
556 listener_->OnAudioEnd(caller_id_); | |
557 | |
558 if (error != NULL) | |
559 listener_->OnRecognitionError(caller_id_, *error); | |
560 | |
561 listener_->OnRecognitionEnd(caller_id_); | |
562 | |
563 return STATE_IDLE; | |
564 } | |
565 | |
566 SpeechRecognizerImpl::FSMState | |
567 SpeechRecognizerImpl::ProcessIntermediateResult(const FSMEventArgs&) { | |
568 // This is in preparation for future speech recognition functions. | |
bulach
2012/04/04 15:38:17
nit: indent
Primiano Tucci (use gerrit)
2012/04/11 10:05:41
Done.
| |
569 NOTREACHED(); | |
570 return state_; | |
571 } | |
572 | |
573 SpeechRecognizerImpl::FSMState | |
574 SpeechRecognizerImpl::ProcessFinalResult(const FSMEventArgs& event_args) { | |
575 const SpeechRecognitionResult& result = event_args.engine_result; | |
576 VLOG(1) << "Got valid result"; | |
bulach
2012/04/04 15:38:17
nit: DVLOG
Primiano Tucci (use gerrit)
2012/04/11 10:05:41
Done.
| |
577 recognition_engine_->EndRecognition(); | |
310 listener_->OnRecognitionResult(caller_id_, result); | 578 listener_->OnRecognitionResult(caller_id_, result); |
311 listener_->OnRecognitionEnd(caller_id_); | 579 listener_->OnRecognitionEnd(caller_id_); |
312 } | 580 return STATE_IDLE; |
313 | 581 } |
314 void SpeechRecognizerImpl::OnSpeechRecognitionEngineError( | 582 |
315 const content::SpeechRecognitionError& error) { | 583 SpeechRecognizerImpl::FSMState |
316 InformErrorAndAbortRecognition(error.code); | 584 SpeechRecognizerImpl::DoNothing(const FSMEventArgs&) const { |
317 } | 585 return state_; // Just keep the current state. |
318 | |
319 void SpeechRecognizerImpl::InformErrorAndAbortRecognition( | |
320 content::SpeechRecognitionErrorCode error) { | |
321 DCHECK_NE(error, content::SPEECH_RECOGNITION_ERROR_NONE); | |
322 AbortRecognition(); | |
323 | |
324 // Guard against the listener freeing us until we finish our job. | |
325 scoped_refptr<SpeechRecognizerImpl> me(this); | |
326 listener_->OnRecognitionError(caller_id_, error); | |
327 } | 586 } |
328 | 587 |
329 void SpeechRecognizerImpl::CloseAudioControllerAsynchronously() { | 588 void SpeechRecognizerImpl::CloseAudioControllerAsynchronously() { |
330 VLOG(1) << "SpeechRecognizer stopping record."; | 589 DCHECK(IsCapturingAudio()); |
590 VLOG(1) << "SpeechRecognizerImpl stopping audio capture."; | |
331 // Issues a Close on the audio controller, passing an empty callback. The only | 591 // Issues a Close on the audio controller, passing an empty callback. The only |
332 // purpose of such callback is to keep the audio controller refcounted until | 592 // purpose of such callback is to keep the audio controller refcounted until |
333 // Close has completed (in the audio thread) and automatically destroy it | 593 // Close has completed (in the audio thread) and automatically destroy it |
334 // afterwards (upon return from OnAudioClosed). | 594 // afterwards (upon return from OnAudioClosed). |
335 audio_controller_->Close(base::Bind(&SpeechRecognizerImpl::OnAudioClosed, | 595 audio_controller_->Close(base::Bind(&SpeechRecognizerImpl::OnAudioClosed, |
336 this, audio_controller_)); | 596 this, audio_controller_)); |
337 audio_controller_ = NULL; // The controller is still refcounted by Bind. | 597 audio_controller_ = NULL; // The controller is still refcounted by Bind. |
338 } | 598 } |
339 | 599 |
340 bool SpeechRecognizerImpl::IsActive() const { | 600 int SpeechRecognizerImpl::GetElapsedTimeMs() const { |
341 return (recognition_engine_.get() != NULL); | 601 return (num_samples_recorded_ * 1000) / kAudioSampleRate; |
342 } | 602 } |
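As a quick sanity check (the sample count is illustrative): with kAudioSampleRate = 16000,

    (128000 samples * 1000) / 16000 = 8000 ms

i.e. 128000 recorded samples is exactly the kNoSpeechTimeoutMs boundary checked in DetectUserSpeechOrTimeout().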
343 | 603 |
344 bool SpeechRecognizerImpl::IsCapturingAudio() const { | 604 void SpeechRecognizerImpl::UpdateSignalAndNoiseLevels(const float& rms, |
345 return (audio_controller_.get() != NULL); | 605 bool clip_detected) { |
606 // Calculate the input volume to display in the UI, smoothing towards the | |
607 // new level. | |
608 // TODO(primiano) Do we really need all this floating-point arithmetic here? |
609 // Perhaps it might be quite expensive on mobile. | |
610 float level = (rms - kAudioMeterMinDb) / | |
611 (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped); | |
612 level = std::min(std::max(0.0f, level), kAudioMeterRangeMaxUnclipped); | |
613 if (level > audio_level_) { | |
614 audio_level_ += (level - audio_level_) * kUpSmoothingFactor; | |
bulach
2012/04/04 15:38:17
nit: you can probably simplify this with:
const s
Primiano Tucci (use gerrit)
2012/04/11 10:05:41
It was code "inherited" from the original class, b
| |
615 } else { | |
616 audio_level_ += (level - audio_level_) * kDownSmoothingFactor; | |
617 } | |
618 | |
619 float noise_level = (endpointer_.NoiseLevelDb() - kAudioMeterMinDb) / | |
620 (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped); | |
621 noise_level = std::min(std::max(0.0f, noise_level), | |
622 kAudioMeterRangeMaxUnclipped); | |
623 | |
624 listener_->OnAudioLevelsChange( | |
625 caller_id_, clip_detected ? 1.0f : audio_level_, noise_level); | |
346 } | 626 } |
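A worked pass through the meter math above (the rms value is illustrative): with rms = 60.0 dB,

    level = (60.0 - 30.0) / (60.31 / (47/48)) ≈ 30.0 / 61.59 ≈ 0.487

The result is then smoothed: with kUpSmoothingFactor = 1.0 the displayed level jumps straight up to a louder input, while kDownSmoothingFactor = 0.7 moves it toward a quieter one by 70% of the gap per audio packet.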
347 | 627 |
348 const SpeechRecognitionEngine& | 628 const SpeechRecognitionEngine& |
349 SpeechRecognizerImpl::recognition_engine() const { | 629 SpeechRecognizerImpl::recognition_engine() const { |
350 return *(recognition_engine_.get()); | 630 return *(recognition_engine_.get()); |
351 } | 631 } |
352 | 632 |
353 void SpeechRecognizerImpl::SetAudioManagerForTesting( | 633 void SpeechRecognizerImpl::SetAudioManagerForTesting( |
354 AudioManager* audio_manager) { | 634 AudioManager* audio_manager) { |
355 testing_audio_manager_ = audio_manager; | 635 testing_audio_manager_ = audio_manager; |
356 } | 636 } |
357 | 637 |
638 SpeechRecognizerImpl::FSMEventArgs::FSMEventArgs(FSMEvent event_value) | |
639 : event(event_value), | |
640 audio_error_code(0), | |
641 audio_data(NULL), | |
642 engine_error(content::SPEECH_RECOGNITION_ERROR_NONE) { | |
643 } | |
644 | |
645 SpeechRecognizerImpl::FSMEventArgs::~FSMEventArgs() { | |
646 } | |
358 | 647 |
359 } // namespace speech | 648 } // namespace speech |